]> git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/commitdiff
Merge branch 'for-4.5-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj...
authorLinus Torvalds <torvalds@linux-foundation.org>
Wed, 10 Feb 2016 19:36:19 +0000 (11:36 -0800)
committerLinus Torvalds <torvalds@linux-foundation.org>
Wed, 10 Feb 2016 19:36:19 +0000 (11:36 -0800)
Pull cgroup fixes from Tejun Heo:

 - The destruction path of cgroup objects are asynchronous and
   multi-staged and some of them ended up destroying parents before
   children leading to failures in cpu and memory controllers.  Ensure
   that parents are always destroyed after children.

 - cpuset mm node migration was performed synchronously while holding
   threadgroup and cgroup mutexes and the recent threadgroup locking
   update resulted in a possible deadlock.  The migration is best effort
   and shouldn't have been performed under those locks to begin with.
   Made asynchronous.

 - Minor documentation fix.

* 'for-4.5-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  Documentation: cgroup: Fix 'cgroup-legacy' -> 'cgroup-v1'
  cgroup: make sure a parent css isn't freed before its children
  cgroup: make sure a parent css isn't offlined before its children
  cpuset: make mm migration asynchronous

1397 files changed:
.mailmap
Documentation/ABI/testing/configfs-rdma_cm [new file with mode: 0644]
Documentation/ABI/testing/sysfs-class-infiniband [new file with mode: 0644]
Documentation/Intel-IOMMU.txt
Documentation/cgroup-v2.txt
Documentation/devicetree/bindings/input/gpio-keys.txt
Documentation/devicetree/bindings/interrupt-controller/microchip,pic32-evic.txt [new file with mode: 0644]
Documentation/devicetree/bindings/mips/pic32/microchip,pic32mzda.txt [new file with mode: 0644]
Documentation/devicetree/bindings/net/brcm,bcmgenet.txt
Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt
Documentation/devicetree/bindings/net/hisilicon-hns-nic.txt
Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt
Documentation/devicetree/bindings/net/mdio-mux-gpio.txt
Documentation/devicetree/bindings/net/mdio-mux.txt
Documentation/devicetree/bindings/net/mediatek,mt7620-gsw.txt [new file with mode: 0644]
Documentation/devicetree/bindings/net/phy.txt
Documentation/devicetree/bindings/net/ralink,rt2880-net.txt [new file with mode: 0644]
Documentation/devicetree/bindings/net/ralink,rt3050-esw.txt [new file with mode: 0644]
Documentation/devicetree/bindings/phy/phy-ath79-usb.txt [new file with mode: 0644]
Documentation/devicetree/bindings/thermal/rockchip-thermal.txt
Documentation/filesystems/proc.txt
Documentation/infiniband/core_locking.txt
Documentation/kernel-parameters.txt
Documentation/kernel-per-CPU-kthreads.txt
Documentation/networking/ip-sysctl.txt
Documentation/sysctl/fs.txt
Documentation/virtual/kvm/api.txt
MAINTAINERS
Makefile
arch/arm/Kconfig.debug
arch/arm/boot/compressed/Makefile
arch/arm/boot/dts/am33xx.dtsi
arch/arm/boot/dts/am4372.dtsi
arch/arm/boot/dts/am437x-gp-evm.dts
arch/arm/boot/dts/am43x-epos-evm.dts
arch/arm/boot/dts/am57xx-cl-som-am57x.dts
arch/arm/boot/dts/am57xx-sbc-am57x.dts
arch/arm/boot/dts/armada-xp-lenovo-ix4-300d.dts
arch/arm/boot/dts/at91-sama5d2_xplained.dts
arch/arm/boot/dts/at91-sama5d4_xplained.dts
arch/arm/boot/dts/at91-sama5d4ek.dts
arch/arm/boot/dts/at91sam9n12ek.dts
arch/arm/boot/dts/kirkwood-lswvl.dts
arch/arm/boot/dts/kirkwood-lswxl.dts
arch/arm/boot/dts/kirkwood-pogoplug-series-4.dts
arch/arm/boot/dts/logicpd-torpedo-som.dtsi
arch/arm/boot/dts/omap5-board-common.dtsi
arch/arm/boot/dts/orion5x-linkstation-lswtgl.dts
arch/arm/boot/dts/r8a7740-armadillo800eva.dts
arch/arm/boot/dts/sama5d4.dtsi
arch/arm/boot/dts/ste-nomadik-stn8815.dtsi
arch/arm/configs/multi_v7_defconfig
arch/arm/configs/omap2plus_defconfig
arch/arm/include/uapi/asm/unistd.h
arch/arm/kernel/calls.S
arch/arm/mach-omap2/devices.c
arch/arm/mach-omap2/pdata-quirks.c
arch/arm/mach-omap2/sleep34xx.S
arch/arm/mach-omap2/sleep44xx.S
arch/arm/mach-realview/Kconfig
arch/arm/mach-realview/Makefile
arch/arm/mach-realview/platsmp-dt.c
arch/arm/mach-tango/Kconfig
arch/arm/mach-tango/platsmp.c
arch/arm/mach-tegra/Kconfig
arch/arm/mach-tegra/sleep-tegra20.S
arch/arm/mach-tegra/sleep-tegra30.S
arch/arm/mm/dma-mapping.c
arch/arm64/Kconfig.platforms
arch/arm64/Makefile
arch/arm64/boot/dts/Makefile
arch/arm64/boot/dts/arm/juno-base.dtsi
arch/arm64/boot/dts/hisilicon/hip05_hns.dtsi
arch/arm64/boot/dts/nvidia/Makefile [new file with mode: 0644]
arch/arm64/boot/dts/nvidia/tegra132-norrin.dts [new file with mode: 0644]
arch/arm64/boot/dts/nvidia/tegra132.dtsi [new file with mode: 0644]
arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi [new file with mode: 0644]
arch/arm64/boot/dts/nvidia/tegra210-p2371-0000.dts [new file with mode: 0644]
arch/arm64/boot/dts/nvidia/tegra210-p2371-2180.dts [new file with mode: 0644]
arch/arm64/boot/dts/nvidia/tegra210-p2530.dtsi [new file with mode: 0644]
arch/arm64/boot/dts/nvidia/tegra210-p2571.dts [new file with mode: 0644]
arch/arm64/boot/dts/nvidia/tegra210-p2595.dtsi [new file with mode: 0644]
arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi [new file with mode: 0644]
arch/arm64/boot/dts/nvidia/tegra210.dtsi [new file with mode: 0644]
arch/arm64/configs/defconfig
arch/arm64/include/asm/futex.h
arch/arm64/include/asm/kvm_arm.h
arch/arm64/include/asm/kvm_emulate.h
arch/arm64/include/asm/page.h
arch/arm64/include/asm/pgtable.h
arch/arm64/kernel/head.S
arch/arm64/kernel/image.h
arch/arm64/kvm/hyp/switch.c
arch/arm64/kvm/inject_fault.c
arch/arm64/kvm/sys_regs.c
arch/arm64/mm/dump.c
arch/arm64/mm/kasan_init.c
arch/arm64/mm/pageattr.c
arch/arm64/mm/proc-macros.S
arch/arm64/mm/proc.S
arch/ia64/include/asm/unistd.h
arch/ia64/include/uapi/asm/unistd.h
arch/ia64/kernel/entry.S
arch/m32r/Kconfig
arch/mips/Kbuild.platforms
arch/mips/Kconfig
arch/mips/Makefile
arch/mips/alchemy/common/gpiolib.c
arch/mips/ar7/gpio.c
arch/mips/ath79/common.h
arch/mips/ath79/irq.c
arch/mips/ath79/setup.c
arch/mips/bcm47xx/sprom.c
arch/mips/bcm63xx/nvram.c
arch/mips/bmips/setup.c
arch/mips/boot/compressed/Makefile
arch/mips/boot/compressed/uart-prom.c [new file with mode: 0644]
arch/mips/boot/dts/Makefile
arch/mips/boot/dts/brcm/bcm6328.dtsi
arch/mips/boot/dts/brcm/bcm6368.dtsi
arch/mips/boot/dts/brcm/bcm7125.dtsi
arch/mips/boot/dts/brcm/bcm7346.dtsi
arch/mips/boot/dts/brcm/bcm7358.dtsi
arch/mips/boot/dts/brcm/bcm7360.dtsi
arch/mips/boot/dts/brcm/bcm7362.dtsi
arch/mips/boot/dts/brcm/bcm7420.dtsi
arch/mips/boot/dts/brcm/bcm7425.dtsi
arch/mips/boot/dts/brcm/bcm7435.dtsi
arch/mips/boot/dts/ingenic/ci20.dts
arch/mips/boot/dts/ingenic/jz4780.dtsi
arch/mips/boot/dts/pic32/Makefile [new file with mode: 0644]
arch/mips/boot/dts/pic32/pic32mzda-clk.dtsi [new file with mode: 0644]
arch/mips/boot/dts/pic32/pic32mzda.dtsi [new file with mode: 0644]
arch/mips/boot/dts/pic32/pic32mzda_sk.dts [new file with mode: 0644]
arch/mips/boot/dts/qca/ar9132.dtsi
arch/mips/boot/dts/qca/ar9132_tl_wr1043nd_v1.dts
arch/mips/configs/pic32mzda_defconfig [new file with mode: 0644]
arch/mips/include/asm/cacheops.h
arch/mips/include/asm/cpu-features.h
arch/mips/include/asm/cpu.h
arch/mips/include/asm/elf.h
arch/mips/include/asm/fpu_emulator.h
arch/mips/include/asm/io.h
arch/mips/include/asm/irqflags.h
arch/mips/include/asm/kvm_host.h
arch/mips/include/asm/mach-ath79/ath79.h
arch/mips/include/asm/mach-bcm63xx/bcm963xx_tag.h [deleted file]
arch/mips/include/asm/mach-pic32/cpu-feature-overrides.h [new file with mode: 0644]
arch/mips/include/asm/mach-pic32/irq.h [new file with mode: 0644]
arch/mips/include/asm/mach-pic32/pic32.h [new file with mode: 0644]
arch/mips/include/asm/mach-pic32/spaces.h [new file with mode: 0644]
arch/mips/include/asm/mach-ralink/irq.h [new file with mode: 0644]
arch/mips/include/asm/mach-ralink/mt7621.h [new file with mode: 0644]
arch/mips/include/asm/mach-ralink/mt7621/cpu-feature-overrides.h [new file with mode: 0644]
arch/mips/include/asm/mips-cm.h
arch/mips/include/asm/mips-r2-to-r6-emul.h
arch/mips/include/asm/mipsregs.h
arch/mips/include/asm/page.h
arch/mips/include/asm/pgtable.h
arch/mips/include/uapi/asm/inst.h
arch/mips/kernel/cpu-bugs64.c
arch/mips/kernel/cpu-probe.c
arch/mips/kernel/elf.c
arch/mips/kernel/gpio_txx9.c
arch/mips/kernel/ptrace.c
arch/mips/kernel/setup.c
arch/mips/kernel/smp-cps.c
arch/mips/kernel/sync-r4k.c
arch/mips/kernel/traps.c
arch/mips/kvm/callback.c
arch/mips/kvm/dyntrans.c
arch/mips/kvm/emulate.c
arch/mips/kvm/interrupt.c
arch/mips/kvm/locore.S
arch/mips/kvm/mips.c
arch/mips/kvm/opcode.h [deleted file]
arch/mips/kvm/tlb.c
arch/mips/kvm/trap_emul.c
arch/mips/lib/mips-atomic.c
arch/mips/loongson64/Platform
arch/mips/loongson64/loongson-3/hpet.c
arch/mips/loongson64/loongson-3/smp.c
arch/mips/math-emu/cp1emu.c
arch/mips/math-emu/dp_simple.c
arch/mips/math-emu/dp_tint.c
arch/mips/math-emu/dp_tlong.c
arch/mips/math-emu/dsemul.c
arch/mips/math-emu/ieee754.c
arch/mips/math-emu/ieee754.h
arch/mips/math-emu/ieee754dp.c
arch/mips/math-emu/ieee754int.h
arch/mips/math-emu/ieee754sp.c
arch/mips/math-emu/sp_fdp.c
arch/mips/math-emu/sp_simple.c
arch/mips/math-emu/sp_tint.c
arch/mips/math-emu/sp_tlong.c
arch/mips/mm/tlbex.c
arch/mips/pci/Makefile
arch/mips/pci/pci-mt7620.c [new file with mode: 0644]
arch/mips/pic32/Kconfig [new file with mode: 0644]
arch/mips/pic32/Makefile [new file with mode: 0644]
arch/mips/pic32/Platform [new file with mode: 0644]
arch/mips/pic32/common/Makefile [new file with mode: 0644]
arch/mips/pic32/common/irq.c [new file with mode: 0644]
arch/mips/pic32/common/reset.c [new file with mode: 0644]
arch/mips/pic32/pic32mzda/Makefile [new file with mode: 0644]
arch/mips/pic32/pic32mzda/config.c [new file with mode: 0644]
arch/mips/pic32/pic32mzda/early_clk.c [new file with mode: 0644]
arch/mips/pic32/pic32mzda/early_console.c [new file with mode: 0644]
arch/mips/pic32/pic32mzda/early_pin.c [new file with mode: 0644]
arch/mips/pic32/pic32mzda/early_pin.h [new file with mode: 0644]
arch/mips/pic32/pic32mzda/init.c [new file with mode: 0644]
arch/mips/pic32/pic32mzda/pic32mzda.h [new file with mode: 0644]
arch/mips/pic32/pic32mzda/time.c [new file with mode: 0644]
arch/mips/ralink/Kconfig
arch/mips/ralink/Makefile
arch/mips/ralink/Platform
arch/mips/ralink/irq-gic.c [new file with mode: 0644]
arch/mips/ralink/mt7620.c
arch/mips/ralink/mt7621.c [new file with mode: 0644]
arch/mips/ralink/rt288x.c
arch/mips/ralink/rt305x.c
arch/mips/ralink/rt3883.c
arch/mips/ralink/timer-gic.c [new file with mode: 0644]
arch/mips/rb532/gpio.c
arch/mips/txx9/generic/setup.c
arch/powerpc/include/asm/book3s/64/hash.h
arch/powerpc/include/asm/book3s/64/pgtable.h
arch/powerpc/include/asm/kvm_host.h
arch/powerpc/include/asm/systbl.h
arch/powerpc/include/asm/unistd.h
arch/powerpc/include/uapi/asm/unistd.h
arch/powerpc/kernel/eeh_pe.c
arch/powerpc/kernel/misc_64.S
arch/powerpc/kernel/module_64.c
arch/powerpc/kvm/book3s_64_mmu.c
arch/powerpc/kvm/book3s_hv.c
arch/powerpc/kvm/book3s_hv_rmhandlers.S
arch/powerpc/kvm/powerpc.c
arch/powerpc/mm/mem.c
arch/powerpc/perf/power8-pmu.c
arch/powerpc/platforms/cell/spufs/file.c
arch/powerpc/platforms/cell/spufs/inode.c
arch/s390/hypfs/inode.c
arch/s390/include/asm/irqflags.h
arch/s390/include/asm/kvm_host.h
arch/s390/include/asm/pci_io.h
arch/s390/include/asm/processor.h
arch/s390/include/asm/ptrace.h
arch/s390/include/uapi/asm/unistd.h
arch/s390/kernel/compat_wrapper.c
arch/s390/kernel/crash_dump.c
arch/s390/kernel/debug.c
arch/s390/kernel/dumpstack.c
arch/s390/kernel/early.c
arch/s390/kernel/ftrace.c
arch/s390/kernel/ipl.c
arch/s390/kernel/kprobes.c
arch/s390/kernel/perf_event.c
arch/s390/kernel/process.c
arch/s390/kernel/ptrace.c
arch/s390/kernel/setup.c
arch/s390/kernel/signal.c
arch/s390/kernel/smp.c
arch/s390/kernel/stacktrace.c
arch/s390/kernel/syscalls.S
arch/s390/kernel/traps.c
arch/s390/kvm/Kconfig
arch/s390/kvm/Makefile
arch/s390/kvm/guestdbg.c
arch/s390/kvm/kvm-s390.c
arch/s390/mm/fault.c
arch/s390/mm/init.c
arch/s390/mm/mmap.c
arch/s390/mm/pgtable.c
arch/s390/numa/numa.c
arch/s390/oprofile/backtrace.c
arch/s390/pci/pci.c
arch/s390/pci/pci_event.c
arch/sh/include/asm/barrier.h
arch/um/include/asm/page.h
arch/x86/Kconfig
arch/x86/crypto/chacha20-ssse3-x86_64.S
arch/x86/include/asm/irq.h
arch/x86/include/asm/pgtable_types.h
arch/x86/include/asm/pmem.h
arch/x86/kernel/apic/io_apic.c
arch/x86/kernel/apic/vector.c
arch/x86/kernel/apic/x2apic_uv_x.c
arch/x86/kernel/cpu/perf_event_intel.c
arch/x86/kernel/cpu/perf_event_intel_uncore.c
arch/x86/kernel/cpu/perf_event_intel_uncore.h
arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
arch/x86/kernel/head64.c
arch/x86/kernel/irq.c
arch/x86/mm/hugetlbpage.c
arch/x86/mm/pageattr.c
arch/x86/platform/efi/quirks.c
arch/x86/platform/intel-mid/intel-mid.c
arch/x86/platform/intel-quark/imr.c
block/Makefile
block/blk-iopoll.c [deleted file]
block/blk-merge.c
block/ioctl.c
block/partition-generic.c
crypto/Kconfig
crypto/af_alg.c
crypto/ahash.c
crypto/algif_hash.c
crypto/algif_skcipher.c
crypto/asymmetric_keys/pkcs7_parser.c
crypto/crc32c_generic.c
crypto/crypto_user.c
crypto/shash.c
crypto/skcipher.c
drivers/acpi/acpi_lpss.c
drivers/acpi/apei/erst.c
drivers/acpi/video_detect.c
drivers/amba/Kconfig
drivers/base/devtmpfs.c
drivers/base/platform-msi.c
drivers/base/platform.c
drivers/base/power/common.c
drivers/base/power/domain.c
drivers/base/regmap/regmap-mmio.c
drivers/block/aoe/aoecmd.c
drivers/block/drbd/drbd_bitmap.c
drivers/block/drbd/drbd_debugfs.c
drivers/block/drbd/drbd_int.h
drivers/block/rbd.c
drivers/bus/Kconfig
drivers/bus/vexpress-config.c
drivers/char/hw_random/Kconfig
drivers/char/ipmi/ipmi_si_intf.c
drivers/char/mem.c
drivers/char/mspec.c
drivers/char/ps3flash.c
drivers/clocksource/Kconfig
drivers/clocksource/tcb_clksrc.c
drivers/cpufreq/cpufreq-dt.c
drivers/cpufreq/cpufreq.c
drivers/cpufreq/cpufreq_governor.c
drivers/cpufreq/pxa2xx-cpufreq.c
drivers/cpuidle/coupled.c
drivers/cpuidle/cpuidle.c
drivers/crypto/Kconfig
drivers/crypto/atmel-aes.c
drivers/crypto/atmel-sha.c
drivers/crypto/caam/ctrl.c
drivers/crypto/marvell/cesa.c
drivers/crypto/qat/qat_common/qat_hal.c
drivers/gpu/drm/amd/amdgpu/Makefile
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
drivers/gpu/drm/amd/amdgpu/iceland_smc.c
drivers/gpu/drm/amd/amdgpu/tonga_dpm.c
drivers/gpu/drm/amd/amdgpu/vi.c
drivers/gpu/drm/amd/amdkfd/kfd_process.c
drivers/gpu/drm/amd/powerplay/amd_powerplay.c
drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c
drivers/gpu/drm/drm_atomic_helper.c
drivers/gpu/drm/drm_dp_mst_topology.c
drivers/gpu/drm/drm_hashtab.c
drivers/gpu/drm/etnaviv/common.xml.h
drivers/gpu/drm/etnaviv/etnaviv_drv.c
drivers/gpu/drm/etnaviv/etnaviv_drv.h
drivers/gpu/drm/etnaviv/etnaviv_dump.c
drivers/gpu/drm/etnaviv/etnaviv_gem.c
drivers/gpu/drm/etnaviv/etnaviv_gem.h
drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
drivers/gpu/drm/etnaviv/etnaviv_gpu.c
drivers/gpu/drm/etnaviv/etnaviv_gpu.h
drivers/gpu/drm/etnaviv/state_hi.xml.h
drivers/gpu/drm/exynos/exynos_dp_core.c
drivers/gpu/drm/exynos/exynos_drm_dsi.c
drivers/gpu/drm/exynos/exynos_mixer.c
drivers/gpu/drm/i2c/adv7511.c
drivers/gpu/drm/i2c/adv7511.h
drivers/gpu/drm/i915/Kconfig
drivers/gpu/drm/i915/i915_drv.c
drivers/gpu/drm/i915/intel_display.c
drivers/gpu/drm/i915/intel_lrc.c
drivers/gpu/drm/i915/intel_ringbuffer.c
drivers/gpu/drm/radeon/dce6_afmt.c
drivers/gpu/drm/radeon/evergreen_hdmi.c
drivers/gpu/drm/radeon/evergreend.h
drivers/gpu/drm/radeon/radeon.h
drivers/gpu/drm/radeon/radeon_atombios.c
drivers/gpu/drm/radeon/radeon_audio.c
drivers/gpu/drm/radeon/radeon_audio.h
drivers/gpu/drm/radeon/radeon_display.c
drivers/gpu/drm/radeon/radeon_gem.c
drivers/gpu/drm/radeon/radeon_object.c
drivers/gpu/drm/radeon/vce_v1_0.c
drivers/gpu/drm/rockchip/Makefile
drivers/gpu/drm/rockchip/dw-mipi-dsi.c
drivers/gpu/drm/rockchip/rockchip_drm_drv.c
drivers/gpu/drm/rockchip/rockchip_drm_fb.c
drivers/gpu/drm/rockchip/rockchip_drm_fbdev.h
drivers/gpu/drm/rockchip/rockchip_drm_gem.c
drivers/gpu/drm/rockchip/rockchip_drm_vop.c
drivers/gpu/drm/vc4/vc4_v3d.c
drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
drivers/hwmon/dell-smm-hwmon.c
drivers/hwmon/fam15h_power.c
drivers/hwspinlock/hwspinlock_core.c
drivers/i2c/busses/i2c-designware-core.c
drivers/i2c/busses/i2c-piix4.c
drivers/iio/accel/Kconfig
drivers/iio/adc/Kconfig
drivers/iio/adc/ti_am335x_adc.c
drivers/iio/dac/mcp4725.c
drivers/iio/humidity/dht11.c
drivers/iio/imu/adis_buffer.c
drivers/iio/imu/inv_mpu6050/Kconfig
drivers/iio/inkern.c
drivers/iio/light/acpi-als.c
drivers/iio/light/ltr501.c
drivers/iio/pressure/mpl115.c
drivers/iio/proximity/pulsedlight-lidar-lite-v2.c
drivers/infiniband/Kconfig
drivers/infiniband/core/Makefile
drivers/infiniband/core/addr.c
drivers/infiniband/core/cache.c
drivers/infiniband/core/cm.c
drivers/infiniband/core/cma.c
drivers/infiniband/core/cma_configfs.c [new file with mode: 0644]
drivers/infiniband/core/core_priv.h
drivers/infiniband/core/cq.c [new file with mode: 0644]
drivers/infiniband/core/device.c
drivers/infiniband/core/fmr_pool.c
drivers/infiniband/core/mad.c
drivers/infiniband/core/mad_priv.h
drivers/infiniband/core/multicast.c
drivers/infiniband/core/roce_gid_mgmt.c
drivers/infiniband/core/sa_query.c
drivers/infiniband/core/sysfs.c
drivers/infiniband/core/ud_header.c
drivers/infiniband/core/umem_odp.c
drivers/infiniband/core/user_mad.c
drivers/infiniband/core/uverbs.h
drivers/infiniband/core/uverbs_cmd.c
drivers/infiniband/core/uverbs_main.c
drivers/infiniband/core/uverbs_marshall.c
drivers/infiniband/core/verbs.c
drivers/infiniband/hw/cxgb3/iwch_cm.c
drivers/infiniband/hw/cxgb3/iwch_cq.c
drivers/infiniband/hw/cxgb3/iwch_mem.c
drivers/infiniband/hw/cxgb3/iwch_provider.c
drivers/infiniband/hw/cxgb3/iwch_provider.h
drivers/infiniband/hw/cxgb3/iwch_qp.c
drivers/infiniband/hw/cxgb4/cm.c
drivers/infiniband/hw/cxgb4/cq.c
drivers/infiniband/hw/cxgb4/device.c
drivers/infiniband/hw/cxgb4/iw_cxgb4.h
drivers/infiniband/hw/cxgb4/mem.c
drivers/infiniband/hw/cxgb4/provider.c
drivers/infiniband/hw/cxgb4/qp.c
drivers/infiniband/hw/cxgb4/t4.h
drivers/infiniband/hw/cxgb4/user.h
drivers/infiniband/hw/mlx4/ah.c
drivers/infiniband/hw/mlx4/cq.c
drivers/infiniband/hw/mlx4/main.c
drivers/infiniband/hw/mlx4/mlx4_ib.h
drivers/infiniband/hw/mlx4/mr.c
drivers/infiniband/hw/mlx4/qp.c
drivers/infiniband/hw/mlx4/srq.c
drivers/infiniband/hw/mlx5/ah.c
drivers/infiniband/hw/mlx5/cq.c
drivers/infiniband/hw/mlx5/main.c
drivers/infiniband/hw/mlx5/mlx5_ib.h
drivers/infiniband/hw/mlx5/odp.c
drivers/infiniband/hw/mlx5/qp.c
drivers/infiniband/hw/mlx5/srq.c
drivers/infiniband/hw/mlx5/user.h
drivers/infiniband/hw/mthca/mthca_cq.c
drivers/infiniband/hw/mthca/mthca_provider.c
drivers/infiniband/hw/mthca/mthca_qp.c
drivers/infiniband/hw/nes/nes_cm.c
drivers/infiniband/hw/nes/nes_cm.h
drivers/infiniband/hw/nes/nes_utils.c
drivers/infiniband/hw/nes/nes_verbs.c
drivers/infiniband/hw/nes/nes_verbs.h
drivers/infiniband/hw/ocrdma/ocrdma_ah.c
drivers/infiniband/hw/ocrdma/ocrdma_main.c
drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
drivers/infiniband/hw/qib/qib_fs.c
drivers/infiniband/hw/qib/qib_mr.c
drivers/infiniband/hw/qib/qib_qp.c
drivers/infiniband/hw/qib/qib_verbs.c
drivers/infiniband/hw/qib/qib_verbs.h
drivers/infiniband/hw/qib/qib_verbs_mcast.c
drivers/infiniband/hw/usnic/usnic_debugfs.c
drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
drivers/infiniband/hw/usnic/usnic_ib_verbs.c
drivers/infiniband/hw/usnic/usnic_ib_verbs.h
drivers/infiniband/hw/usnic/usnic_vnic.c
drivers/infiniband/ulp/ipoib/ipoib.h
drivers/infiniband/ulp/ipoib/ipoib_cm.c
drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
drivers/infiniband/ulp/ipoib/ipoib_main.c
drivers/infiniband/ulp/ipoib/ipoib_multicast.c
drivers/infiniband/ulp/iser/iscsi_iser.c
drivers/infiniband/ulp/iser/iscsi_iser.h
drivers/infiniband/ulp/iser/iser_initiator.c
drivers/infiniband/ulp/iser/iser_memory.c
drivers/infiniband/ulp/iser/iser_verbs.c
drivers/infiniband/ulp/isert/ib_isert.c
drivers/infiniband/ulp/isert/ib_isert.h
drivers/infiniband/ulp/isert/isert_proto.h [deleted file]
drivers/infiniband/ulp/srp/ib_srp.c
drivers/infiniband/ulp/srp/ib_srp.h
drivers/infiniband/ulp/srpt/ib_srpt.c
drivers/infiniband/ulp/srpt/ib_srpt.h
drivers/input/joystick/xpad.c
drivers/input/keyboard/gpio_keys.c
drivers/input/touchscreen/atmel_mxt_ts.c
drivers/iommu/amd_iommu.c
drivers/iommu/intel-iommu.c
drivers/iommu/io-pgtable-arm.c
drivers/irqchip/Kconfig
drivers/irqchip/Makefile
drivers/irqchip/irq-atmel-aic-common.c
drivers/irqchip/irq-gic-v3-its.c
drivers/irqchip/irq-mxs.c
drivers/irqchip/irq-pic32-evic.c [new file with mode: 0644]
drivers/irqchip/irq-s3c24xx.c
drivers/mailbox/Kconfig
drivers/mailbox/pcc.c
drivers/md/bitmap.c
drivers/md/faulty.c
drivers/md/md-cluster.c
drivers/md/raid1.c
drivers/md/raid10.c
drivers/md/raid5.c
drivers/media/dvb-frontends/tda1004x.c
drivers/media/i2c/ir-kbd-i2c.c
drivers/media/i2c/s5k6a3.c
drivers/media/pci/saa7134/saa7134-alsa.c
drivers/media/platform/Kconfig
drivers/media/platform/exynos4-is/Kconfig
drivers/media/platform/exynos4-is/fimc-is.c
drivers/media/platform/exynos4-is/fimc-isp-video.c
drivers/media/platform/exynos4-is/media-dev.c
drivers/media/platform/soc_camera/atmel-isi.c
drivers/media/platform/soc_camera/soc_camera.c
drivers/media/platform/vsp1/vsp1_drv.c
drivers/media/platform/vsp1/vsp1_video.c
drivers/media/v4l2-core/videobuf2-core.c
drivers/media/v4l2-core/videobuf2-v4l2.c
drivers/mmc/core/debugfs.c
drivers/mmc/core/pwrseq_simple.c
drivers/mmc/core/sd.c
drivers/mmc/core/sdio.c
drivers/mmc/core/sdio_cis.c
drivers/mmc/host/mmci.c
drivers/mmc/host/tmio_mmc_dma.c
drivers/mtd/bcm63xxpart.c
drivers/mtd/ubi/cdev.c
drivers/net/dsa/mv88e6xxx.c
drivers/net/ethernet/apm/xgene/xgene_enet_main.c
drivers/net/ethernet/apm/xgene/xgene_enet_main.h
drivers/net/ethernet/aurora/nb8800.c
drivers/net/ethernet/broadcom/Kconfig
drivers/net/ethernet/broadcom/bnxt/bnxt.c
drivers/net/ethernet/broadcom/genet/bcmmii.c
drivers/net/ethernet/broadcom/tg3.c
drivers/net/ethernet/cadence/macb.c
drivers/net/ethernet/cavium/liquidio/lio_main.c
drivers/net/ethernet/ezchip/Kconfig
drivers/net/ethernet/freescale/Makefile
drivers/net/ethernet/freescale/fec.h
drivers/net/ethernet/freescale/fec_main.c
drivers/net/ethernet/freescale/fs_enet/mac-fcc.c
drivers/net/ethernet/hisilicon/hns/hnae.c
drivers/net/ethernet/hisilicon/hns/hnae.h
drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h
drivers/net/ethernet/hisilicon/hns/hns_enet.c
drivers/net/ethernet/hisilicon/hns/hns_enet.h
drivers/net/ethernet/hp/hp100.c
drivers/net/ethernet/intel/i40e/i40e_main.c
drivers/net/ethernet/intel/i40e/i40e_txrx.c
drivers/net/ethernet/marvell/mv643xx_eth.c
drivers/net/ethernet/marvell/mvneta.c
drivers/net/ethernet/mellanox/mlx4/fw.c
drivers/net/ethernet/mellanox/mlx4/mlx4.h
drivers/net/ethernet/mellanox/mlx4/port.c
drivers/net/ethernet/mellanox/mlx4/qp.c
drivers/net/ethernet/mellanox/mlx5/core/en.h
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/eq.c
drivers/net/ethernet/mellanox/mlx5/core/main.c
drivers/net/ethernet/mellanox/mlx5/core/qp.c
drivers/net/ethernet/mellanox/mlx5/core/srq.c
drivers/net/ethernet/mellanox/mlx5/core/transobj.c
drivers/net/ethernet/mellanox/mlx5/core/transobj.h [deleted file]
drivers/net/ethernet/mellanox/mlx5/core/vport.c
drivers/net/ethernet/mellanox/mlxsw/reg.h
drivers/net/ethernet/mellanox/mlxsw/spectrum.c
drivers/net/ethernet/mellanox/mlxsw/spectrum.h
drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
drivers/net/ethernet/moxa/moxart_ether.c
drivers/net/ethernet/moxa/moxart_ether.h
drivers/net/ethernet/neterion/vxge/vxge-main.c
drivers/net/ethernet/rocker/rocker.c
drivers/net/ethernet/sun/sunvnet.c
drivers/net/ethernet/ti/davinci_cpdma.c
drivers/net/fddi/defxx.c
drivers/net/geneve.c
drivers/net/hyperv/hyperv_net.h
drivers/net/hyperv/netvsc.c
drivers/net/hyperv/netvsc_drv.c
drivers/net/irda/bfin_sir.h
drivers/net/macvlan.c
drivers/net/phy/Kconfig
drivers/net/phy/dp83640.c
drivers/net/phy/phy.c
drivers/net/phy/smsc.c
drivers/net/ppp/pptp.c
drivers/net/usb/lan78xx.c
drivers/net/vxlan.c
drivers/net/wireless/ath/ath9k/eeprom.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.h
drivers/net/wireless/intel/iwlwifi/iwl-7000.c
drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h
drivers/net/wireless/intel/iwlwifi/mvm/rs.c
drivers/net/wireless/intel/iwlwifi/mvm/tx.c
drivers/net/wireless/intel/iwlwifi/pcie/drv.c
drivers/net/wireless/mac80211_hwsim.c
drivers/net/wireless/ralink/rt2x00/rt2400pci.c
drivers/net/wireless/ralink/rt2x00/rt2500pci.c
drivers/net/wireless/ralink/rt2x00/rt2500usb.c
drivers/net/wireless/ralink/rt2x00/rt2800lib.c
drivers/net/wireless/ralink/rt2x00/rt2x00.h
drivers/net/wireless/ralink/rt2x00/rt2x00config.c
drivers/net/wireless/ralink/rt2x00/rt2x00mac.c
drivers/net/wireless/ralink/rt2x00/rt61pci.c
drivers/net/wireless/ralink/rt2x00/rt73usb.c
drivers/net/wireless/realtek/rtlwifi/regd.c
drivers/net/xen-netfront.c
drivers/ntb/hw/Kconfig
drivers/ntb/hw/Makefile
drivers/ntb/hw/amd/Kconfig [new file with mode: 0644]
drivers/ntb/hw/amd/Makefile [new file with mode: 0644]
drivers/ntb/hw/amd/ntb_hw_amd.c [new file with mode: 0644]
drivers/ntb/hw/amd/ntb_hw_amd.h [new file with mode: 0644]
drivers/ntb/hw/intel/ntb_hw_intel.c
drivers/ntb/hw/intel/ntb_hw_intel.h
drivers/ntb/ntb_transport.c
drivers/ntb/test/Kconfig
drivers/ntb/test/Makefile
drivers/ntb/test/ntb_perf.c [new file with mode: 0644]
drivers/nvdimm/namespace_devs.c
drivers/nvdimm/pfn_devs.c
drivers/of/irq.c
drivers/of/of_mdio.c
drivers/oprofile/oprofilefs.c
drivers/pci/hotplug/acpiphp_glue.c
drivers/platform/x86/ideapad-laptop.c
drivers/platform/x86/intel_telemetry_debugfs.c
drivers/pnp/quirks.c
drivers/ptp/ptp_ixp46x.c
drivers/s390/cio/chp.c
drivers/s390/cio/chp.h
drivers/s390/cio/chsc.c
drivers/s390/crypto/zcrypt_error.h
drivers/s390/crypto/zcrypt_msgtype50.c
drivers/s390/crypto/zcrypt_msgtype6.c
drivers/scsi/3w-xxxx.c
drivers/scsi/Kconfig
drivers/scsi/NCR5380.c
drivers/scsi/NCR5380.h
drivers/scsi/arm/cumana_1.c
drivers/scsi/arm/oak.c
drivers/scsi/atari_NCR5380.c
drivers/scsi/atari_scsi.c
drivers/scsi/be2iscsi/Kconfig
drivers/scsi/be2iscsi/be.h
drivers/scsi/be2iscsi/be_iscsi.c
drivers/scsi/be2iscsi/be_main.c
drivers/scsi/cxgbi/cxgb3i/cxgb3i.c
drivers/scsi/dmx3191d.c
drivers/scsi/dtc.c
drivers/scsi/dtc.h
drivers/scsi/g_NCR5380.c
drivers/scsi/g_NCR5380.h
drivers/scsi/hisi_sas/Kconfig
drivers/scsi/hisi_sas/hisi_sas_v1_hw.c
drivers/scsi/imm.c
drivers/scsi/ipr.c
drivers/scsi/ipr.h
drivers/scsi/mac_scsi.c
drivers/scsi/megaraid/megaraid_mm.c
drivers/scsi/pas16.c
drivers/scsi/pas16.h
drivers/scsi/scsi_devinfo.c
drivers/scsi/sd.c
drivers/scsi/sg.c
drivers/scsi/sr.c
drivers/scsi/storvsc_drv.c
drivers/scsi/sun3_scsi.c
drivers/scsi/t128.c
drivers/scsi/t128.h
drivers/soc/Kconfig
drivers/soc/qcom/spm.c
drivers/soc/tegra/Kconfig [new file with mode: 0644]
drivers/ssb/main.c
drivers/staging/iio/adc/Kconfig
drivers/staging/iio/meter/ade7753.c
drivers/staging/lustre/include/linux/libcfs/libcfs_private.h
drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c
drivers/staging/lustre/lustre/llite/dir.c
drivers/staging/lustre/lustre/llite/file.c
drivers/staging/lustre/lustre/llite/llite_internal.h
drivers/staging/lustre/lustre/llite/llite_lib.c
drivers/staging/lustre/lustre/llite/llite_nfs.c
drivers/staging/lustre/lustre/llite/lloop.c
drivers/staging/lustre/lustre/llite/rw.c
drivers/staging/lustre/lustre/llite/rw26.c
drivers/staging/lustre/lustre/llite/vvp_io.c
drivers/staging/lustre/lustre/llite/vvp_page.c
drivers/staging/panel/panel.c
drivers/staging/rdma/Kconfig
drivers/staging/rdma/Makefile
drivers/staging/rdma/amso1100/Kbuild [deleted file]
drivers/staging/rdma/amso1100/Kconfig [deleted file]
drivers/staging/rdma/amso1100/TODO [deleted file]
drivers/staging/rdma/amso1100/c2.c [deleted file]
drivers/staging/rdma/amso1100/c2.h [deleted file]
drivers/staging/rdma/amso1100/c2_ae.c [deleted file]
drivers/staging/rdma/amso1100/c2_ae.h [deleted file]
drivers/staging/rdma/amso1100/c2_alloc.c [deleted file]
drivers/staging/rdma/amso1100/c2_cm.c [deleted file]
drivers/staging/rdma/amso1100/c2_cq.c [deleted file]
drivers/staging/rdma/amso1100/c2_intr.c [deleted file]
drivers/staging/rdma/amso1100/c2_mm.c [deleted file]
drivers/staging/rdma/amso1100/c2_mq.c [deleted file]
drivers/staging/rdma/amso1100/c2_mq.h [deleted file]
drivers/staging/rdma/amso1100/c2_pd.c [deleted file]
drivers/staging/rdma/amso1100/c2_provider.c [deleted file]
drivers/staging/rdma/amso1100/c2_provider.h [deleted file]
drivers/staging/rdma/amso1100/c2_qp.c [deleted file]
drivers/staging/rdma/amso1100/c2_rnic.c [deleted file]
drivers/staging/rdma/amso1100/c2_status.h [deleted file]
drivers/staging/rdma/amso1100/c2_user.h [deleted file]
drivers/staging/rdma/amso1100/c2_vq.c [deleted file]
drivers/staging/rdma/amso1100/c2_vq.h [deleted file]
drivers/staging/rdma/amso1100/c2_wr.h [deleted file]
drivers/staging/rdma/ehca/Kconfig [deleted file]
drivers/staging/rdma/ehca/Makefile [deleted file]
drivers/staging/rdma/ehca/TODO [deleted file]
drivers/staging/rdma/ehca/ehca_av.c [deleted file]
drivers/staging/rdma/ehca/ehca_classes.h [deleted file]
drivers/staging/rdma/ehca/ehca_classes_pSeries.h [deleted file]
drivers/staging/rdma/ehca/ehca_cq.c [deleted file]
drivers/staging/rdma/ehca/ehca_eq.c [deleted file]
drivers/staging/rdma/ehca/ehca_hca.c [deleted file]
drivers/staging/rdma/ehca/ehca_irq.c [deleted file]
drivers/staging/rdma/ehca/ehca_irq.h [deleted file]
drivers/staging/rdma/ehca/ehca_iverbs.h [deleted file]
drivers/staging/rdma/ehca/ehca_main.c [deleted file]
drivers/staging/rdma/ehca/ehca_mcast.c [deleted file]
drivers/staging/rdma/ehca/ehca_mrmw.c [deleted file]
drivers/staging/rdma/ehca/ehca_mrmw.h [deleted file]
drivers/staging/rdma/ehca/ehca_pd.c [deleted file]
drivers/staging/rdma/ehca/ehca_qes.h [deleted file]
drivers/staging/rdma/ehca/ehca_qp.c [deleted file]
drivers/staging/rdma/ehca/ehca_reqs.c [deleted file]
drivers/staging/rdma/ehca/ehca_sqp.c [deleted file]
drivers/staging/rdma/ehca/ehca_tools.h [deleted file]
drivers/staging/rdma/ehca/ehca_uverbs.c [deleted file]
drivers/staging/rdma/ehca/hcp_if.c [deleted file]
drivers/staging/rdma/ehca/hcp_if.h [deleted file]
drivers/staging/rdma/ehca/hcp_phyp.c [deleted file]
drivers/staging/rdma/ehca/hcp_phyp.h [deleted file]
drivers/staging/rdma/ehca/hipz_fns.h [deleted file]
drivers/staging/rdma/ehca/hipz_fns_core.h [deleted file]
drivers/staging/rdma/ehca/hipz_hw.h [deleted file]
drivers/staging/rdma/ehca/ipz_pt_fn.c [deleted file]
drivers/staging/rdma/ehca/ipz_pt_fn.h [deleted file]
drivers/staging/rdma/hfi1/mr.c
drivers/staging/rdma/hfi1/verbs.c
drivers/staging/rdma/hfi1/verbs.h
drivers/staging/rdma/ipath/Kconfig [deleted file]
drivers/staging/rdma/ipath/Makefile [deleted file]
drivers/staging/rdma/ipath/TODO [deleted file]
drivers/staging/rdma/ipath/ipath_common.h [deleted file]
drivers/staging/rdma/ipath/ipath_cq.c [deleted file]
drivers/staging/rdma/ipath/ipath_debug.h [deleted file]
drivers/staging/rdma/ipath/ipath_diag.c [deleted file]
drivers/staging/rdma/ipath/ipath_dma.c [deleted file]
drivers/staging/rdma/ipath/ipath_driver.c [deleted file]
drivers/staging/rdma/ipath/ipath_eeprom.c [deleted file]
drivers/staging/rdma/ipath/ipath_file_ops.c [deleted file]
drivers/staging/rdma/ipath/ipath_fs.c [deleted file]
drivers/staging/rdma/ipath/ipath_iba6110.c [deleted file]
drivers/staging/rdma/ipath/ipath_init_chip.c [deleted file]
drivers/staging/rdma/ipath/ipath_intr.c [deleted file]
drivers/staging/rdma/ipath/ipath_kernel.h [deleted file]
drivers/staging/rdma/ipath/ipath_keys.c [deleted file]
drivers/staging/rdma/ipath/ipath_mad.c [deleted file]
drivers/staging/rdma/ipath/ipath_mmap.c [deleted file]
drivers/staging/rdma/ipath/ipath_mr.c [deleted file]
drivers/staging/rdma/ipath/ipath_qp.c [deleted file]
drivers/staging/rdma/ipath/ipath_rc.c [deleted file]
drivers/staging/rdma/ipath/ipath_registers.h [deleted file]
drivers/staging/rdma/ipath/ipath_ruc.c [deleted file]
drivers/staging/rdma/ipath/ipath_sdma.c [deleted file]
drivers/staging/rdma/ipath/ipath_srq.c [deleted file]
drivers/staging/rdma/ipath/ipath_stats.c [deleted file]
drivers/staging/rdma/ipath/ipath_sysfs.c [deleted file]
drivers/staging/rdma/ipath/ipath_uc.c [deleted file]
drivers/staging/rdma/ipath/ipath_ud.c [deleted file]
drivers/staging/rdma/ipath/ipath_user_pages.c [deleted file]
drivers/staging/rdma/ipath/ipath_user_sdma.c [deleted file]
drivers/staging/rdma/ipath/ipath_user_sdma.h [deleted file]
drivers/staging/rdma/ipath/ipath_verbs.c [deleted file]
drivers/staging/rdma/ipath/ipath_verbs.h [deleted file]
drivers/staging/rdma/ipath/ipath_verbs_mcast.c [deleted file]
drivers/staging/rdma/ipath/ipath_wc_ppc64.c [deleted file]
drivers/staging/rdma/ipath/ipath_wc_x86_64.c [deleted file]
drivers/staging/speakup/Kconfig
drivers/staging/speakup/main.c
drivers/staging/speakup/selection.c
drivers/staging/speakup/serialio.c
drivers/thermal/int340x_thermal/processor_thermal_device.c
drivers/thermal/intel_pch_thermal.c
drivers/thermal/rcar_thermal.c
drivers/thermal/rockchip_thermal.c
drivers/thermal/step_wise.c
drivers/thermal/thermal_core.c
drivers/thermal/thermal_core.h
drivers/tty/n_tty.c
drivers/tty/serial/8250/8250_pci.c
drivers/tty/tty_io.c
drivers/tty/tty_mutex.c
drivers/tty/vt/vt.c
drivers/usb/class/cdc-acm.c
drivers/usb/class/cdc-acm.h
drivers/usb/core/hub.c
drivers/usb/dwc2/core.c
drivers/usb/dwc2/platform.c
drivers/usb/dwc3/gadget.c
drivers/usb/gadget/function/f_printer.c
drivers/usb/gadget/legacy/inode.c
drivers/usb/gadget/udc/atmel_usba_udc.c
drivers/usb/host/Kconfig
drivers/usb/host/xhci-ext-caps.h
drivers/usb/host/xhci-mtk-sch.c
drivers/usb/host/xhci-mtk.c
drivers/usb/host/xhci-pci.c
drivers/usb/host/xhci-plat.c
drivers/usb/host/xhci-ring.c
drivers/usb/host/xhci.c
drivers/usb/host/xhci.h
drivers/usb/musb/ux500.c
drivers/usb/phy/phy-msm-usb.c
drivers/usb/phy/phy-mxs-usb.c
drivers/usb/serial/cp210x.c
drivers/usb/serial/ftdi_sio.c
drivers/usb/serial/ftdi_sio_ids.h
drivers/usb/serial/mxu11x0.c
drivers/usb/serial/option.c
drivers/usb/serial/visor.c
drivers/vfio/vfio.c
drivers/video/fbdev/core/fb_defio.c
drivers/virtio/virtio_pci_common.c
drivers/watchdog/Kconfig
drivers/watchdog/max63xx_wdt.c
drivers/watchdog/pcwd_usb.c
drivers/watchdog/sp805_wdt.c
drivers/xen/tmem.c
fs/9p/vfs_file.c
fs/affs/file.c
fs/afs/flock.c
fs/afs/write.c
fs/attr.c
fs/binfmt_elf.c
fs/binfmt_misc.c
fs/block_dev.c
fs/btrfs/async-thread.c
fs/btrfs/backref.c
fs/btrfs/ctree.h
fs/btrfs/dev-replace.c
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/extent_map.c
fs/btrfs/extent_map.h
fs/btrfs/file.c
fs/btrfs/free-space-tree.c
fs/btrfs/inode-map.c
fs/btrfs/inode-map.h
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/raid56.c
fs/btrfs/relocation.c
fs/btrfs/scrub.c
fs/btrfs/super.c
fs/btrfs/sysfs.c
fs/btrfs/sysfs.h
fs/btrfs/tests/btrfs-tests.c
fs/btrfs/tests/extent-io-tests.c
fs/btrfs/tests/inode-tests.c
fs/btrfs/tree-log.c
fs/btrfs/volumes.c
fs/btrfs/xattr.c
fs/cachefiles/interface.c
fs/cachefiles/namei.c
fs/ceph/addr.c
fs/ceph/cache.c
fs/ceph/caps.c
fs/ceph/dir.c
fs/ceph/export.c
fs/ceph/file.c
fs/ceph/inode.c
fs/cifs/cifs_debug.c
fs/cifs/cifs_debug.h
fs/cifs/cifsfs.c
fs/cifs/cifsglob.h
fs/cifs/cifsproto.h
fs/cifs/connect.c
fs/cifs/file.c
fs/cifs/inode.c
fs/cifs/misc.c
fs/cifs/readdir.c
fs/cifs/smb2misc.c
fs/cifs/smb2ops.c
fs/cifs/smb2pdu.c
fs/cifs/smb2pdu.h
fs/cifs/smb2proto.h
fs/cifs/smb2transport.c
fs/cifs/transport.c
fs/coda/coda_linux.h
fs/coda/dir.c
fs/coda/file.c
fs/compat_ioctl.c
fs/configfs/dir.c
fs/configfs/file.c
fs/configfs/inode.c
fs/dax.c
fs/dcache.c
fs/debugfs/inode.c
fs/devpts/inode.c
fs/direct-io.c
fs/dlm/user.c
fs/ecryptfs/inode.c
fs/ecryptfs/mmap.c
fs/efivarfs/file.c
fs/efivarfs/super.c
fs/eventpoll.c
fs/exec.c
fs/exofs/file.c
fs/exportfs/expfs.c
fs/ext2/file.c
fs/ext2/ioctl.c
fs/ext4/crypto.c
fs/ext4/crypto_key.c
fs/ext4/ext4.h
fs/ext4/extents.c
fs/ext4/file.c
fs/ext4/ialloc.c
fs/ext4/inline.c
fs/ext4/inode.c
fs/ext4/ioctl.c
fs/ext4/namei.c
fs/ext4/super.c
fs/ext4/truncate.h
fs/f2fs/data.c
fs/f2fs/file.c
fs/fat/dir.c
fs/fat/file.c
fs/filesystems.c
fs/fuse/dir.c
fs/fuse/file.c
fs/gfs2/file.c
fs/gfs2/inode.c
fs/gfs2/quota.c
fs/hfs/dir.c
fs/hfs/inode.c
fs/hfsplus/dir.c
fs/hfsplus/inode.c
fs/hfsplus/ioctl.c
fs/hostfs/hostfs_kern.c
fs/hpfs/dir.c
fs/hugetlbfs/inode.c
fs/inode.c
fs/ioctl.c
fs/jffs2/build.c
fs/jffs2/file.c
fs/jffs2/fs.c
fs/jffs2/super.c
fs/jfs/file.c
fs/jfs/ioctl.c
fs/jfs/super.c
fs/kernfs/dir.c
fs/libfs.c
fs/locks.c
fs/logfs/file.c
fs/namei.c
fs/namespace.c
fs/ncpfs/dir.c
fs/ncpfs/file.c
fs/nfs/dir.c
fs/nfs/direct.c
fs/nfs/file.c
fs/nfs/filelayout/filelayout.c
fs/nfs/flexfilelayout/flexfilelayout.c
fs/nfs/flexfilelayout/flexfilelayoutdev.c
fs/nfs/inode.c
fs/nfs/internal.h
fs/nfs/nfs42proc.c
fs/nfs/nfs4file.c
fs/nfs/pnfs.c
fs/nfs/pnfs.h
fs/nfs/write.c
fs/nfsd/nfs4proc.c
fs/nfsd/nfs4recover.c
fs/nfsd/nfsfh.h
fs/nfsd/vfs.c
fs/nilfs2/inode.c
fs/nilfs2/ioctl.c
fs/ntfs/dir.c
fs/ntfs/file.c
fs/ntfs/quota.c
fs/ntfs/super.c
fs/ocfs2/alloc.c
fs/ocfs2/aops.c
fs/ocfs2/cluster/heartbeat.c
fs/ocfs2/dir.c
fs/ocfs2/dlm/dlmrecovery.c
fs/ocfs2/dlmglue.c
fs/ocfs2/file.c
fs/ocfs2/inode.c
fs/ocfs2/ioctl.c
fs/ocfs2/journal.c
fs/ocfs2/localalloc.c
fs/ocfs2/move_extents.c
fs/ocfs2/namei.c
fs/ocfs2/quota_global.c
fs/ocfs2/refcounttree.c
fs/ocfs2/resize.c
fs/ocfs2/suballoc.c
fs/ocfs2/xattr.c
fs/open.c
fs/overlayfs/copy_up.c
fs/overlayfs/dir.c
fs/overlayfs/inode.c
fs/overlayfs/readdir.c
fs/overlayfs/super.c
fs/pipe.c
fs/proc/kcore.c
fs/proc/self.c
fs/proc/task_mmu.c
fs/proc/task_nommu.c
fs/proc/thread_self.c
fs/pstore/inode.c
fs/quota/dquot.c
fs/read_write.c
fs/readdir.c
fs/reiserfs/dir.c
fs/reiserfs/file.c
fs/reiserfs/ioctl.c
fs/reiserfs/super.c
fs/reiserfs/xattr.c
fs/timerfd.c
fs/tracefs/inode.c
fs/ubifs/dir.c
fs/ubifs/file.c
fs/ubifs/xattr.c
fs/udf/file.c
fs/udf/inode.c
fs/udf/super.c
fs/utimes.c
fs/xattr.c
fs/xfs/libxfs/xfs_format.h
fs/xfs/libxfs/xfs_fs.h
fs/xfs/xfs_buf.c
fs/xfs/xfs_file.c
fs/xfs/xfs_inode.c
fs/xfs/xfs_ioctl.c
fs/xfs/xfs_iops.c
fs/xfs/xfs_pnfs.c
fs/xfs/xfs_trans_ail.c
include/acpi/cppc_acpi.h
include/crypto/hash.h
include/crypto/if_alg.h
include/crypto/skcipher.h
include/drm/drm_atomic_helper.h
include/drm/drm_cache.h
include/drm/drm_dp_mst_helper.h
include/drm/drm_fixed.h
include/linux/bcm963xx_nvram.h [new file with mode: 0644]
include/linux/bcm963xx_tag.h [new file with mode: 0644]
include/linux/blk-iopoll.h [deleted file]
include/linux/ceph/ceph_features.h
include/linux/ceph/ceph_frag.h
include/linux/ceph/messenger.h
include/linux/cleancache.h
include/linux/crush/crush.h
include/linux/dax.h
include/linux/fs.h
include/linux/ftrace.h
include/linux/gfp.h
include/linux/hrtimer.h
include/linux/huge_mm.h
include/linux/interrupt.h
include/linux/iommu.h
include/linux/irq_poll.h [new file with mode: 0644]
include/linux/irqdomain.h
include/linux/memcontrol.h
include/linux/mlx4/cmd.h
include/linux/mlx4/device.h
include/linux/mlx4/qp.h
include/linux/mlx5/device.h
include/linux/mlx5/driver.h
include/linux/mlx5/mlx5_ifc.h
include/linux/mlx5/qp.h
include/linux/mlx5/transobj.h [new file with mode: 0644]
include/linux/mlx5/vport.h
include/linux/mm.h
include/linux/mm_types.h
include/linux/mmzone.h
include/linux/module.h
include/linux/netdevice.h
include/linux/of.h
include/linux/pagemap.h
include/linux/perf_event.h
include/linux/pfn_t.h
include/linux/pipe_fs_i.h
include/linux/platform_data/sdhci-pic32.h [new file with mode: 0644]
include/linux/pmem.h
include/linux/radix-tree.h
include/linux/raid/pq.h
include/linux/rmap.h
include/linux/sched.h
include/linux/shmem_fs.h
include/linux/sunrpc/svc_rdma.h
include/linux/swiotlb.h
include/linux/thermal.h
include/linux/tty.h
include/linux/workqueue.h
include/media/videobuf2-core.h
include/net/bluetooth/l2cap.h
include/net/dst_metadata.h
include/net/ip6_route.h
include/net/netfilter/nf_conntrack_core.h
include/net/sctp/structs.h
include/net/sock.h
include/net/sock_reuseport.h
include/net/tcp.h
include/rdma/ib_addr.h
include/rdma/ib_cache.h
include/rdma/ib_mad.h
include/rdma/ib_pack.h
include/rdma/ib_pma.h
include/rdma/ib_sa.h
include/rdma/ib_verbs.h
include/scsi/iser.h [new file with mode: 0644]
include/sound/rawmidi.h
include/sound/timer.h
include/trace/events/ext4.h
include/trace/events/fence.h
include/trace/events/huge_memory.h
include/trace/events/irq.h
include/uapi/drm/etnaviv_drm.h
include/uapi/linux/fs.h
ipc/mqueue.c
ipc/sem.c
ipc/util.c
ipc/util.h
kernel/audit_fsnotify.c
kernel/audit_watch.c
kernel/bpf/arraymap.c
kernel/events/core.c
kernel/events/hw_breakpoint.c
kernel/events/ring_buffer.c
kernel/futex.c
kernel/irq/handle.c
kernel/irq/irqdomain.c
kernel/locking/rtmutex.c
kernel/memremap.c
kernel/module.c
kernel/pid.c
kernel/power/Kconfig
kernel/relay.c
kernel/sched/core.c
kernel/sched/fair.c
kernel/sched/idle.c
kernel/seccomp.c
kernel/signal.c
kernel/sysctl.c
kernel/time/hrtimer.c
kernel/time/itimer.c
kernel/time/ntp.c
kernel/time/posix-timers.c
kernel/time/tick-sched.c
kernel/time/timer_list.c
kernel/trace/bpf_trace.c
kernel/trace/trace.c
kernel/trace/trace_stack.c
kernel/workqueue.c
lib/Kconfig
lib/Kconfig.debug
lib/Makefile
lib/debugobjects.c
lib/dump_stack.c
lib/irq_poll.c [new file with mode: 0644]
lib/libcrc32c.c
lib/radix-tree.c
lib/ratelimit.c
lib/scatterlist.c
lib/test-string_helpers.c
mm/Kconfig
mm/backing-dev.c
mm/cleancache.c
mm/filemap.c
mm/gup.c
mm/huge_memory.c
mm/hugetlb.c
mm/internal.h
mm/memblock.c
mm/memcontrol.c
mm/memory.c
mm/mempolicy.c
mm/mincore.c
mm/mlock.c
mm/mmap.c
mm/page_alloc.c
mm/percpu.c
mm/shmem.c
mm/swapfile.c
mm/truncate.c
mm/util.c
mm/vmpressure.c
mm/vmscan.c
mm/vmstat.c
mm/workingset.c
net/9p/trans_fd.c
net/9p/trans_virtio.c
net/bluetooth/6lowpan.c
net/bluetooth/hci_request.c
net/bluetooth/l2cap_core.c
net/bluetooth/l2cap_sock.c
net/bluetooth/smp.c
net/bridge/br.c
net/ceph/auth_x.c
net/ceph/auth_x.h
net/ceph/crush/mapper.c
net/ceph/messenger.c
net/ceph/mon_client.c
net/ceph/osd_client.c
net/ceph/osdmap.c
net/core/dev.c
net/core/sock_reuseport.c
net/ipv4/Kconfig
net/ipv4/fib_trie.c
net/ipv4/inet_diag.c
net/ipv4/ip_fragment.c
net/ipv4/ip_input.c
net/ipv4/ipconfig.c
net/ipv4/netfilter/nf_defrag_ipv4.c
net/ipv4/tcp.c
net/ipv4/tcp_input.c
net/ipv4/tcp_ipv4.c
net/ipv4/udp.c
net/ipv6/Kconfig
net/ipv6/datagram.c
net/ipv6/ip6_output.c
net/ipv6/route.c
net/ipv6/sit.c
net/ipv6/udp.c
net/irda/ircomm/ircomm_param.c
net/iucv/af_iucv.c
net/mac80211/ibss.c
net/mac80211/main.c
net/mac80211/mesh.c
net/mac80211/mesh.h
net/mac80211/mlme.c
net/mac80211/offchannel.c
net/mac80211/scan.c
net/mac80211/sta_info.c
net/mac80211/status.c
net/mac80211/util.c
net/netfilter/ipset/ip_set_hash_netiface.c
net/netfilter/nf_conntrack_core.c
net/netfilter/nf_conntrack_helper.c
net/netfilter/nf_conntrack_netlink.c
net/netfilter/nf_tables_netdev.c
net/netfilter/nfnetlink_cttimeout.c
net/netfilter/nft_byteorder.c
net/netfilter/nft_ct.c
net/netfilter/xt_TCPMSS.c
net/netlink/af_netlink.c
net/rds/ib.c
net/rds/iw.c
net/rfkill/core.c
net/sched/sch_drr.c
net/sctp/input.c
net/sctp/proc.c
net/sctp/sm_sideeffect.c
net/sctp/socket.c
net/sctp/transport.c
net/sunrpc/cache.c
net/sunrpc/rpc_pipe.c
net/sunrpc/xprt.c
net/sunrpc/xprtrdma/Makefile
net/sunrpc/xprtrdma/frwr_ops.c
net/sunrpc/xprtrdma/svc_rdma.c
net/sunrpc/xprtrdma/svc_rdma_backchannel.c [new file with mode: 0644]
net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
net/sunrpc/xprtrdma/svc_rdma_sendto.c
net/sunrpc/xprtrdma/svc_rdma_transport.c
net/sunrpc/xprtrdma/transport.c
net/sunrpc/xprtrdma/verbs.c
net/sunrpc/xprtrdma/xprt_rdma.h
net/switchdev/switchdev.c
net/tipc/subscr.c
net/unix/af_unix.c
net/wireless/reg.c
scripts/mod/modpost.c
scripts/prune-kernel [new file with mode: 0755]
security/inode.c
security/integrity/ima/ima_main.c
security/keys/key.c
security/selinux/selinuxfs.c
sound/core/Kconfig
sound/core/compress_offload.c
sound/core/control.c
sound/core/hrtimer.c
sound/core/oss/pcm_oss.c
sound/core/pcm_compat.c
sound/core/rawmidi.c
sound/core/seq/oss/seq_oss_init.c
sound/core/seq/oss/seq_oss_synth.c
sound/core/seq/seq_clientmgr.c
sound/core/seq/seq_compat.c
sound/core/seq/seq_ports.c
sound/core/seq/seq_timer.c
sound/core/seq/seq_virmidi.c
sound/core/timer.c
sound/drivers/dummy.c
sound/firewire/bebob/bebob_stream.c
sound/hda/hdac_i915.c
sound/isa/Kconfig
sound/pci/Kconfig
sound/pci/emu10k1/emu10k1_main.c
sound/pci/hda/hda_bind.c
sound/pci/hda/hda_intel.c
sound/pci/hda/patch_cirrus.c
sound/pci/hda/patch_hdmi.c
sound/pci/hda/patch_realtek.c
sound/sparc/Kconfig
sound/spi/at73c213.c
sound/usb/quirks.c
tools/lib/traceevent/event-parse.c
tools/perf/Makefile.perf
tools/perf/arch/x86/tests/intel-cqm.c
tools/perf/config/Makefile
tools/perf/tests/make
tools/perf/ui/browsers/annotate.c
tools/perf/util/hist.c
tools/perf/util/session.c
tools/perf/util/stat.c
tools/perf/util/symbol.c
tools/perf/util/trace-event-parse.c
tools/testing/nvdimm/test/iomap.c
tools/testing/selftests/timers/valid-adjtimex.c
tools/virtio/asm/barrier.h
tools/virtio/linux/compiler.h [new file with mode: 0644]
tools/virtio/linux/kernel.h
tools/virtio/ringtest/Makefile [new file with mode: 0644]
tools/virtio/ringtest/README [new file with mode: 0644]
tools/virtio/ringtest/main.c [new file with mode: 0644]
tools/virtio/ringtest/main.h [new file with mode: 0644]
tools/virtio/ringtest/ring.c [new file with mode: 0644]
tools/virtio/ringtest/run-on-all.sh [new file with mode: 0755]
tools/virtio/ringtest/virtio_ring_0_9.c [new file with mode: 0644]
tools/virtio/ringtest/virtio_ring_poll.c [new file with mode: 0644]

index b1e9a97653dc64853775a97db377a8f269cc8d95..7e6c5334c337ae0c792a36ec5a2e9309b4128061 100644 (file)
--- a/.mailmap
+++ b/.mailmap
@@ -21,6 +21,7 @@ Andrey Ryabinin <ryabinin.a.a@gmail.com> <a.ryabinin@samsung.com>
 Andrew Morton <akpm@linux-foundation.org>
 Andrew Vasquez <andrew.vasquez@qlogic.com>
 Andy Adamson <andros@citi.umich.edu>
+Antonio Ospite <ao2@ao2.it> <ao2@amarulasolutions.com>
 Archit Taneja <archit@ti.com>
 Arnaud Patard <arnaud.patard@rtp-net.org>
 Arnd Bergmann <arnd@arndb.de>
diff --git a/Documentation/ABI/testing/configfs-rdma_cm b/Documentation/ABI/testing/configfs-rdma_cm
new file mode 100644 (file)
index 0000000..5c389aa
--- /dev/null
@@ -0,0 +1,22 @@
+What:          /config/rdma_cm
+Date:          November 29, 2015
+KernelVersion:  4.4.0
+Description:   Interface is used to configure RDMA-cable HCAs in respect to
+               RDMA-CM attributes.
+
+               Attributes are visible only when configfs is mounted. To mount
+               configfs in /config directory use:
+               # mount -t configfs none /config/
+
+               In order to set parameters related to a specific HCA, a directory
+               for this HCA has to be created:
+               mkdir -p /config/rdma_cm/<hca>
+
+
+What:          /config/rdma_cm/<hca>/ports/<port-num>/default_roce_mode
+Date:          November 29, 2015
+KernelVersion:  4.4.0
+Description:   RDMA-CM based connections from HCA <hca> at port <port-num>
+               will be initiated with this RoCE type as default.
+               The possible RoCE types are either "IB/RoCE v1" or "RoCE v2".
+               This parameter has RW access.
diff --git a/Documentation/ABI/testing/sysfs-class-infiniband b/Documentation/ABI/testing/sysfs-class-infiniband
new file mode 100644 (file)
index 0000000..a86abe6
--- /dev/null
@@ -0,0 +1,16 @@
+What:          /sys/class/infiniband/<hca>/ports/<port-number>/gid_attrs/ndevs/<gid-index>
+Date:          November 29, 2015
+KernelVersion: 4.4.0
+Contact:       linux-rdma@vger.kernel.org
+Description:   The net-device's name associated with the GID resides
+               at index <gid-index>.
+
+What:          /sys/class/infiniband/<hca>/ports/<port-number>/gid_attrs/types/<gid-index>
+Date:          November 29, 2015
+KernelVersion: 4.4.0
+Contact:       linux-rdma@vger.kernel.org
+Description:   The RoCE type of the associated GID resides at index <gid-index>.
+               This could either be "IB/RoCE v1" for IB and RoCE v1 based GODs
+               or "RoCE v2" for RoCE v2 based GIDs.
+
+
index 7b57fc087088f49756eeb8eaabf403bfbbd92b93..49585b6e1ea24c951ea58090b762740aa36b066d 100644 (file)
@@ -3,7 +3,7 @@ Linux IOMMU Support
 
 The architecture spec can be obtained from the below location.
 
-http://www.intel.com/technology/virtualization/
+http://www.intel.com/content/dam/www/public/us/en/documents/product-specifications/vt-directed-io-spec.pdf
 
 This guide gives a quick cheat sheet for some basic understanding.
 
index 9ae148ab12559fcf1621a5fb66042ad020d98606..ff49cf901148d895b765800ec6ddb79c0e38ed53 100644 (file)
@@ -843,6 +843,10 @@ PAGE_SIZE multiple when read back.
                Amount of memory used to cache filesystem data,
                including tmpfs and shared memory.
 
+         sock
+
+               Amount of memory used in network transmission buffers
+
          file_mapped
 
                Amount of cached filesystem data mapped with mmap()
index cf1333d1dd52974e4e960a7c97418f86864e56f1..21641236c095dfc257b0d5375ef0e8710129e690 100644 (file)
@@ -6,6 +6,7 @@ Required properties:
 Optional properties:
        - autorepeat: Boolean, Enable auto repeat feature of Linux input
          subsystem.
+       - label: String, name of the input device.
 
 Each button (key) is represented as a sub-node of "gpio-keys":
 Subnode properties:
diff --git a/Documentation/devicetree/bindings/interrupt-controller/microchip,pic32-evic.txt b/Documentation/devicetree/bindings/interrupt-controller/microchip,pic32-evic.txt
new file mode 100644 (file)
index 0000000..c3a1b37
--- /dev/null
@@ -0,0 +1,67 @@
+Microchip PIC32 Interrupt Controller
+====================================
+
+The Microchip PIC32 contains an Enhanced Vectored Interrupt Controller (EVIC).
+It handles all internal and external interrupts. This controller exists outside
+of the CPU and is the arbitrator of all interrupts (including interrupts from
+the CPU itself) before they are presented to the CPU.
+
+External interrupts have a software configurable edge polarity. Non external
+interrupts have a type and polarity that is determined by the source of the
+interrupt.
+
+Required properties
+-------------------
+
+- compatible: Should be "microchip,pic32mzda-evic"
+- reg: Specifies physical base address and size of register range.
+- interrupt-controller: Identifies the node as an interrupt controller.
+- #interrupt cells: Specifies the number of cells used to encode an interrupt
+  source connected to this controller. The value shall be 2 and interrupt
+  descriptor shall have the following format:
+
+       <hw_irq irq_type>
+
+  hw_irq - represents the hardware interrupt number as in the data sheet.
+  irq_type - is used to describe the type and polarity of an interrupt. For
+  internal interrupts use IRQ_TYPE_EDGE_RISING for non persistent interrupts and
+  IRQ_TYPE_LEVEL_HIGH for persistent interrupts. For external interrupts use
+  IRQ_TYPE_EDGE_RISING or IRQ_TYPE_EDGE_FALLING to select the desired polarity.
+
+Optional properties
+-------------------
+- microchip,external-irqs: u32 array of external interrupts with software
+  polarity configuration. This array corresponds to the bits in the INTCON
+  SFR.
+
+Example
+-------
+
+evic: interrupt-controller@1f810000 {
+       compatible = "microchip,pic32mzda-evic";
+       interrupt-controller;
+       #interrupt-cells = <2>;
+       reg = <0x1f810000 0x1000>;
+       microchip,external-irqs = <3 8 13 18 23>;
+};
+
+Each device/peripheral must request its interrupt line with the associated type
+and polarity.
+
+Internal interrupt DTS snippet
+------------------------------
+
+device@1f800000 {
+       ...
+       interrupts = <113 IRQ_TYPE_LEVEL_HIGH>;
+       ...
+};
+
+External interrupt DTS snippet
+------------------------------
+
+device@1f800000 {
+       ...
+       interrupts = <3 IRQ_TYPE_EDGE_RISING>;
+       ...
+};
diff --git a/Documentation/devicetree/bindings/mips/pic32/microchip,pic32mzda.txt b/Documentation/devicetree/bindings/mips/pic32/microchip,pic32mzda.txt
new file mode 100644 (file)
index 0000000..1c8dbc4
--- /dev/null
@@ -0,0 +1,31 @@
+* Microchip PIC32MZDA Platforms
+
+PIC32MZDA Starter Kit
+Required root node properties:
+    - compatible = "microchip,pic32mzda-sk", "microchip,pic32mzda"
+
+CPU nodes:
+----------
+A "cpus" node is required.  Required properties:
+ - #address-cells: Must be 1.
+ - #size-cells: Must be 0.
+A CPU sub-node is also required.  Required properties:
+ - device_type: Must be "cpu".
+ - compatible: Must be "mti,mips14KEc".
+Example:
+       cpus {
+               #address-cells = <1>;
+               #size-cells = <0>;
+
+               cpu0: cpu@0 {
+                       device_type = "cpu";
+                       compatible = "mti,mips14KEc";
+               };
+       };
+
+Boot protocol
+--------------
+In accordance with Unified Hosting Interface Reference Manual (MD01069), the
+bootloader must pass the following arguments to the kernel:
+ - $a0: -2.
+ - $a1: KSEG0 address of the flattened device-tree blob.
index 451fef26b4dfaf05b6782099e54816d52a52baa8..10587bdadbbe5a8e8fe703e23c194420e3701f9f 100644 (file)
@@ -68,7 +68,7 @@ ethernet@f0b60000 {
                phy1: ethernet-phy@1 {
                        max-speed = <1000>;
                        reg = <0x1>;
-                       compatible = "brcm,28nm-gphy", "ethernet-phy-ieee802.3-c22";
+                       compatible = "ethernet-phy-ieee802.3-c22";
                };
        };
 };
@@ -115,7 +115,7 @@ ethernet@f0ba0000 {
                phy0: ethernet-phy@0 {
                        max-speed = <1000>;
                        reg = <0x0>;
-                       compatible = "brcm,bcm53125", "ethernet-phy-ieee802.3-c22";
+                       compatible = "ethernet-phy-ieee802.3-c22";
                };
        };
 };
index 80411b2f04906bc065513cb8a968f1a57f132906..ecacfa44b1eb9603d8b77c712dcff565ef4f470d 100644 (file)
@@ -4,8 +4,6 @@ Required properties:
 - compatible: should be "hisilicon,hns-dsaf-v1" or "hisilicon,hns-dsaf-v2".
   "hisilicon,hns-dsaf-v1" is for hip05.
   "hisilicon,hns-dsaf-v2" is for Hi1610 and Hi1612.
-- dsa-name: dsa fabric name who provide this interface.
-  should be "dsafX", X is the dsaf id.
 - mode: dsa fabric mode string. only support one of dsaf modes like these:
                "2port-64vf",
                "6port-16rss",
@@ -26,9 +24,8 @@ Required properties:
 
 Example:
 
-dsa: dsa@c7000000 {
+dsaf0: dsa@c7000000 {
        compatible = "hisilicon,hns-dsaf-v1";
-       dsa_name = "dsaf0";
        mode = "6port-16rss";
        interrupt-parent = <&mbigen_dsa>;
        reg = <0x0 0xC0000000 0x0 0x420000
index 41d19be7011ecd543af6c1226d2f332b8306a01e..e6a9d1c30878f89ff948d5fcd8cbe308e53327df 100644 (file)
@@ -4,8 +4,9 @@ Required properties:
 - compatible: "hisilicon,hns-nic-v1" or "hisilicon,hns-nic-v2".
   "hisilicon,hns-nic-v1" is for hip05.
   "hisilicon,hns-nic-v2" is for Hi1610 and Hi1612.
-- ae-name: accelerator name who provides this interface,
-  is simply a name referring to the name of name in the accelerator node.
+- ae-handle: accelerator engine handle for hns,
+  specifies a reference to the associating hardware driver node.
+  see Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt
 - port-id: is the index of port provided by DSAF (the accelerator). DSAF can
   connect to 8 PHYs. Port 0 to 1 are both used for adminstration purpose. They
   are called debug ports.
@@ -41,7 +42,7 @@ Example:
 
        ethernet@0{
                compatible = "hisilicon,hns-nic-v1";
-               ae-name = "dsaf0";
+               ae-handle = <&dsaf0>;
                port-id = <0>;
                local-mac-address = [a2 14 e4 4b 56 76];
        };
index aeea50c84e921fb301fc7fe9fd2cd6f295c7139c..d0cb8693963b51f447afbb9b6e6c5bd42e87f34e 100644 (file)
@@ -6,12 +6,17 @@ Required properties:
 - interrupts: interrupt for the device
 - phy: See ethernet.txt file in the same directory.
 - phy-mode: See ethernet.txt file in the same directory
-- clocks: a pointer to the reference clock for this device.
+- clocks: List of clocks for this device. At least one clock is
+  mandatory for the core clock. If several clocks are given, then the
+  clock-names property must be used to identify them.
 
 Optional properties:
 - tx-csum-limit: maximum mtu supported by port that allow TX checksum.
   Value is presented in bytes. If not used, by default 1600B is set for
   "marvell,armada-370-neta" and 9800B for others.
+- clock-names: List of names corresponding to clocks property; shall be
+  "core" for core clock and "bus" for the optional bus clock.
+
 
 Example:
 
index 79384113c2b060a4068413d427d3509cdcaa2d5b..694987d3c17a1085ba9fce589d81cc9ee99a7717 100644 (file)
@@ -38,7 +38,6 @@ Example :
 
                        phy11: ethernet-phy@1 {
                                reg = <1>;
-                               compatible = "marvell,88e1149r";
                                marvell,reg-init = <3 0x10 0 0x5777>,
                                        <3 0x11 0 0x00aa>,
                                        <3 0x12 0 0x4105>,
@@ -48,7 +47,6 @@ Example :
                        };
                        phy12: ethernet-phy@2 {
                                reg = <2>;
-                               compatible = "marvell,88e1149r";
                                marvell,reg-init = <3 0x10 0 0x5777>,
                                        <3 0x11 0 0x00aa>,
                                        <3 0x12 0 0x4105>,
@@ -58,7 +56,6 @@ Example :
                        };
                        phy13: ethernet-phy@3 {
                                reg = <3>;
-                               compatible = "marvell,88e1149r";
                                marvell,reg-init = <3 0x10 0 0x5777>,
                                        <3 0x11 0 0x00aa>,
                                        <3 0x12 0 0x4105>,
@@ -68,7 +65,6 @@ Example :
                        };
                        phy14: ethernet-phy@4 {
                                reg = <4>;
-                               compatible = "marvell,88e1149r";
                                marvell,reg-init = <3 0x10 0 0x5777>,
                                        <3 0x11 0 0x00aa>,
                                        <3 0x12 0 0x4105>,
@@ -85,7 +81,6 @@ Example :
 
                        phy21: ethernet-phy@1 {
                                reg = <1>;
-                               compatible = "marvell,88e1149r";
                                marvell,reg-init = <3 0x10 0 0x5777>,
                                        <3 0x11 0 0x00aa>,
                                        <3 0x12 0 0x4105>,
@@ -95,7 +90,6 @@ Example :
                        };
                        phy22: ethernet-phy@2 {
                                reg = <2>;
-                               compatible = "marvell,88e1149r";
                                marvell,reg-init = <3 0x10 0 0x5777>,
                                        <3 0x11 0 0x00aa>,
                                        <3 0x12 0 0x4105>,
@@ -105,7 +99,6 @@ Example :
                        };
                        phy23: ethernet-phy@3 {
                                reg = <3>;
-                               compatible = "marvell,88e1149r";
                                marvell,reg-init = <3 0x10 0 0x5777>,
                                        <3 0x11 0 0x00aa>,
                                        <3 0x12 0 0x4105>,
@@ -115,7 +108,6 @@ Example :
                        };
                        phy24: ethernet-phy@4 {
                                reg = <4>;
-                               compatible = "marvell,88e1149r";
                                marvell,reg-init = <3 0x10 0 0x5777>,
                                        <3 0x11 0 0x00aa>,
                                        <3 0x12 0 0x4105>,
index f65606f8d632b86bab2af28eb4a198d426a378ab..491f5bd55203b31c62b63ed2bcc624a701fd1580 100644 (file)
@@ -47,7 +47,6 @@ Example :
 
                        phy11: ethernet-phy@1 {
                                reg = <1>;
-                               compatible = "marvell,88e1149r";
                                marvell,reg-init = <3 0x10 0 0x5777>,
                                        <3 0x11 0 0x00aa>,
                                        <3 0x12 0 0x4105>,
@@ -57,7 +56,6 @@ Example :
                        };
                        phy12: ethernet-phy@2 {
                                reg = <2>;
-                               compatible = "marvell,88e1149r";
                                marvell,reg-init = <3 0x10 0 0x5777>,
                                        <3 0x11 0 0x00aa>,
                                        <3 0x12 0 0x4105>,
@@ -67,7 +65,6 @@ Example :
                        };
                        phy13: ethernet-phy@3 {
                                reg = <3>;
-                               compatible = "marvell,88e1149r";
                                marvell,reg-init = <3 0x10 0 0x5777>,
                                        <3 0x11 0 0x00aa>,
                                        <3 0x12 0 0x4105>,
@@ -77,7 +74,6 @@ Example :
                        };
                        phy14: ethernet-phy@4 {
                                reg = <4>;
-                               compatible = "marvell,88e1149r";
                                marvell,reg-init = <3 0x10 0 0x5777>,
                                        <3 0x11 0 0x00aa>,
                                        <3 0x12 0 0x4105>,
@@ -94,7 +90,6 @@ Example :
 
                        phy21: ethernet-phy@1 {
                                reg = <1>;
-                               compatible = "marvell,88e1149r";
                                marvell,reg-init = <3 0x10 0 0x5777>,
                                        <3 0x11 0 0x00aa>,
                                        <3 0x12 0 0x4105>,
@@ -104,7 +99,6 @@ Example :
                        };
                        phy22: ethernet-phy@2 {
                                reg = <2>;
-                               compatible = "marvell,88e1149r";
                                marvell,reg-init = <3 0x10 0 0x5777>,
                                        <3 0x11 0 0x00aa>,
                                        <3 0x12 0 0x4105>,
@@ -114,7 +108,6 @@ Example :
                        };
                        phy23: ethernet-phy@3 {
                                reg = <3>;
-                               compatible = "marvell,88e1149r";
                                marvell,reg-init = <3 0x10 0 0x5777>,
                                        <3 0x11 0 0x00aa>,
                                        <3 0x12 0 0x4105>,
@@ -124,7 +117,6 @@ Example :
                        };
                        phy24: ethernet-phy@4 {
                                reg = <4>;
-                               compatible = "marvell,88e1149r";
                                marvell,reg-init = <3 0x10 0 0x5777>,
                                        <3 0x11 0 0x00aa>,
                                        <3 0x12 0 0x4105>,
diff --git a/Documentation/devicetree/bindings/net/mediatek,mt7620-gsw.txt b/Documentation/devicetree/bindings/net/mediatek,mt7620-gsw.txt
new file mode 100644 (file)
index 0000000..aa63130
--- /dev/null
@@ -0,0 +1,26 @@
+Mediatek Gigabit Switch
+=======================
+
+The mediatek gigabit switch can be found on Mediatek SoCs (mt7620, mt7621).
+
+Required properties:
+- compatible: Should be "mediatek,mt7620-gsw" or "mediatek,mt7621-gsw"
+- reg: Address and length of the register set for the device
+- interrupt-parent: Should be the phandle for the interrupt controller
+  that services interrupts for this device
+- interrupts: Should contain the gigabit switches interrupt
+- resets: Should contain the gigabit switches resets
+- reset-names: Should contain the reset names "gsw"
+
+Example:
+
+gsw@10110000 {
+       compatible = "ralink,mt7620-gsw";
+       reg = <0x10110000 8000>;
+
+       resets = <&rstctrl 23>;
+       reset-names = "gsw";
+
+       interrupt-parent = <&intc>;
+       interrupts = <17>;
+};
index 525e1658f2da5ca9ed22594fda97a25646ac2e20..bc1c3c8bf8fa37fa7e08dcabc65f1752a8a79749 100644 (file)
@@ -17,8 +17,7 @@ Optional Properties:
   "ethernet-phy-ieee802.3-c22" or "ethernet-phy-ieee802.3-c45" for
   PHYs that implement IEEE802.3 clause 22 or IEEE802.3 clause 45
   specifications. If neither of these are specified, the default is to
-  assume clause 22. The compatible list may also contain other
-  elements.
+  assume clause 22.
 
   If the phy's identifier is known then the list may contain an entry
   of the form: "ethernet-phy-idAAAA.BBBB" where
@@ -28,6 +27,9 @@ Optional Properties:
             4 hex digits. This is the chip vendor OUI bits 19:24,
             followed by 10 bits of a vendor specific ID.
 
+  The compatible list should not contain other values than those
+  listed here.
+
 - max-speed: Maximum PHY supported speed (10, 100, 1000...)
 
 - broken-turn-around: If set, indicates the PHY device does not correctly
diff --git a/Documentation/devicetree/bindings/net/ralink,rt2880-net.txt b/Documentation/devicetree/bindings/net/ralink,rt2880-net.txt
new file mode 100644 (file)
index 0000000..88b095d
--- /dev/null
@@ -0,0 +1,61 @@
+Ralink Frame Engine Ethernet controller
+=======================================
+
+The Ralink frame engine ethernet controller can be found on Ralink and
+Mediatek SoCs (RT288x, RT3x5x, RT366x, RT388x, rt5350, mt7620, mt7621, mt76x8).
+
+Depending on the SoC, there is a number of ports connected to the CPU port
+directly and/or via a (gigabit-)switch.
+
+* Ethernet controller node
+
+Required properties:
+- compatible: Should be one of "ralink,rt2880-eth", "ralink,rt3050-eth",
+  "ralink,rt3050-eth", "ralink,rt3883-eth", "ralink,rt5350-eth",
+  "mediatek,mt7620-eth", "mediatek,mt7621-eth"
+- reg: Address and length of the register set for the device
+- interrupt-parent: Should be the phandle for the interrupt controller
+  that services interrupts for this device
+- interrupts: Should contain the frame engines interrupt
+- resets: Should contain the frame engines resets
+- reset-names: Should contain the reset names "fe". If a switch is present
+  "esw" is also required.
+
+
+* Ethernet port node
+
+Required properties:
+- compatible: Should be "ralink,eth-port"
+- reg: The number of the physical port
+- phy-handle: reference to the node describing the phy
+
+Example:
+
+mdio-bus {
+       ...
+       phy0: ethernet-phy@0 {
+               phy-mode = "mii";
+               reg = <0>;
+       };
+};
+
+ethernet@400000 {
+       compatible = "ralink,rt2880-eth";
+       reg = <0x00400000 10000>;
+
+       #address-cells = <1>;
+       #size-cells = <0>;
+
+       resets = <&rstctrl 18>;
+       reset-names = "fe";
+
+       interrupt-parent = <&cpuintc>;
+       interrupts = <5>;
+
+       port@0 {
+               compatible = "ralink,eth-port";
+               reg = <0>;
+               phy-handle = <&phy0>;
+       };
+
+};
diff --git a/Documentation/devicetree/bindings/net/ralink,rt3050-esw.txt b/Documentation/devicetree/bindings/net/ralink,rt3050-esw.txt
new file mode 100644 (file)
index 0000000..2e79bd3
--- /dev/null
@@ -0,0 +1,32 @@
+Ralink Fast Ethernet Embedded Switch
+====================================
+
+The ralink fast ethernet embedded switch can be found on Ralink and Mediatek
+SoCs (RT3x5x, RT5350, MT76x8).
+
+Required properties:
+- compatible: Should be "ralink,rt3050-esw"
+- reg: Address and length of the register set for the device
+- interrupt-parent: Should be the phandle for the interrupt controller
+  that services interrupts for this device
+- interrupts: Should contain the embedded switches interrupt
+- resets: Should contain the embedded switches resets
+- reset-names: Should contain the reset names "esw"
+
+Optional properties:
+- ralink,portmap: can be used to choose if the default switch setup is
+  llllw or wllll
+- ralink,led_polarity: override the active high/low settings of the leds
+
+Example:
+
+esw@10110000 {
+       compatible = "ralink,rt3050-esw";
+       reg = <0x10110000 8000>;
+
+       resets = <&rstctrl 23>;
+       reset-names = "esw";
+
+       interrupt-parent = <&intc>;
+       interrupts = <17>;
+};
diff --git a/Documentation/devicetree/bindings/phy/phy-ath79-usb.txt b/Documentation/devicetree/bindings/phy/phy-ath79-usb.txt
new file mode 100644 (file)
index 0000000..cafe219
--- /dev/null
@@ -0,0 +1,18 @@
+* Atheros AR71XX/9XXX USB PHY
+
+Required properties:
+- compatible: "qca,ar7100-usb-phy"
+- #phys-cells: should be 0
+- reset-names: "usb-phy"[, "usb-suspend-override"]
+- resets: references to the reset controllers
+
+Example:
+
+       usb-phy {
+               compatible = "qca,ar7100-usb-phy";
+
+               reset-names = "usb-phy", "usb-suspend-override";
+               resets = <&rst 4>, <&rst 3>;
+
+               #phy-cells = <0>;
+       };
index 0dfa60d88dd3b4bb4ce2a42d2c0b3d15f4835e45..08efe6bc21935fa36785612043489faab5ba33c7 100644 (file)
@@ -2,8 +2,10 @@
 
 Required properties:
 - compatible : should be "rockchip,<name>-tsadc"
+   "rockchip,rk3228-tsadc": found on RK3228 SoCs
    "rockchip,rk3288-tsadc": found on RK3288 SoCs
    "rockchip,rk3368-tsadc": found on RK3368 SoCs
+   "rockchip,rk3399-tsadc": found on RK3399 SoCs
 - reg : physical base address of the controller and length of memory mapped
        region.
 - interrupts : The interrupt number to the cpu. The interrupt specifier format
index fde9fd06fa988b2eac60f10880973351e783fe37..843b045b4069a62c154bdf6c2723b3061bd81abd 100644 (file)
@@ -240,8 +240,8 @@ Table 1-2: Contents of the status files (as of 4.1)
  RssFile                     size of resident file mappings
  RssShmem                    size of resident shmem memory (includes SysV shm,
                              mapping of tmpfs and shared anonymous mappings)
- VmData                      size of data, stack, and text segments
- VmStk                       size of data, stack, and text segments
+ VmData                      size of private data segments
+ VmStk                       size of stack segments
  VmExe                       size of text segment
  VmLib                       size of shared library code
  VmPTE                       size of page table entries
@@ -356,7 +356,7 @@ address           perms offset  dev   inode      pathname
 a7cb1000-a7cb2000 ---p 00000000 00:00 0
 a7cb2000-a7eb2000 rw-p 00000000 00:00 0
 a7eb2000-a7eb3000 ---p 00000000 00:00 0
-a7eb3000-a7ed5000 rw-p 00000000 00:00 0          [stack:1001]
+a7eb3000-a7ed5000 rw-p 00000000 00:00 0
 a7ed5000-a8008000 r-xp 00000000 03:00 4222       /lib/libc.so.6
 a8008000-a800a000 r--p 00133000 03:00 4222       /lib/libc.so.6
 a800a000-a800b000 rw-p 00135000 03:00 4222       /lib/libc.so.6
@@ -388,7 +388,6 @@ is not associated with a file:
 
  [heap]                   = the heap of the program
  [stack]                  = the stack of the main process
- [stack:1001]             = the stack of the thread with tid 1001
  [vdso]                   = the "virtual dynamic shared object",
                             the kernel system call handler
 
@@ -396,10 +395,8 @@ is not associated with a file:
 
 The /proc/PID/task/TID/maps is a view of the virtual memory from the viewpoint
 of the individual tasks of a process. In this file you will see a mapping marked
-as [stack] if that task sees it as a stack. This is a key difference from the
-content of /proc/PID/maps, where you will see all mappings that are being used
-as stack by all of those tasks. Hence, for the example above, the task-level
-map, i.e. /proc/PID/task/TID/maps for thread 1001 will look like this:
+as [stack] if that task sees it as a stack. Hence, for the example above, the
+task-level map, i.e. /proc/PID/task/TID/maps for thread 1001 will look like this:
 
 08048000-08049000 r-xp 00000000 03:00 8312       /opt/test
 08049000-0804a000 rw-p 00001000 03:00 8312       /opt/test
index e1678542279a2d2ed8b3dd3222db81b267f7c2d4..4b1f36b6ada034000d3d8e80831f79b4921f0ab4 100644 (file)
@@ -15,7 +15,6 @@ Sleeping and interrupt context
     modify_ah
     query_ah
     destroy_ah
-    bind_mw
     post_send
     post_recv
     poll_cq
@@ -31,7 +30,6 @@ Sleeping and interrupt context
     ib_modify_ah
     ib_query_ah
     ib_destroy_ah
-    ib_bind_mw
     ib_post_send
     ib_post_recv
     ib_req_notify_cq
index cfb2c0f1a4a89237ce131999d7b1cc03aff178b4..9a53c929f017d16527270bc2244352edf1d34cd8 100644 (file)
@@ -1454,6 +1454,41 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                        In such case C2/C3 won't be used again.
                        idle=nomwait: Disable mwait for CPU C-states
 
+       ieee754=        [MIPS] Select IEEE Std 754 conformance mode
+                       Format: { strict | legacy | 2008 | relaxed }
+                       Default: strict
+
+                       Choose which programs will be accepted for execution
+                       based on the IEEE 754 NaN encoding(s) supported by
+                       the FPU and the NaN encoding requested with the value
+                       of an ELF file header flag individually set by each
+                       binary.  Hardware implementations are permitted to
+                       support either or both of the legacy and the 2008 NaN
+                       encoding mode.
+
+                       Available settings are as follows:
+                       strict  accept binaries that request a NaN encoding
+                               supported by the FPU
+                       legacy  only accept legacy-NaN binaries, if supported
+                               by the FPU
+                       2008    only accept 2008-NaN binaries, if supported
+                               by the FPU
+                       relaxed accept any binaries regardless of whether
+                               supported by the FPU
+
+                       The FPU emulator is always able to support both NaN
+                       encodings, so if no FPU hardware is present or it has
+                       been disabled with 'nofpu', then the settings of
+                       'legacy' and '2008' strap the emulator accordingly,
+                       'relaxed' straps the emulator for both legacy-NaN and
+                       2008-NaN, whereas 'strict' enables legacy-NaN only on
+                       legacy processors and both NaN encodings on MIPS32 or
+                       MIPS64 CPUs.
+
+                       The setting for ABS.fmt/NEG.fmt instruction execution
+                       mode generally follows that for the NaN encoding,
+                       except where unsupported by hardware.
+
        ignore_loglevel [KNL]
                        Ignore loglevel setting - this will print /all/
                        kernel messages to the console. Useful for debugging.
@@ -1461,6 +1496,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                        could change it dynamically, usually by
                        /sys/module/printk/parameters/ignore_loglevel.
 
+       ignore_rlimit_data
+                       Ignore RLIMIT_DATA setting for data mappings,
+                       print warning at first misuse.  Can be changed via
+                       /sys/module/kernel/parameters/ignore_rlimit_data.
+
        ihash_entries=  [KNL]
                        Set number of hash buckets for inode cache.
 
@@ -4195,6 +4235,17 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                        The default value of this parameter is determined by
                        the config option CONFIG_WQ_POWER_EFFICIENT_DEFAULT.
 
+       workqueue.debug_force_rr_cpu
+                       Workqueue used to implicitly guarantee that work
+                       items queued without explicit CPU specified are put
+                       on the local CPU.  This guarantee is no longer true
+                       and while local CPU is still preferred work items
+                       may be put on foreign CPUs.  This debug option
+                       forces round-robin CPU selection to flush out
+                       usages which depend on the now broken guarantee.
+                       When enabled, memory and cache locality will be
+                       impacted.
+
        x2apic_phys     [X86-64,APIC] Use x2apic physical mode instead of
                        default x2apic cluster mode on platforms
                        supporting x2apic.
index f4cbfe0ba1085b4df3067dcc457219699c5c6150..edec3a3e648d12eee550f953b6dec87ab58e34f8 100644 (file)
@@ -90,7 +90,7 @@ BLOCK_SOFTIRQ:  Do all of the following:
        from being initiated from tasks that might run on the CPU to
        be de-jittered.  (It is OK to force this CPU offline and then
        bring it back online before you start your application.)
-BLOCK_IOPOLL_SOFTIRQ:  Do all of the following:
+IRQ_POLL_SOFTIRQ:  Do all of the following:
 1.     Force block-device interrupts onto some other CPU.
 2.     Initiate any block I/O and block-I/O polling on other CPUs.
 3.     Once your application has started, prevent CPU-hotplug operations
index ceb44a095a27ba98804442c1f7aa91ed616b7d42..73b36d7c7b0d67309cce7bf0bd8a46f0c6ee7ea1 100644 (file)
@@ -594,7 +594,7 @@ tcp_fastopen - INTEGER
 
 tcp_syn_retries - INTEGER
        Number of times initial SYNs for an active TCP connection attempt
-       will be retransmitted. Should not be higher than 255. Default value
+       will be retransmitted. Should not be higher than 127. Default value
        is 6, which corresponds to 63seconds till the last retransmission
        with the current initial RTO of 1second. With this the final timeout
        for an active TCP connection attempt will happen after 127seconds.
index 88152f214f48cb69c643d4bf2ff2ac9a61ad2eb0..302b5ed616a6b2a5360e88e519140284b2f0e0d0 100644 (file)
@@ -32,6 +32,8 @@ Currently, these files are in /proc/sys/fs:
 - nr_open
 - overflowuid
 - overflowgid
+- pipe-user-pages-hard
+- pipe-user-pages-soft
 - protected_hardlinks
 - protected_symlinks
 - suid_dumpable
@@ -159,6 +161,27 @@ The default is 65534.
 
 ==============================================================
 
+pipe-user-pages-hard:
+
+Maximum total number of pages a non-privileged user may allocate for pipes.
+Once this limit is reached, no new pipes may be allocated until usage goes
+below the limit again. When set to 0, no limit is applied, which is the default
+setting.
+
+==============================================================
+
+pipe-user-pages-soft:
+
+Maximum total number of pages a non-privileged user may allocate for pipes
+before the pipe size gets limited to a single page. Once this limit is reached,
+new pipes will be limited to a single page in size for this user in order to
+limit total memory usage, and trying to increase them using fcntl() will be
+denied until usage goes below the limit again. The default value allows to
+allocate up to 1024 pipes at their default size. When set to 0, no limit is
+applied.
+
+==============================================================
+
 protected_hardlinks:
 
 A long-standing class of security issues is the hardlink-based
index 053f613fc9a913af132da7f794742c45d6727dfc..07e4cdf024073033e2a52213bcbed4d562111a33 100644 (file)
@@ -3025,7 +3025,7 @@ len must be a multiple of sizeof(struct kvm_s390_irq). It must be > 0
 and it must not exceed (max_vcpus + 32) * sizeof(struct kvm_s390_irq),
 which is the maximum number of possibly pending cpu-local interrupts.
 
-4.90 KVM_SMI
+4.96 KVM_SMI
 
 Capability: KVM_CAP_X86_SMM
 Architectures: x86
index b8a717c4f863ce4874816587069103a9c51517f3..7f1fa4ff300affdb1857eec9ac42278dc0a6c770 100644 (file)
@@ -223,9 +223,7 @@ F:  drivers/scsi/aacraid/
 
 ABI/API
 L:     linux-api@vger.kernel.org
-F:     Documentation/ABI/
 F:     include/linux/syscalls.h
-F:     include/uapi/
 F:     kernel/sys_ni.c
 
 ABIT UGURU 1,2 HARDWARE MONITOR DRIVER
@@ -686,13 +684,6 @@ M: Michael Hanselmann <linux-kernel@hansmi.ch>
 S:     Supported
 F:     drivers/macintosh/ams/
 
-AMSO1100 RNIC DRIVER
-M:     Tom Tucker <tom@opengridcomputing.com>
-M:     Steve Wise <swise@opengridcomputing.com>
-L:     linux-rdma@vger.kernel.org
-S:     Maintained
-F:     drivers/infiniband/hw/amso1100/
-
 ANALOG DEVICES INC AD9389B DRIVER
 M:     Hans Verkuil <hans.verkuil@cisco.com>
 L:     linux-media@vger.kernel.org
@@ -967,6 +958,8 @@ M:  Rob Herring <robh@kernel.org>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 F:     arch/arm/mach-highbank/
+F:     arch/arm/boot/dts/highbank.dts
+F:     arch/arm/boot/dts/ecx-*.dts*
 
 ARM/CAVIUM NETWORKS CNS3XXX MACHINE SUPPORT
 M:     Krzysztof Halasa <khalasa@piap.pl>
@@ -1042,6 +1035,7 @@ M:        Barry Song <baohua@kernel.org>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/baohua/linux.git
 S:     Maintained
+F:     arch/arm/boot/dts/prima2*
 F:     arch/arm/mach-prima2/
 F:     drivers/clk/sirf/
 F:     drivers/clocksource/timer-prima2.c
@@ -1143,6 +1137,10 @@ W:       http://www.hisilicon.com
 S:     Supported
 T:     git git://github.com/hisilicon/linux-hisi.git
 F:     arch/arm/mach-hisi/
+F:     arch/arm/boot/dts/hi3*
+F:     arch/arm/boot/dts/hip*
+F:     arch/arm/boot/dts/hisi*
+F:     arch/arm64/boot/dts/hisilicon/
 
 ARM/HP JORNADA 7XX MACHINE SUPPORT
 M:     Kristoffer Ericson <kristoffer.ericson@gmail.com>
@@ -1219,6 +1217,7 @@ M:        Santosh Shilimkar <ssantosh@kernel.org>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 F:     arch/arm/mach-keystone/
+F:     arch/arm/boot/dts/k2*
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/ssantosh/linux-keystone.git
 
 ARM/TEXAS INSTRUMENT KEYSTONE CLOCK FRAMEWORK
@@ -1287,6 +1286,7 @@ L:        linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 F:     arch/arm/mach-berlin/
 F:     arch/arm/boot/dts/berlin*
+F:     arch/arm64/boot/dts/marvell/berlin*
 
 
 ARM/Marvell Dove/MV78xx0/Orion SOC support
@@ -1425,6 +1425,7 @@ S:        Maintained
 F:     arch/arm/boot/dts/qcom-*.dts
 F:     arch/arm/boot/dts/qcom-*.dtsi
 F:     arch/arm/mach-qcom/
+F:     arch/arm64/boot/dts/qcom/*
 F:     drivers/soc/qcom/
 F:     drivers/tty/serial/msm_serial.h
 F:     drivers/tty/serial/msm_serial.c
@@ -1484,6 +1485,8 @@ L:        linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 L:     linux-samsung-soc@vger.kernel.org (moderated for non-subscribers)
 S:     Maintained
 F:     arch/arm/boot/dts/s3c*
+F:     arch/arm/boot/dts/s5p*
+F:     arch/arm/boot/dts/samsung*
 F:     arch/arm/boot/dts/exynos*
 F:     arch/arm64/boot/dts/exynos/
 F:     arch/arm/plat-samsung/
@@ -1563,6 +1566,7 @@ S:        Maintained
 F:     arch/arm/mach-socfpga/
 F:     arch/arm/boot/dts/socfpga*
 F:     arch/arm/configs/socfpga_defconfig
+F:     arch/arm64/boot/dts/altera/
 W:     http://www.rocketboards.org
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/dinguyen/linux.git
 
@@ -1716,7 +1720,7 @@ M:        Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 F:     arch/arm/boot/dts/vexpress*
-F:     arch/arm64/boot/dts/arm/vexpress*
+F:     arch/arm64/boot/dts/arm/
 F:     arch/arm/mach-vexpress/
 F:     */*/vexpress*
 F:     */*/*/vexpress*
@@ -2343,6 +2347,7 @@ F:        arch/arm/mach-bcm/
 F:     arch/arm/boot/dts/bcm113*
 F:     arch/arm/boot/dts/bcm216*
 F:     arch/arm/boot/dts/bcm281*
+F:     arch/arm64/boot/dts/broadcom/
 F:     arch/arm/configs/bcm_defconfig
 F:     drivers/mmc/host/sdhci-bcm-kona.c
 F:     drivers/clocksource/bcm_kona_timer.c
@@ -2420,6 +2425,8 @@ F:        arch/mips/kernel/*bmips*
 F:     arch/mips/boot/dts/brcm/bcm*.dts*
 F:     drivers/irqchip/irq-bcm7*
 F:     drivers/irqchip/irq-brcmstb*
+F:     include/linux/bcm963xx_nvram.h
+F:     include/linux/bcm963xx_tag.h
 
 BROADCOM TG3 GIGABIT ETHERNET DRIVER
 M:     Prashant Sreedharan <prashant@broadcom.com>
@@ -3443,7 +3450,7 @@ S:        Maintained
 F:     drivers/usb/dwc2/
 
 DESIGNWARE USB3 DRD IP DRIVER
-M:     Felipe Balbi <balbi@ti.com>
+M:     Felipe Balbi <balbi@kernel.org>
 L:     linux-usb@vger.kernel.org
 L:     linux-omap@vger.kernel.org
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git
@@ -4182,13 +4189,6 @@ W:       http://aeschi.ch.eu.org/efs/
 S:     Orphan
 F:     fs/efs/
 
-EHCA (IBM GX bus InfiniBand adapter) DRIVER
-M:     Hoang-Nam Nguyen <hnguyen@de.ibm.com>
-M:     Christoph Raisch <raisch@de.ibm.com>
-L:     linux-rdma@vger.kernel.org
-S:     Supported
-F:     drivers/infiniband/hw/ehca/
-
 EHEA (IBM pSeries eHEA 10Gb ethernet adapter) DRIVER
 M:     Thadeu Lima de Souza Cascardo <cascardo@linux.vnet.ibm.com>
 L:     netdev@vger.kernel.org
@@ -5780,10 +5780,8 @@ INTEL TELEMETRY DRIVER
 M:     Souvik Kumar Chakravarty <souvik.k.chakravarty@intel.com>
 L:     platform-driver-x86@vger.kernel.org
 S:     Maintained
-F:     drivers/platform/x86/intel_telemetry_core.c
 F:     arch/x86/include/asm/intel_telemetry.h
-F:     drivers/platform/x86/intel_telemetry_pltdrv.c
-F:     drivers/platform/x86/intel_telemetry_debugfs.c
+F:     drivers/platform/x86/intel_telemetry*
 
 IOC3 ETHERNET DRIVER
 M:     Ralf Baechle <ralf@linux-mips.org>
@@ -5809,12 +5807,6 @@ M:       Juanjo Ciarlante <jjciarla@raiz.uncu.edu.ar>
 S:     Maintained
 F:     net/ipv4/netfilter/ipt_MASQUERADE.c
 
-IPATH DRIVER
-M:     Mike Marciniszyn <infinipath@intel.com>
-L:     linux-rdma@vger.kernel.org
-S:     Maintained
-F:     drivers/staging/rdma/ipath/
-
 IPMI SUBSYSTEM
 M:     Corey Minyard <minyard@acm.org>
 L:     openipmi-developer@lists.sourceforge.net (moderated for non-subscribers)
@@ -6218,6 +6210,14 @@ F:       arch/arm64/include/uapi/asm/kvm*
 F:     arch/arm64/include/asm/kvm*
 F:     arch/arm64/kvm/
 
+KERNEL VIRTUAL MACHINE FOR MIPS (KVM/mips)
+M:     James Hogan <james.hogan@imgtec.com>
+L:     linux-mips@linux-mips.org
+S:     Supported
+F:     arch/mips/include/uapi/asm/kvm*
+F:     arch/mips/include/asm/kvm*
+F:     arch/mips/kvm/
+
 KEXEC
 M:     Eric Biederman <ebiederm@xmission.com>
 W:     http://kernel.org/pub/linux/utils/kernel/kexec/
@@ -6315,6 +6315,12 @@ S:       Maintained
 F:     net/l3mdev
 F:     include/net/l3mdev.h
 
+LANTIQ MIPS ARCHITECTURE
+M:     John Crispin <blogic@openwrt.org>
+L:     linux-mips@linux-mips.org
+S:     Maintained
+F:     arch/mips/lantiq
+
 LAPB module
 L:     linux-x25@vger.kernel.org
 S:     Orphan
@@ -7151,27 +7157,45 @@ W:      https://linuxtv.org
 S:     Odd Fixes
 F:     drivers/media/radio/radio-miropcm20*
 
-Mellanox MLX5 core VPI driver
-M:     Eli Cohen <eli@mellanox.com>
+MELLANOX MLX4 core VPI driver
+M:     Yishai Hadas <yishaih@mellanox.com>
 L:     netdev@vger.kernel.org
 L:     linux-rdma@vger.kernel.org
 W:     http://www.mellanox.com
 Q:     http://patchwork.ozlabs.org/project/netdev/list/
+S:     Supported
+F:     drivers/net/ethernet/mellanox/mlx4/
+F:     include/linux/mlx4/
+
+MELLANOX MLX4 IB driver
+M:     Yishai Hadas <yishaih@mellanox.com>
+L:     linux-rdma@vger.kernel.org
+W:     http://www.mellanox.com
 Q:     http://patchwork.kernel.org/project/linux-rdma/list/
-T:     git git://openfabrics.org/~eli/connect-ib.git
+S:     Supported
+F:     drivers/infiniband/hw/mlx4/
+F:     include/linux/mlx4/
+
+MELLANOX MLX5 core VPI driver
+M:     Matan Barak <matanb@mellanox.com>
+M:     Leon Romanovsky <leonro@mellanox.com>
+L:     netdev@vger.kernel.org
+L:     linux-rdma@vger.kernel.org
+W:     http://www.mellanox.com
+Q:     http://patchwork.ozlabs.org/project/netdev/list/
 S:     Supported
 F:     drivers/net/ethernet/mellanox/mlx5/core/
 F:     include/linux/mlx5/
 
-Mellanox MLX5 IB driver
-M:     Eli Cohen <eli@mellanox.com>
+MELLANOX MLX5 IB driver
+M:     Matan Barak <matanb@mellanox.com>
+M:     Leon Romanovsky <leonro@mellanox.com>
 L:     linux-rdma@vger.kernel.org
 W:     http://www.mellanox.com
 Q:     http://patchwork.kernel.org/project/linux-rdma/list/
-T:     git git://openfabrics.org/~eli/connect-ib.git
 S:     Supported
-F:     include/linux/mlx5/
 F:     drivers/infiniband/hw/mlx5/
+F:     include/linux/mlx5/
 
 MELEXIS MLX90614 DRIVER
 M:     Crt Mori <cmo@melexis.com>
@@ -7338,7 +7362,7 @@ F:        drivers/tty/isicom.c
 F:     include/linux/isicom.h
 
 MUSB MULTIPOINT HIGH SPEED DUAL-ROLE CONTROLLER
-M:     Felipe Balbi <balbi@ti.com>
+M:     Felipe Balbi <balbi@kernel.org>
 L:     linux-usb@vger.kernel.org
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git
 S:     Maintained
@@ -7702,6 +7726,12 @@ W:       https://github.com/jonmason/ntb/wiki
 T:     git git://github.com/jonmason/ntb.git
 F:     drivers/ntb/hw/intel/
 
+NTB AMD DRIVER
+M:     Xiangliang Yu <Xiangliang.Yu@amd.com>
+L:     linux-ntb@googlegroups.com
+S:     Supported
+F:     drivers/ntb/hw/amd/
+
 NTFS FILESYSTEM
 M:     Anton Altaparmakov <anton@tuxera.com>
 L:     linux-ntfs-dev@lists.sourceforge.net
@@ -7901,7 +7931,7 @@ F:        drivers/media/platform/omap3isp/
 F:     drivers/staging/media/omap4iss/
 
 OMAP USB SUPPORT
-M:     Felipe Balbi <balbi@ti.com>
+M:     Felipe Balbi <balbi@kernel.org>
 L:     linux-usb@vger.kernel.org
 L:     linux-omap@vger.kernel.org
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git
@@ -8780,6 +8810,7 @@ L:        linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 T:     git git://github.com/hzhuang1/linux.git
 T:     git git://github.com/rjarzmik/linux.git
 S:     Maintained
+F:     arch/arm/boot/dts/pxa*
 F:     arch/arm/mach-pxa/
 F:     drivers/dma/pxa*
 F:     drivers/pcmcia/pxa2xx*
@@ -8809,6 +8840,7 @@ L:        linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 T:     git git://github.com/hzhuang1/linux.git
 T:     git git://git.linaro.org/people/ycmiao/pxa-linux.git
 S:     Maintained
+F:     arch/arm/boot/dts/mmp*
 F:     arch/arm/mach-mmp/
 
 PXA MMCI DRIVER
@@ -8975,6 +9007,12 @@ L:       linux-fbdev@vger.kernel.org
 S:     Maintained
 F:     drivers/video/fbdev/aty/aty128fb.c
 
+RALINK MIPS ARCHITECTURE
+M:     John Crispin <blogic@openwrt.org>
+L:     linux-mips@linux-mips.org
+S:     Maintained
+F:     arch/mips/ralink
+
 RALINK RT2X00 WIRELESS LAN DRIVER
 P:     rt2x00 project
 M:     Stanislaw Gruszka <sgruszka@redhat.com>
@@ -10114,6 +10152,7 @@ S:      Supported
 F:     drivers/media/pci/solo6x10/
 
 SOFTWARE RAID (Multiple Disks) SUPPORT
+M:     Shaohua Li <shli@kernel.org>
 L:     linux-raid@vger.kernel.org
 T:     git git://neil.brown.name/md
 S:     Supported
@@ -10129,7 +10168,7 @@ F:      drivers/net/ethernet/natsemi/sonic.*
 
 SONICS SILICON BACKPLANE DRIVER (SSB)
 M:     Michael Buesch <m@bues.ch>
-L:     netdev@vger.kernel.org
+L:     linux-wireless@vger.kernel.org
 S:     Maintained
 F:     drivers/ssb/
 F:     include/linux/ssb/
@@ -10247,6 +10286,7 @@ L:      spear-devel@list.st.com
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 W:     http://www.st.com/spear
 S:     Maintained
+F:     arch/arm/boot/dts/spear*
 F:     arch/arm/mach-spear/
 
 SPEAR CLOCK FRAMEWORK SUPPORT
@@ -10453,9 +10493,11 @@ S:     Maintained
 F:     drivers/net/ethernet/dlink/sundance.c
 
 SUPERH
+M:     Yoshinori Sato <ysato@users.sourceforge.jp>
+M:     Rich Felker <dalias@libc.org>
 L:     linux-sh@vger.kernel.org
 Q:     http://patchwork.kernel.org/project/linux-sh/list/
-S:     Orphan
+S:     Maintained
 F:     Documentation/sh/
 F:     arch/sh/
 F:     drivers/sh/
@@ -11272,7 +11314,7 @@ F:      Documentation/usb/ehci.txt
 F:     drivers/usb/host/ehci*
 
 USB GADGET/PERIPHERAL SUBSYSTEM
-M:     Felipe Balbi <balbi@ti.com>
+M:     Felipe Balbi <balbi@kernel.org>
 L:     linux-usb@vger.kernel.org
 W:     http://www.linux-usb.org/gadget
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git
@@ -11348,7 +11390,7 @@ S:      Maintained
 F:     drivers/net/usb/pegasus.*
 
 USB PHY LAYER
-M:     Felipe Balbi <balbi@ti.com>
+M:     Felipe Balbi <balbi@kernel.org>
 L:     linux-usb@vger.kernel.org
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git
 S:     Maintained
@@ -12087,7 +12129,7 @@ F:      drivers/net/hamradio/*scc.c
 F:     drivers/net/hamradio/z8530.h
 
 ZBUD COMPRESSED PAGE ALLOCATOR
-M:     Seth Jennings <sjennings@variantweb.net>
+M:     Seth Jennings <sjenning@redhat.com>
 L:     linux-mm@kvack.org
 S:     Maintained
 F:     mm/zbud.c
@@ -12142,7 +12184,7 @@ F:      include/linux/zsmalloc.h
 F:     Documentation/vm/zsmalloc.txt
 
 ZSWAP COMPRESSED SWAP CACHING
-M:     Seth Jennings <sjennings@variantweb.net>
+M:     Seth Jennings <sjenning@redhat.com>
 L:     linux-mm@kvack.org
 S:     Maintained
 F:     mm/zswap.c
index abfb3e8eb0b14e680290f3800bd331e870cbb8c6..682840850f580d6c5361d0e6db4084694e90a1c4 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 4
-PATCHLEVEL = 4
+PATCHLEVEL = 5
 SUBLEVEL = 0
-EXTRAVERSION =
+EXTRAVERSION = -rc3
 NAME = Blurry Fish Butt
 
 # *DOCUMENTATION*
index 5c0e5cc8ed1026373b0f34a97c95bbfc49e4f5c5..c6b6175d020329ac74eeefb0eebf1a6c353d6ea8 100644 (file)
@@ -153,10 +153,9 @@ choice
                  mobile SoCs in the Kona family of chips (e.g. bcm28155,
                  bcm11351, etc...)
 
-       config DEBUG_BCM63XX
+       config DEBUG_BCM63XX_UART
                bool "Kernel low-level debugging on BCM63XX UART"
                depends on ARCH_BCM_63XX
-               select DEBUG_UART_BCM63XX
 
        config DEBUG_BERLIN_UART
                bool "Marvell Berlin SoC Debug UART"
@@ -1414,7 +1413,7 @@ config DEBUG_LL_INCLUDE
        default "debug/vf.S" if DEBUG_VF_UART
        default "debug/vt8500.S" if DEBUG_VT8500_UART0
        default "debug/zynq.S" if DEBUG_ZYNQ_UART0 || DEBUG_ZYNQ_UART1
-       default "debug/bcm63xx.S" if DEBUG_UART_BCM63XX
+       default "debug/bcm63xx.S" if DEBUG_BCM63XX_UART
        default "debug/digicolor.S" if DEBUG_DIGICOLOR_UA0
        default "mach/debug-macro.S"
 
@@ -1428,10 +1427,6 @@ config DEBUG_UART_8250
                ARCH_IOP13XX || ARCH_IOP32X || ARCH_IOP33X || ARCH_IXP4XX || \
                ARCH_RPC
 
-# Compatibility options for BCM63xx
-config DEBUG_UART_BCM63XX
-       def_bool ARCH_BCM_63XX
-
 config DEBUG_UART_PHYS
        hex "Physical base address of debug UART"
        default 0x00100a00 if DEBUG_NETX_UART
@@ -1529,7 +1524,7 @@ config DEBUG_UART_PHYS
        default 0xfffb0000 if DEBUG_OMAP1UART1 || DEBUG_OMAP7XXUART1
        default 0xfffb0800 if DEBUG_OMAP1UART2 || DEBUG_OMAP7XXUART2
        default 0xfffb9800 if DEBUG_OMAP1UART3 || DEBUG_OMAP7XXUART3
-       default 0xfffe8600 if DEBUG_UART_BCM63XX
+       default 0xfffe8600 if DEBUG_BCM63XX_UART
        default 0xfffff700 if ARCH_IOP33X
        depends on ARCH_EP93XX || \
                DEBUG_LL_UART_8250 || DEBUG_LL_UART_PL01X || \
@@ -1542,7 +1537,7 @@ config DEBUG_UART_PHYS
                DEBUG_RMOBILE_SCIFA0 || DEBUG_RMOBILE_SCIFA1 || \
                DEBUG_RMOBILE_SCIFA4 || DEBUG_S3C24XX_UART || \
                DEBUG_S3C64XX_UART || \
-               DEBUG_UART_BCM63XX || DEBUG_ASM9260_UART || \
+               DEBUG_BCM63XX_UART || DEBUG_ASM9260_UART || \
                DEBUG_SIRFSOC_UART || DEBUG_DIGICOLOR_UA0 || \
                DEBUG_AT91_UART
 
@@ -1588,7 +1583,7 @@ config DEBUG_UART_VIRT
        default 0xfb10c000 if DEBUG_REALVIEW_PB1176_PORT
        default 0xfc40ab00 if DEBUG_BRCMSTB_UART
        default 0xfc705000 if DEBUG_ZTE_ZX
-       default 0xfcfe8600 if DEBUG_UART_BCM63XX
+       default 0xfcfe8600 if DEBUG_BCM63XX_UART
        default 0xfd000000 if DEBUG_SPEAR3XX || DEBUG_SPEAR13XX
        default 0xfd012000 if DEBUG_MVEBU_UART0_ALTERNATE && ARCH_MV78XX0
        default 0xfd883000 if DEBUG_ALPINE_UART0
@@ -1638,7 +1633,7 @@ config DEBUG_UART_VIRT
                DEBUG_NETX_UART || \
                DEBUG_QCOM_UARTDM || DEBUG_S3C24XX_UART || \
                DEBUG_S3C64XX_UART || \
-               DEBUG_UART_BCM63XX || DEBUG_ASM9260_UART || \
+               DEBUG_BCM63XX_UART || DEBUG_ASM9260_UART || \
                DEBUG_SIRFSOC_UART || DEBUG_DIGICOLOR_UA0
 
 config DEBUG_UART_8250_SHIFT
index 4c23a68a0917049e4c7e9e864dcebca38d6b75b2..7a6a58ef8aaf815728fd59b0ca7af752495595b0 100644 (file)
@@ -106,6 +106,15 @@ ORIG_CFLAGS := $(KBUILD_CFLAGS)
 KBUILD_CFLAGS = $(subst -pg, , $(ORIG_CFLAGS))
 endif
 
+# -fstack-protector-strong triggers protection checks in this code,
+# but it is being used too early to link to meaningful stack_chk logic.
+nossp_flags := $(call cc-option, -fno-stack-protector)
+CFLAGS_atags_to_fdt.o := $(nossp_flags)
+CFLAGS_fdt.o := $(nossp_flags)
+CFLAGS_fdt_ro.o := $(nossp_flags)
+CFLAGS_fdt_rw.o := $(nossp_flags)
+CFLAGS_fdt_wip.o := $(nossp_flags)
+
 ccflags-y := -fpic -mno-single-pic-base -fno-builtin -I$(obj)
 asflags-y := -DZIMAGE
 
index 04885f9f959e21fcffb388940d64be0b6eb32869..1fafaad516ba0481de34cc12320fb9e51c2884ce 100644 (file)
                        ti,mbox-num-users = <4>;
                        ti,mbox-num-fifos = <8>;
                        mbox_wkupm3: wkup_m3 {
+                               ti,mbox-send-noirq;
                                ti,mbox-tx = <0 0 0>;
                                ti,mbox-rx = <0 0 3>;
                        };
index df955ba4dc6203273ff795cc1492e698764eda0d..92068fbf8b577440409c8e029d7cfba28da43cf8 100644 (file)
@@ -73,7 +73,7 @@
        global_timer: timer@48240200 {
                compatible = "arm,cortex-a9-global-timer";
                reg = <0x48240200 0x100>;
-               interrupts = <GIC_PPI 11 IRQ_TYPE_LEVEL_HIGH>;
+               interrupts = <GIC_PPI 11 IRQ_TYPE_EDGE_RISING>;
                interrupt-parent = <&gic>;
                clocks = <&mpu_periphclk>;
        };
@@ -81,7 +81,7 @@
        local_timer: timer@48240600 {
                compatible = "arm,cortex-a9-twd-timer";
                reg = <0x48240600 0x100>;
-               interrupts = <GIC_PPI 13 IRQ_TYPE_LEVEL_HIGH>;
+               interrupts = <GIC_PPI 13 IRQ_TYPE_EDGE_RISING>;
                interrupt-parent = <&gic>;
                clocks = <&mpu_periphclk>;
        };
                        ti,mbox-num-users = <4>;
                        ti,mbox-num-fifos = <8>;
                        mbox_wkupm3: wkup_m3 {
+                               ti,mbox-send-noirq;
                                ti,mbox-tx = <0 0 0>;
                                ti,mbox-rx = <0 0 3>;
                        };
index 64d43325bcbc73aa08bf1da025c255883737f233..ecd09ab6d581bf95aa0b8bee8d596c3da8b9ee22 100644 (file)
                pinctrl-names = "default";
                pinctrl-0 = <&pixcir_ts_pins>;
                reg = <0x5c>;
-               interrupt-parent = <&gpio3>;
-               interrupts = <22 0>;
 
                attb-gpio = <&gpio3 22 GPIO_ACTIVE_HIGH>;
 
                 * 0x264 represents the offset of padconf register of
                 * gpio3_22 from am43xx_pinmux base.
                 */
-               interrupts-extended = <&gpio3 22 IRQ_TYPE_NONE>,
+               interrupts-extended = <&gpio3 22 IRQ_TYPE_EDGE_FALLING>,
                                      <&am43xx_pinmux 0x264>;
                interrupt-names = "tsc", "wakeup";
 
index 746fd2b179587fe4522724f5b42b16202e97edef..d580e2b70f9a65f7dc078799add6d56628ae0169 100644 (file)
                pinctrl-0 = <&pixcir_ts_pins>;
                reg = <0x5c>;
                interrupt-parent = <&gpio1>;
-               interrupts = <17 0>;
+               interrupts = <17 IRQ_TYPE_EDGE_FALLING>;
 
                attb-gpio = <&gpio1 17 GPIO_ACTIVE_HIGH>;
 
index c53882643ae96b6da8b15c6a26e5fd93ba3300cb..8d93882dc8d541a77c870e635813c32a7c77b79d 100644 (file)
                        DRA7XX_CORE_IOPAD(0x35b8, PIN_INPUT_PULLDOWN | MUX_MODE3) /* vin2a_d20.rgmii1_rd3 */
                        DRA7XX_CORE_IOPAD(0x35bc, PIN_INPUT_PULLDOWN | MUX_MODE3) /* vin2a_d21.rgmii1_rd2 */
                        DRA7XX_CORE_IOPAD(0x35c0, PIN_INPUT_PULLDOWN | MUX_MODE3) /* vin2a_d22.rgmii1_rd1 */
-                       DRA7XX_CORE_IOPAD(0x35c4, PIN_INPUT_PULLUP | MUX_MODE3) /* vin2a_d23.rgmii1_rd0 */
+                       DRA7XX_CORE_IOPAD(0x35c4, PIN_INPUT_PULLDOWN | MUX_MODE3) /* vin2a_d23.rgmii1_rd0 */
                >;
        };
 
        pinctrl-names = "default";
        pinctrl-0 = <&qspi1_pins>;
 
-       spi-max-frequency = <20000000>;
+       spi-max-frequency = <48000000>;
 
        spi_flash: spi_flash@0 {
                #address-cells = <1>;
                #size-cells = <1>;
                compatible = "spansion,m25p80", "jedec,spi-nor";
                reg = <0>;                              /* CS0 */
-               spi-max-frequency = <20000000>;
+               spi-max-frequency = <48000000>;
 
                partition@0 {
                        label = "uboot";
 
 &cpsw_emac0 {
        phy_id = <&davinci_mdio>, <0>;
-       phy-mode = "rgmii";
+       phy-mode = "rgmii-txid";
        dual_emac_res_vlan = <0>;
 };
 
 &cpsw_emac1 {
        phy_id = <&davinci_mdio>, <1>;
-       phy-mode = "rgmii";
+       phy-mode = "rgmii-txid";
        dual_emac_res_vlan = <1>;
 };
 
 };
 
 &usb2 {
-       dr_mode = "peripheral";
+       dr_mode = "host";
 };
 
 &mcasp3 {
index 77bb8e17401a26cad24dcfd2ab5a7c9ddbd4d312..988e99632d49953d012b72e674f8fa040883a1c4 100644 (file)
@@ -25,8 +25,8 @@
 &dra7_pmx_core {
        uart3_pins_default: uart3_pins_default {
                pinctrl-single,pins = <
-                       DRA7XX_CORE_IOPAD(0x37f8, PIN_INPUT_SLEW | MUX_MODE2)   /* uart2_ctsn.uart3_rxd */
-                       DRA7XX_CORE_IOPAD(0x37fc, PIN_INPUT_SLEW | MUX_MODE1)   /* uart2_rtsn.uart3_txd */
+                       DRA7XX_CORE_IOPAD(0x3648, PIN_INPUT_SLEW | MUX_MODE0)   /* uart3_rxd */
+                       DRA7XX_CORE_IOPAD(0x364c, PIN_INPUT_SLEW | MUX_MODE0)   /* uart3_txd */
                >;
        };
 
        pinctrl-0 = <&i2c5_pins_default>;
        clock-frequency = <400000>;
 
-       eeprom_base: atmel@50 {
+       eeprom_base: atmel@54 {
                compatible = "atmel,24c08";
-               reg = <0x50>;
+               reg = <0x54>;
                pagesize = <16>;
        };
 
index 13cf69a8d0fb392b264910f014cf7ff8434f9e2b..fb9e1bbf23385b85b0b82ddb153b922b2d2e0178 100644 (file)
                                nand-on-flash-bbt;
 
                                partitions {
+                                       compatible = "fixed-partitions";
                                        #address-cells = <1>;
                                        #size-cells = <1>;
 
index 77ddff036409f7cb84d28342f8b973ce9792d09e..e683856c507c8bedacb5f8746a43cf9e4aef2207 100644 (file)
 
                        macb0: ethernet@f8008000 {
                                pinctrl-names = "default";
-                               pinctrl-0 = <&pinctrl_macb0_default>;
+                               pinctrl-0 = <&pinctrl_macb0_default &pinctrl_macb0_phy_irq>;
                                phy-mode = "rmii";
                                status = "okay";
+
+                               ethernet-phy@1 {
+                                       reg = <0x1>;
+                                       interrupt-parent = <&pioA>;
+                                       interrupts = <73 IRQ_TYPE_LEVEL_LOW>;
+                               };
                        };
 
                        pdmic@f8018000 {
                                        bias-disable;
                                };
 
+                               pinctrl_macb0_phy_irq: macb0_phy_irq {
+                                       pinmux = <PIN_PC9__GPIO>;
+                               };
+
                                pinctrl_pdmic_default: pdmic_default {
                                        pinmux = <PIN_PB26__PDMIC_DAT>,
                                                <PIN_PB27__PDMIC_CLK>;
index 131614f28e758653e34cc2b993bb6a5a28f20bbb..569026e8f96cadaf25eeb10ca207c02c7f175121 100644 (file)
                        macb0: ethernet@f8020000 {
                                phy-mode = "rmii";
                                status = "okay";
+                               pinctrl-names = "default";
+                               pinctrl-0 = <&pinctrl_macb0_rmii &pinctrl_macb0_phy_irq>;
 
                                phy0: ethernet-phy@1 {
                                        interrupt-parent = <&pioE>;
-                                       interrupts = <1 IRQ_TYPE_EDGE_FALLING>;
+                                       interrupts = <1 IRQ_TYPE_LEVEL_LOW>;
                                        reg = <1>;
                                };
                        };
                                                atmel,pins =
                                                        <AT91_PIOE 8 AT91_PERIPH_GPIO AT91_PINCTRL_PULL_UP_DEGLITCH>;
                                        };
+                                       pinctrl_macb0_phy_irq: macb0_phy_irq_0 {
+                                               atmel,pins =
+                                                       <AT91_PIOE 1 AT91_PERIPH_GPIO AT91_PINCTRL_PULL_UP_DEGLITCH>;
+                                       };
                                };
                        };
                };
index 2d4a33100af6bdc4fcd4a3500467456673f54325..4e98cda974032221dbf5a0917de97807653cf943 100644 (file)
                        };
 
                        macb0: ethernet@f8020000 {
+                               pinctrl-0 = <&pinctrl_macb0_rmii &pinctrl_macb0_phy_irq>;
                                phy-mode = "rmii";
                                status = "okay";
+
+                               ethernet-phy@1 {
+                                       reg = <0x1>;
+                                       interrupt-parent = <&pioE>;
+                                       interrupts = <1 IRQ_TYPE_LEVEL_LOW>;
+                               };
                        };
 
                        mmc1: mmc@fc000000 {
 
                        pinctrl@fc06a000 {
                                board {
+                                       pinctrl_macb0_phy_irq: macb0_phy_irq {
+                                               atmel,pins =
+                                                       <AT91_PIOE 1 AT91_PERIPH_GPIO AT91_PINCTRL_NONE>;
+                                       };
                                        pinctrl_mmc0_cd: mmc0_cd {
                                                atmel,pins =
                                                        <AT91_PIOE 5 AT91_PERIPH_GPIO AT91_PINCTRL_PULL_UP_DEGLITCH>;
index ca4ddf86817ab64dc6a8a31193191460bfaf8d4b..626c67d666269d5e878c650b76e8babc424bd48a 100644 (file)
        };
 
        panel: panel {
-               compatible = "qd,qd43003c0-40", "simple-panel";
+               compatible = "qiaodian,qd43003c0-40", "simple-panel";
                backlight = <&backlight>;
                power-supply = <&panel_reg>;
                #address-cells = <1>;
index 09eed3cea0afcd0ab74df3a13de8525714f388cc..36eec7392ab491c839f86ca3299cc716af21e356 100644 (file)
@@ -1,7 +1,8 @@
 /*
  * Device Tree file for Buffalo Linkstation LS-WVL/VL
  *
- * Copyright (C) 2015, rogershimizu@gmail.com
+ * Copyright (C) 2015, 2016
+ * Roger Shimizu <rogershimizu@gmail.com>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
                button@1 {
                        label = "Function Button";
                        linux,code = <KEY_OPTION>;
-                       gpios = <&gpio0 45 GPIO_ACTIVE_LOW>;
+                       gpios = <&gpio1 13 GPIO_ACTIVE_LOW>;
                };
 
                button@2 {
                        label = "Power-on Switch";
                        linux,code = <KEY_RESERVED>;
                        linux,input-type = <5>;
-                       gpios = <&gpio0 46 GPIO_ACTIVE_LOW>;
+                       gpios = <&gpio1 14 GPIO_ACTIVE_LOW>;
                };
 
                button@3 {
                        label = "Power-auto Switch";
                        linux,code = <KEY_ESC>;
                        linux,input-type = <5>;
-                       gpios = <&gpio0 47 GPIO_ACTIVE_LOW>;
+                       gpios = <&gpio1 15 GPIO_ACTIVE_LOW>;
                };
        };
 
 
                led@1 {
                        label = "lswvl:red:alarm";
-                       gpios = <&gpio0 36 GPIO_ACTIVE_LOW>;
+                       gpios = <&gpio1 4 GPIO_ACTIVE_HIGH>;
                };
 
                led@2 {
                        label = "lswvl:red:func";
-                       gpios = <&gpio0 37 GPIO_ACTIVE_LOW>;
+                       gpios = <&gpio1 5 GPIO_ACTIVE_HIGH>;
                };
 
                led@3 {
                        label = "lswvl:amber:info";
-                       gpios = <&gpio0 38 GPIO_ACTIVE_LOW>;
+                       gpios = <&gpio1 6 GPIO_ACTIVE_HIGH>;
                };
 
                led@4 {
                        label = "lswvl:blue:func";
-                       gpios = <&gpio0 39 GPIO_ACTIVE_LOW>;
+                       gpios = <&gpio1 7 GPIO_ACTIVE_HIGH>;
                };
 
                led@5 {
                        label = "lswvl:blue:power";
-                       gpios = <&gpio0 40 GPIO_ACTIVE_LOW>;
+                       gpios = <&gpio1 8 GPIO_ACTIVE_LOW>;
                        default-state = "keep";
                };
 
                led@6 {
                        label = "lswvl:red:hdderr0";
-                       gpios = <&gpio0 34 GPIO_ACTIVE_LOW>;
+                       gpios = <&gpio1 2 GPIO_ACTIVE_HIGH>;
                };
 
                led@7 {
                        label = "lswvl:red:hdderr1";
-                       gpios = <&gpio0 35 GPIO_ACTIVE_LOW>;
+                       gpios = <&gpio1 3 GPIO_ACTIVE_HIGH>;
                };
        };
 
                                3250 1
                                5000 0>;
 
-               alarm-gpios = <&gpio0 43 GPIO_ACTIVE_HIGH>;
+               alarm-gpios = <&gpio1 11 GPIO_ACTIVE_HIGH>;
        };
 
        restart_poweroff {
index f5db16a08597e9a732f575c13b00415ce8e40920..b13ec20a708873bb65619ccd203c3552b04bc246 100644 (file)
@@ -1,7 +1,8 @@
 /*
  * Device Tree file for Buffalo Linkstation LS-WXL/WSXL
  *
- * Copyright (C) 2015, rogershimizu@gmail.com
+ * Copyright (C) 2015, 2016
+ * Roger Shimizu <rogershimizu@gmail.com>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
                button@1 {
                        label = "Function Button";
                        linux,code = <KEY_OPTION>;
-                       gpios = <&gpio1 41 GPIO_ACTIVE_LOW>;
+                       gpios = <&gpio1 9 GPIO_ACTIVE_LOW>;
                };
 
                button@2 {
                        label = "Power-on Switch";
                        linux,code = <KEY_RESERVED>;
                        linux,input-type = <5>;
-                       gpios = <&gpio1 42 GPIO_ACTIVE_LOW>;
+                       gpios = <&gpio1 10 GPIO_ACTIVE_LOW>;
                };
 
                button@3 {
                        label = "Power-auto Switch";
                        linux,code = <KEY_ESC>;
                        linux,input-type = <5>;
-                       gpios = <&gpio1 43 GPIO_ACTIVE_LOW>;
+                       gpios = <&gpio1 11 GPIO_ACTIVE_LOW>;
                };
        };
 
 
                led@1 {
                        label = "lswxl:blue:func";
-                       gpios = <&gpio1 36 GPIO_ACTIVE_LOW>;
+                       gpios = <&gpio1 4 GPIO_ACTIVE_LOW>;
                };
 
                led@2 {
                        label = "lswxl:red:alarm";
-                       gpios = <&gpio1 49 GPIO_ACTIVE_LOW>;
+                       gpios = <&gpio1 17 GPIO_ACTIVE_LOW>;
                };
 
                led@3 {
 
                led@4 {
                        label = "lswxl:blue:power";
-                       gpios = <&gpio1 8 GPIO_ACTIVE_LOW>;
+                       gpios = <&gpio1 7 GPIO_ACTIVE_HIGH>;
+                       default-state = "keep";
                };
 
                led@5 {
                        label = "lswxl:red:func";
-                       gpios = <&gpio1 5 GPIO_ACTIVE_LOW>;
-                       default-state = "keep";
+                       gpios = <&gpio1 2 GPIO_ACTIVE_HIGH>;
                };
 
                led@6 {
                        label = "lswxl:red:hdderr0";
-                       gpios = <&gpio1 2 GPIO_ACTIVE_LOW>;
+                       gpios = <&gpio0 8 GPIO_ACTIVE_HIGH>;
                };
 
                led@7 {
                        label = "lswxl:red:hdderr1";
-                       gpios = <&gpio1 3 GPIO_ACTIVE_LOW>;
+                       gpios = <&gpio1 14 GPIO_ACTIVE_HIGH>;
                };
        };
 
                pinctrl-0 = <&pmx_fan_low &pmx_fan_high &pmx_fan_lock>;
                pinctrl-names = "default";
 
-               gpios = <&gpio0 47 GPIO_ACTIVE_LOW
-                        &gpio0 48 GPIO_ACTIVE_LOW>;
+               gpios = <&gpio1 16 GPIO_ACTIVE_LOW
+                        &gpio1 15 GPIO_ACTIVE_LOW>;
 
                gpio-fan,speed-map = <0 3
                                1500 2
                                3250 1
                                5000 0>;
 
-               alarm-gpios = <&gpio1 49 GPIO_ACTIVE_HIGH>;
+               alarm-gpios = <&gpio1 8 GPIO_ACTIVE_HIGH>;
        };
 
        restart_poweroff {
                        enable-active-high;
                        regulator-always-on;
                        regulator-boot-on;
-                       gpio = <&gpio0 37 GPIO_ACTIVE_HIGH>;
+                       gpio = <&gpio1 5 GPIO_ACTIVE_HIGH>;
                };
                hdd_power0: regulator@2 {
                        compatible = "regulator-fixed";
index 1db6f2c506cce3209e64418493039046c79fd0ec..8082d64266a37c33c8fa2e7e722d3683519c3d0a 100644 (file)
        chip-delay = <40>;
        status = "okay";
        partitions {
+               compatible = "fixed-partitions";
                #address-cells = <1>;
                #size-cells = <1>;
 
index 7fed0bd4f3deea46281881ead81b2ebd29c5fa4b..00805322367e7eb73bcc4ba5bda124eee5372467 100644 (file)
        clock-frequency = <400000>;
 };
 
-&i2c2 {
-       clock-frequency = <400000>;
-};
-
-&i2c3 {
-       clock-frequency = <400000>;
-};
-
 /*
  * Only found on the wireless SOM. For the SOM without wireless, the pins for
  * MMC3 can be routed with jumpers to the second MMC slot on the devkit and
                interrupt-parent = <&gpio5>;
                interrupts = <24 IRQ_TYPE_LEVEL_HIGH>; /* gpio 152 */
                ref-clock-frequency = <26000000>;
+               tcxo-clock-frequency = <26000000>;
        };
 };
 
index 888412c63f97492ef445368c3208434d748912f2..902657d6713b073df82d33158d3d1cd0fac5ef16 100644 (file)
        };
 };
 
+&gpio8 {
+       /* TI trees use GPIO instead of msecure, see also muxing */
+       p234 {
+               gpio-hog;
+               gpios = <10 GPIO_ACTIVE_HIGH>;
+               output-high;
+               line-name = "gpio8_234/msecure";
+       };
+};
+
 &omap5_pmx_core {
        pinctrl-names = "default";
        pinctrl-0 = <
                >;
        };
 
+       /* TI trees use GPIO mode; msecure mode does not work reliably? */
+       palmas_msecure_pins: palmas_msecure_pins {
+               pinctrl-single,pins = <
+                       OMAP5_IOPAD(0x180, PIN_OUTPUT | MUX_MODE6) /* gpio8_234 */
+               >;
+       };
+
        usbhost_pins: pinmux_usbhost_pins {
                pinctrl-single,pins = <
                        OMAP5_IOPAD(0x0c4, PIN_INPUT | MUX_MODE0) /* usbb2_hsic_strobe */
                        &usbhost_wkup_pins
        >;
 
+       palmas_sys_nirq_pins: pinmux_palmas_sys_nirq_pins {
+               pinctrl-single,pins = <
+                       OMAP5_IOPAD(0x068, PIN_INPUT_PULLUP | MUX_MODE0) /* sys_nirq1 */
+               >;
+       };
+
        usbhost_wkup_pins: pinmux_usbhost_wkup_pins {
                pinctrl-single,pins = <
                        OMAP5_IOPAD(0x05a, PIN_OUTPUT | MUX_MODE0) /* fref_clk1_out, USB hub clk */
                interrupt-controller;
                #interrupt-cells = <2>;
                ti,system-power-controller;
+               pinctrl-names = "default";
+               pinctrl-0 = <&palmas_sys_nirq_pins &palmas_msecure_pins>;
 
                extcon_usb3: palmas_usb {
                        compatible = "ti,palmas-usb-vid";
                        #clock-cells = <0>;
                };
 
+               rtc {
+                       compatible = "ti,palmas-rtc";
+                       interrupt-parent = <&palmas>;
+                       interrupts = <8 IRQ_TYPE_NONE>;
+                       ti,backup-battery-chargeable;
+                       ti,backup-battery-charge-high-current;
+               };
+
                palmas_pmic {
                        compatible = "ti,palmas-pmic";
                        interrupt-parent = <&palmas>;
index 3daec912b4bf118edba55ef54fdf633413e696e5..420788229e6f8a30b6c3a114018c209ebad151c4 100644 (file)
@@ -1,7 +1,8 @@
 /*
  * Device Tree file for Buffalo Linkstation LS-WTGL
  *
- * Copyright (C) 2015, Roger Shimizu <rogershimizu@gmail.com>
+ * Copyright (C) 2015, 2016
+ * Roger Shimizu <rogershimizu@gmail.com>
  *
  * This file is dual-licensed: you can use it either under the terms
  * of the GPL or the X11 license, at your option. Note that this dual
@@ -69,8 +70,6 @@
 
                internal-regs {
                        pinctrl: pinctrl@10000 {
-                               pinctrl-0 = <&pmx_usb_power &pmx_power_hdd
-                                       &pmx_fan_low &pmx_fan_high &pmx_fan_lock>;
                                pinctrl-names = "default";
 
                                pmx_led_power: pmx-leds {
                led@1 {
                        label = "lswtgl:blue:power";
                        gpios = <&gpio0 0 GPIO_ACTIVE_LOW>;
+                       default-state = "keep";
                };
 
                led@2 {
                                3250 1
                                5000 0>;
 
-               alarm-gpios = <&gpio0 2 GPIO_ACTIVE_HIGH>;
+               alarm-gpios = <&gpio0 6 GPIO_ACTIVE_HIGH>;
        };
 
        restart_poweroff {
index 78a21f2828dff6d1cbafa9139b12791d4840936d..c548cabb102f5ae1eb063a43117adedecf52b0b0 100644 (file)
 };
 
 &extal1_clk {
-       clock-frequency = <25000000>;
+       clock-frequency = <24000000>;
 };
 &extal2_clk {
        clock-frequency = <48000000>;
index b8032bca462152e6904d299f84514c483dde0165..db1151c18466c3ac530be0ba39ba2f00be3a06bc 100644 (file)
                        dbgu: serial@fc069000 {
                                compatible = "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart";
                                reg = <0xfc069000 0x200>;
-                               interrupts = <2 IRQ_TYPE_LEVEL_HIGH 7>;
+                               interrupts = <45 IRQ_TYPE_LEVEL_HIGH 7>;
                                pinctrl-names = "default";
                                pinctrl-0 = <&pinctrl_dbgu>;
                                clocks = <&dbgu_clk>;
index d0c74385331803383d5296d7157516aa63c63758..27a333eb89870167b82a170d3553bd5078fafdfb 100644 (file)
                        };
                        mmcsd_default_mode: mmcsd_default {
                                mmcsd_default_cfg1 {
-                                       /* MCCLK */
-                                       pins = "GPIO8_B10";
-                                       ste,output = <0>;
-                               };
-                               mmcsd_default_cfg2 {
-                                       /* MCCMDDIR, MCDAT0DIR, MCDAT31DIR, MCDATDIR2 */
-                                       pins = "GPIO10_C11", "GPIO15_A12",
-                                       "GPIO16_C13", "GPIO23_D15";
-                                       ste,output = <1>;
-                               };
-                               mmcsd_default_cfg3 {
-                                       /* MCCMD, MCDAT3-0, MCMSFBCLK */
-                                       pins = "GPIO9_A10", "GPIO11_B11",
-                                       "GPIO12_A11", "GPIO13_C12",
-                                       "GPIO14_B12", "GPIO24_C15";
-                                       ste,input = <1>;
+                                       /*
+                                        * MCCLK, MCCMDDIR, MCDAT0DIR, MCDAT31DIR, MCDATDIR2
+                                        * MCCMD, MCDAT3-0, MCMSFBCLK
+                                        */
+                                       pins = "GPIO8_B10", "GPIO9_A10", "GPIO10_C11", "GPIO11_B11",
+                                              "GPIO12_A11", "GPIO13_C12", "GPIO14_B12", "GPIO15_A12",
+                                              "GPIO16_C13", "GPIO23_D15", "GPIO24_C15";
+                                       ste,output = <2>;
                                };
                        };
                };
                        clock-names = "mclk", "apb_pclk";
                        interrupt-parent = <&vica>;
                        interrupts = <22>;
-                       max-frequency = <48000000>;
+                       max-frequency = <400000>;
                        bus-width = <4>;
                        cap-mmc-highspeed;
                        cap-sd-highspeed;
+                       full-pwr-cycle;
+                       /*
+                        * The STw4811 circuit used with the Nomadik strictly
+                        * requires that all of these signal direction pins be
+                        * routed and used for its 4-bit levelshifter.
+                        */
+                       st,sig-dir-dat0;
+                       st,sig-dir-dat2;
+                       st,sig-dir-dat31;
+                       st,sig-dir-cmd;
+                       st,sig-pin-fbclk;
                        pinctrl-names = "default";
                        pinctrl-0 = <&mmcsd_default_mux>, <&mmcsd_default_mode>;
                        vmmc-supply = <&vmmc_regulator>;
index 314f6be2dca2b4aec6aaa82cc8cfb2ba06c2bb29..8e8b2ace9b7c5fb624f4e24122cf269538195f3d 100644 (file)
@@ -426,6 +426,7 @@ CONFIG_SUNXI_WATCHDOG=y
 CONFIG_IMX2_WDT=y
 CONFIG_TEGRA_WATCHDOG=m
 CONFIG_MESON_WATCHDOG=y
+CONFIG_DW_WATCHDOG=y
 CONFIG_DIGICOLOR_WATCHDOG=y
 CONFIG_MFD_AS3711=y
 CONFIG_MFD_AS3722=y
index c5e1943e5427db037b98ce779566aeab1b1c9310..a7151744b85c17d8c7041f33141e7c4cf7809fec 100644 (file)
@@ -50,6 +50,7 @@ CONFIG_SOC_AM33XX=y
 CONFIG_SOC_AM43XX=y
 CONFIG_SOC_DRA7XX=y
 CONFIG_ARM_THUMBEE=y
+CONFIG_ARM_KERNMEM_PERMS=y
 CONFIG_ARM_ERRATA_411920=y
 CONFIG_ARM_ERRATA_430973=y
 CONFIG_SMP=y
@@ -177,6 +178,7 @@ CONFIG_TI_CPTS=y
 CONFIG_AT803X_PHY=y
 CONFIG_SMSC_PHY=y
 CONFIG_USB_USBNET=m
+CONFIG_USB_NET_SMSC75XX=m
 CONFIG_USB_NET_SMSC95XX=m
 CONFIG_USB_ALI_M5632=y
 CONFIG_USB_AN2720=y
@@ -354,6 +356,11 @@ CONFIG_USB_MUSB_DSPS=m
 CONFIG_USB_INVENTRA_DMA=y
 CONFIG_USB_TI_CPPI41_DMA=y
 CONFIG_USB_DWC3=m
+CONFIG_USB_SERIAL=m
+CONFIG_USB_SERIAL_GENERIC=y
+CONFIG_USB_SERIAL_SIMPLE=m
+CONFIG_USB_SERIAL_FTDI_SIO=m
+CONFIG_USB_SERIAL_PL2303=m
 CONFIG_USB_TEST=m
 CONFIG_AM335X_PHY_USB=y
 CONFIG_USB_GADGET=m
@@ -387,6 +394,7 @@ CONFIG_NEW_LEDS=y
 CONFIG_LEDS_CLASS=m
 CONFIG_LEDS_GPIO=m
 CONFIG_LEDS_PWM=m
+CONFIG_LEDS_PCA963X=m
 CONFIG_LEDS_TRIGGERS=y
 CONFIG_LEDS_TRIGGER_TIMER=m
 CONFIG_LEDS_TRIGGER_ONESHOT=m
@@ -449,6 +457,8 @@ CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_ISO8859_1=y
 CONFIG_PRINTK_TIME=y
 CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_SPLIT=y
+CONFIG_DEBUG_INFO_DWARF4=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_SCHEDSTATS=y
 CONFIG_TIMER_STATS=y
index ede692ffa32ed14958f81eec0d7caac8f7efe853..5dd2528e9e45e369d0879185291e6981dbea15e4 100644 (file)
 #define __NR_userfaultfd               (__NR_SYSCALL_BASE+388)
 #define __NR_membarrier                        (__NR_SYSCALL_BASE+389)
 #define __NR_mlock2                    (__NR_SYSCALL_BASE+390)
+#define __NR_copy_file_range           (__NR_SYSCALL_BASE+391)
 
 /*
  * The following SWIs are ARM private.
index ac368bb068d1409af37a8e2585bb837b1ef1f02b..dfc7cd6851ad4bbd09b11ae94ee6056f691b1fcb 100644 (file)
                CALL(sys_userfaultfd)
                CALL(sys_membarrier)
                CALL(sys_mlock2)
+               CALL(sys_copy_file_range)
 #ifndef syscalls_counted
 .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
 #define syscalls_counted
index 9cda974a3009f3295e7bd013709f1bf586752282..d7f1d69daf6d4d083005e15d11d556c412aacfbc 100644 (file)
@@ -18,7 +18,6 @@
 #include <linux/slab.h>
 #include <linux/of.h>
 #include <linux/pinctrl/machine.h>
-#include <linux/platform_data/mailbox-omap.h>
 
 #include <asm/mach-types.h>
 #include <asm/mach/map.h>
@@ -66,32 +65,6 @@ static int __init omap3_l3_init(void)
 }
 omap_postcore_initcall(omap3_l3_init);
 
-#if defined(CONFIG_OMAP2PLUS_MBOX) || defined(CONFIG_OMAP2PLUS_MBOX_MODULE)
-static inline void __init omap_init_mbox(void)
-{
-       struct omap_hwmod *oh;
-       struct platform_device *pdev;
-       struct omap_mbox_pdata *pdata;
-
-       oh = omap_hwmod_lookup("mailbox");
-       if (!oh) {
-               pr_err("%s: unable to find hwmod\n", __func__);
-               return;
-       }
-       if (!oh->dev_attr) {
-               pr_err("%s: hwmod doesn't have valid attrs\n", __func__);
-               return;
-       }
-
-       pdata = (struct omap_mbox_pdata *)oh->dev_attr;
-       pdev = omap_device_build("omap-mailbox", -1, oh, pdata, sizeof(*pdata));
-       WARN(IS_ERR(pdev), "%s: could not build device, err %ld\n",
-                                               __func__, PTR_ERR(pdev));
-}
-#else
-static inline void omap_init_mbox(void) { }
-#endif /* CONFIG_OMAP2PLUS_MBOX */
-
 static inline void omap_init_sti(void) {}
 
 #if defined(CONFIG_SPI_OMAP24XX) || defined(CONFIG_SPI_OMAP24XX_MODULE)
@@ -229,7 +202,6 @@ static int __init omap2_init_devices(void)
                 * please keep these calls, and their implementations above,
                 * in alphabetical order so they're easier to sort through.
                 */
-               omap_init_mbox();
                omap_init_mcspi();
                omap_init_sham();
                omap_init_aes();
index e781e4fae13a92fd68aa04ac891b9731bc8fa60e..a935d28443dab40e1a8005b7144aeb8de436c73c 100644 (file)
@@ -23,6 +23,8 @@
 #include <linux/platform_data/pinctrl-single.h>
 #include <linux/platform_data/iommu-omap.h>
 #include <linux/platform_data/wkup_m3.h>
+#include <linux/platform_data/pwm_omap_dmtimer.h>
+#include <plat/dmtimer.h>
 
 #include "common.h"
 #include "common-board-devices.h"
@@ -449,6 +451,24 @@ void omap_auxdata_legacy_init(struct device *dev)
        dev->platform_data = &twl_gpio_auxdata;
 }
 
+/* Dual mode timer PWM callbacks platdata */
+#if IS_ENABLED(CONFIG_OMAP_DM_TIMER)
+struct pwm_omap_dmtimer_pdata pwm_dmtimer_pdata = {
+       .request_by_node = omap_dm_timer_request_by_node,
+       .free = omap_dm_timer_free,
+       .enable = omap_dm_timer_enable,
+       .disable = omap_dm_timer_disable,
+       .get_fclk = omap_dm_timer_get_fclk,
+       .start = omap_dm_timer_start,
+       .stop = omap_dm_timer_stop,
+       .set_load = omap_dm_timer_set_load,
+       .set_match = omap_dm_timer_set_match,
+       .set_pwm = omap_dm_timer_set_pwm,
+       .set_prescaler = omap_dm_timer_set_prescaler,
+       .write_counter = omap_dm_timer_write_counter,
+};
+#endif
+
 /*
  * Few boards still need auxdata populated before we populate
  * the dev entries in of_platform_populate().
@@ -502,6 +522,9 @@ static struct of_dev_auxdata omap_auxdata_lookup[] __initdata = {
        OF_DEV_AUXDATA("ti,am4372-wkup-m3", 0x44d00000, "44d00000.wkup_m3",
                       &wkup_m3_data),
 #endif
+#if IS_ENABLED(CONFIG_OMAP_DM_TIMER)
+       OF_DEV_AUXDATA("ti,omap-dmtimer-pwm", 0, NULL, &pwm_dmtimer_pdata),
+#endif
 #if defined(CONFIG_ARCH_OMAP4) || defined(CONFIG_SOC_OMAP5)
        OF_DEV_AUXDATA("ti,omap4-iommu", 0x4a066000, "4a066000.mmu",
                       &omap4_iommu_pdata),
index eafd120b53f1bc15c82f2cc47dc8033e31ca566e..1b9f0520dea9154afa31f9668241e03f211fdc6a 100644 (file)
@@ -86,13 +86,18 @@ ENTRY(enable_omap3630_toggle_l2_on_restore)
        stmfd   sp!, {lr}       @ save registers on stack
        /* Setup so that we will disable and enable l2 */
        mov     r1, #0x1
-       adrl    r2, l2dis_3630  @ may be too distant for plain adr
-       str     r1, [r2]
+       adrl    r3, l2dis_3630_offset   @ may be too distant for plain adr
+       ldr     r2, [r3]                @ value for offset
+       str     r1, [r2, r3]            @ write to l2dis_3630
        ldmfd   sp!, {pc}       @ restore regs and return
 ENDPROC(enable_omap3630_toggle_l2_on_restore)
 
-       .text
-/* Function to call rom code to save secure ram context */
+/*
+ * Function to call rom code to save secure ram context. This gets
+ * relocated to SRAM, so it can be all in .data section. Otherwise
+ * we need to initialize api_params separately.
+ */
+       .data
        .align  3
 ENTRY(save_secure_ram_context)
        stmfd   sp!, {r4 - r11, lr}     @ save registers on stack
@@ -126,6 +131,8 @@ ENDPROC(save_secure_ram_context)
 ENTRY(save_secure_ram_context_sz)
        .word   . - save_secure_ram_context
 
+       .text
+
 /*
  * ======================
  * == Idle entry point ==
@@ -289,12 +296,6 @@ wait_sdrc_ready:
        bic     r5, r5, #0x40
        str     r5, [r4]
 
-/*
- * PC-relative stores lead to undefined behaviour in Thumb-2: use a r7 as a
- * base instead.
- * Be careful not to clobber r7 when maintaing this code.
- */
-
 is_dll_in_lock_mode:
        /* Is dll in lock mode? */
        ldr     r4, sdrc_dlla_ctrl
@@ -302,11 +303,7 @@ is_dll_in_lock_mode:
        tst     r5, #0x4
        bne     exit_nonoff_modes       @ Return if locked
        /* wait till dll locks */
-       adr     r7, kick_counter
 wait_dll_lock_timed:
-       ldr     r4, wait_dll_lock_counter
-       add     r4, r4, #1
-       str     r4, [r7, #wait_dll_lock_counter - kick_counter]
        ldr     r4, sdrc_dlla_status
        /* Wait 20uS for lock */
        mov     r6, #8
@@ -330,9 +327,6 @@ kick_dll:
        orr     r6, r6, #(1<<3)         @ enable dll
        str     r6, [r4]
        dsb
-       ldr     r4, kick_counter
-       add     r4, r4, #1
-       str     r4, [r7]                @ kick_counter
        b       wait_dll_lock_timed
 
 exit_nonoff_modes:
@@ -360,15 +354,6 @@ sdrc_dlla_status:
        .word   SDRC_DLLA_STATUS_V
 sdrc_dlla_ctrl:
        .word   SDRC_DLLA_CTRL_V
-       /*
-        * When exporting to userspace while the counters are in SRAM,
-        * these 2 words need to be at the end to facilitate retrival!
-        */
-kick_counter:
-       .word   0
-wait_dll_lock_counter:
-       .word   0
-
 ENTRY(omap3_do_wfi_sz)
        .word   . - omap3_do_wfi
 
@@ -437,7 +422,9 @@ ENTRY(omap3_restore)
        cmp     r2, #0x0        @ Check if target power state was OFF or RET
        bne     logic_l1_restore
 
-       ldr     r0, l2dis_3630
+       adr     r1, l2dis_3630_offset   @ address for offset
+       ldr     r0, [r1]                @ value for offset
+       ldr     r0, [r1, r0]            @ value at l2dis_3630
        cmp     r0, #0x1        @ should we disable L2 on 3630?
        bne     skipl2dis
        mrc     p15, 0, r0, c1, c0, 1
@@ -449,12 +436,14 @@ skipl2dis:
        and     r1, #0x700
        cmp     r1, #0x300
        beq     l2_inv_gp
+       adr     r0, l2_inv_api_params_offset
+       ldr     r3, [r0]
+       add     r3, r3, r0              @ r3 points to dummy parameters
        mov     r0, #40                 @ set service ID for PPA
        mov     r12, r0                 @ copy secure Service ID in r12
        mov     r1, #0                  @ set task id for ROM code in r1
        mov     r2, #4                  @ set some flags in r2, r6
        mov     r6, #0xff
-       adr     r3, l2_inv_api_params   @ r3 points to dummy parameters
        dsb                             @ data write barrier
        dmb                             @ data memory barrier
        smc     #1                      @ call SMI monitor (smi #1)
@@ -488,8 +477,8 @@ skipl2dis:
        b       logic_l1_restore
 
        .align
-l2_inv_api_params:
-       .word   0x1, 0x00
+l2_inv_api_params_offset:
+       .long   l2_inv_api_params - .
 l2_inv_gp:
        /* Execute smi to invalidate L2 cache */
        mov r12, #0x1                   @ set up to invalidate L2
@@ -506,7 +495,9 @@ l2_inv_gp:
        mov     r12, #0x2
        smc     #0                      @ Call SMI monitor (smieq)
 logic_l1_restore:
-       ldr     r1, l2dis_3630
+       adr     r0, l2dis_3630_offset   @ adress for offset
+       ldr     r1, [r0]                @ value for offset
+       ldr     r1, [r0, r1]            @ value at l2dis_3630
        cmp     r1, #0x1                @ Test if L2 re-enable needed on 3630
        bne     skipl2reen
        mrc     p15, 0, r1, c1, c0, 1
@@ -535,9 +526,17 @@ control_stat:
        .word   CONTROL_STAT
 control_mem_rta:
        .word   CONTROL_MEM_RTA_CTRL
+l2dis_3630_offset:
+       .long   l2dis_3630 - .
+
+       .data
 l2dis_3630:
        .word   0
 
+       .data
+l2_inv_api_params:
+       .word   0x1, 0x00
+
 /*
  * Internal functions
  */
index 9b09d85d811a1c52b0fa735a2e2743e05346843d..c7a3b4aab4b5441249ddd9a7fdc362ef9370d737 100644 (file)
        dsb
 .endm
 
-ppa_zero_params:
-       .word           0x0
-
-ppa_por_params:
-       .word           1, 0
-
 #ifdef CONFIG_ARCH_OMAP4
 
 /*
@@ -266,7 +260,9 @@ ENTRY(omap4_cpu_resume)
        beq     skip_ns_smp_enable
 ppa_actrl_retry:
        mov     r0, #OMAP4_PPA_CPU_ACTRL_SMP_INDEX
-       adr     r3, ppa_zero_params             @ Pointer to parameters
+       adr     r1, ppa_zero_params_offset
+       ldr     r3, [r1]
+       add     r3, r3, r1                      @ Pointer to ppa_zero_params
        mov     r1, #0x0                        @ Process ID
        mov     r2, #0x4                        @ Flag
        mov     r6, #0xff
@@ -303,7 +299,9 @@ skip_ns_smp_enable:
        ldr     r0, =OMAP4_PPA_L2_POR_INDEX
        ldr     r1, =OMAP44XX_SAR_RAM_BASE
        ldr     r4, [r1, #L2X0_PREFETCH_CTRL_OFFSET]
-       adr     r3, ppa_por_params
+       adr     r1, ppa_por_params_offset
+       ldr     r3, [r1]
+       add     r3, r3, r1                      @ Pointer to ppa_por_params
        str     r4, [r3, #0x04]
        mov     r1, #0x0                        @ Process ID
        mov     r2, #0x4                        @ Flag
@@ -328,6 +326,8 @@ skip_l2en:
 #endif
 
        b       cpu_resume                      @ Jump to generic resume
+ppa_por_params_offset:
+       .long   ppa_por_params - .
 ENDPROC(omap4_cpu_resume)
 #endif /* CONFIG_ARCH_OMAP4 */
 
@@ -380,4 +380,13 @@ ENTRY(omap_do_wfi)
        nop
 
        ldmfd   sp!, {pc}
+ppa_zero_params_offset:
+       .long   ppa_zero_params - .
 ENDPROC(omap_do_wfi)
+
+       .data
+ppa_zero_params:
+       .word           0
+
+ppa_por_params:
+       .word           1, 0
index def40a0dd60cd290d91233cc47a1a8bad4ea6aa1..70ab4a25a5f853d4d8dade14490618d2523fcd13 100644 (file)
@@ -1,5 +1,6 @@
 menuconfig ARCH_REALVIEW
-       bool "ARM Ltd. RealView family" if ARCH_MULTI_V5 || ARCH_MULTI_V6 || ARCH_MULTI_V7
+       bool "ARM Ltd. RealView family"
+       depends on ARCH_MULTI_V5 || ARCH_MULTI_V6 || ARCH_MULTI_V7
        select ARM_AMBA
        select ARM_TIMER_SP804
        select COMMON_CLK_VERSATILE
index 8be6632407d8f0dbb75f87c5878578dffba70fbb..dae8d86ef4ccc75c4bb3dfbb98aba9e7e12eec9f 100644 (file)
@@ -4,10 +4,9 @@
 ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/$(src)/include \
        -I$(srctree)/arch/arm/plat-versatile/include
 
-
+obj-y                                  := core.o
 obj-$(CONFIG_REALVIEW_DT)              += realview-dt.o
 obj-$(CONFIG_SMP)                      += platsmp-dt.o
-obj-y                                  := core.o
 
 ifdef CONFIG_ATAGS
 obj-$(CONFIG_MACH_REALVIEW_EB)         += realview_eb.o
index 65585392655b3ac499dff837b2f6adfe02d5a708..6964e88760614ba1f8fa60c7f6fd12f3b07dfe20 100644 (file)
@@ -80,7 +80,7 @@ static void __init realview_smp_prepare_cpus(unsigned int max_cpus)
                     virt_to_phys(versatile_secondary_startup));
 }
 
-struct smp_operations realview_dt_smp_ops __initdata = {
+static const struct smp_operations realview_dt_smp_ops __initconst = {
        .smp_prepare_cpus       = realview_smp_prepare_cpus,
        .smp_secondary_init     = versatile_secondary_init,
        .smp_boot_secondary     = versatile_boot_secondary,
index d6a3714b096e69801c381d320a0ccb1be3472585..ebe15b93bbe870804eb17759b05096a8f7298167 100644 (file)
@@ -1,5 +1,6 @@
 config ARCH_TANGO
-       bool "Sigma Designs Tango4 (SMP87xx)" if ARCH_MULTI_V7
+       bool "Sigma Designs Tango4 (SMP87xx)"
+       depends on ARCH_MULTI_V7
        # Cortex-A9 MPCore r3p0, PL310 r3p2
        select ARCH_HAS_HOLES_MEMORYMODEL
        select ARM_ERRATA_754322
index a18d5a34e2f5738e4f62bd24bf8a0b9edff7a3e8..a21f55e000d258c734535cbfc9df57744599887a 100644 (file)
@@ -9,7 +9,7 @@ static int tango_boot_secondary(unsigned int cpu, struct task_struct *idle)
        return 0;
 }
 
-static struct smp_operations tango_smp_ops __initdata = {
+static const struct smp_operations tango_smp_ops __initconst = {
        .smp_boot_secondary     = tango_boot_secondary,
 };
 
index a90f3556017fe80ae3d2587e54e063cd619c0e6a..0fa8b84ed657e702a0f34f037927bceec78b8046 100644 (file)
@@ -13,57 +13,5 @@ menuconfig ARCH_TEGRA
        select ARCH_HAS_RESET_CONTROLLER
        select RESET_CONTROLLER
        select SOC_BUS
-       select USB_ULPI if USB_PHY
-       select USB_ULPI_VIEWPORT if USB_PHY
        help
          This enables support for NVIDIA Tegra based systems.
-
-if ARCH_TEGRA
-
-config ARCH_TEGRA_2x_SOC
-       bool "Enable support for Tegra20 family"
-       select ARCH_NEEDS_CPU_IDLE_COUPLED if SMP
-       select ARM_ERRATA_720789
-       select ARM_ERRATA_754327 if SMP
-       select ARM_ERRATA_764369 if SMP
-       select PINCTRL_TEGRA20
-       select PL310_ERRATA_727915 if CACHE_L2X0
-       select PL310_ERRATA_769419 if CACHE_L2X0
-       select TEGRA_TIMER
-       help
-         Support for NVIDIA Tegra AP20 and T20 processors, based on the
-         ARM CortexA9MP CPU and the ARM PL310 L2 cache controller
-
-config ARCH_TEGRA_3x_SOC
-       bool "Enable support for Tegra30 family"
-       select ARM_ERRATA_754322
-       select ARM_ERRATA_764369 if SMP
-       select PINCTRL_TEGRA30
-       select PL310_ERRATA_769419 if CACHE_L2X0
-       select TEGRA_TIMER
-       help
-         Support for NVIDIA Tegra T30 processor family, based on the
-         ARM CortexA9MP CPU and the ARM PL310 L2 cache controller
-
-config ARCH_TEGRA_114_SOC
-       bool "Enable support for Tegra114 family"
-       select ARM_ERRATA_798181 if SMP
-       select ARM_L1_CACHE_SHIFT_6
-       select HAVE_ARM_ARCH_TIMER
-       select PINCTRL_TEGRA114
-       select TEGRA_TIMER
-       help
-         Support for NVIDIA Tegra T114 processor family, based on the
-         ARM CortexA15MP CPU
-
-config ARCH_TEGRA_124_SOC
-       bool "Enable support for Tegra124 family"
-       select ARM_L1_CACHE_SHIFT_6
-       select HAVE_ARM_ARCH_TIMER
-       select PINCTRL_TEGRA124
-       select TEGRA_TIMER
-       help
-         Support for NVIDIA Tegra T124 processor family, based on the
-         ARM CortexA15MP CPU
-
-endif
index e6b684e14322cef21e44d1b660e4c6e8d0c7ec43..f5d19667484edd38e65bb1aa1d1a4413a036bfbf 100644 (file)
@@ -231,8 +231,11 @@ ENDPROC(tegra20_cpu_is_resettable_soon)
  * tegra20_tear_down_core in IRAM
  */
 ENTRY(tegra20_sleep_core_finish)
+       mov     r4, r0
        /* Flush, disable the L1 data cache and exit SMP */
+       mov     r0, #TEGRA_FLUSH_CACHE_ALL
        bl      tegra_disable_clean_inv_dcache
+       mov     r0, r4
 
        mov32   r3, tegra_shut_off_mmu
        add     r3, r3, r0
index 9a2f0b051e1035a4f956dbf98a90a9de493416e9..16e5ff03383cad5b709eb42b24340b033f86a2c7 100644 (file)
@@ -242,8 +242,11 @@ ENDPROC(tegra30_cpu_shutdown)
  * tegra30_tear_down_core in IRAM
  */
 ENTRY(tegra30_sleep_core_finish)
+       mov     r4, r0
        /* Flush, disable the L1 data cache and exit SMP */
+       mov     r0, #TEGRA_FLUSH_CACHE_ALL
        bl      tegra_disable_clean_inv_dcache
+       mov     r0, r4
 
        /*
         * Preload all the address literals that are needed for the
index 534a60ae282e702d3b06e8dad289d8f109cd80ef..0eca3812527e614e891113839992b4e20cf3fb8d 100644 (file)
@@ -1200,10 +1200,7 @@ error:
        while (i--)
                if (pages[i])
                        __free_pages(pages[i], 0);
-       if (array_size <= PAGE_SIZE)
-               kfree(pages);
-       else
-               vfree(pages);
+       kvfree(pages);
        return NULL;
 }
 
@@ -1211,7 +1208,6 @@ static int __iommu_free_buffer(struct device *dev, struct page **pages,
                               size_t size, struct dma_attrs *attrs)
 {
        int count = size >> PAGE_SHIFT;
-       int array_size = count * sizeof(struct page *);
        int i;
 
        if (dma_get_attr(DMA_ATTR_FORCE_CONTIGUOUS, attrs)) {
@@ -1222,10 +1218,7 @@ static int __iommu_free_buffer(struct device *dev, struct page **pages,
                                __free_pages(pages[i], 0);
        }
 
-       if (array_size <= PAGE_SIZE)
-               kfree(pages);
-       else
-               vfree(pages);
+       kvfree(pages);
        return 0;
 }
 
index 2c400412b3bce47ef97b7f7eb74b0018d88e7dab..21074f674bdeb707c137a9b87c2a65bde4bbb25a 100644 (file)
@@ -105,18 +105,6 @@ config ARCH_TEGRA
        help
          This enables support for the NVIDIA Tegra SoC family.
 
-config ARCH_TEGRA_132_SOC
-       bool "NVIDIA Tegra132 SoC"
-       depends on ARCH_TEGRA
-       select PINCTRL_TEGRA124
-       select USB_ULPI if USB_PHY
-       select USB_ULPI_VIEWPORT if USB_PHY
-       help
-         Enable support for NVIDIA Tegra132 SoC, based on the Denver
-         ARMv8 CPU.  The Tegra132 SoC is similar to the Tegra124 SoC,
-         but contains an NVIDIA Denver CPU complex in place of
-         Tegra124's "4+1" Cortex-A15 CPU complex.
-
 config ARCH_SPRD
        bool "Spreadtrum SoC platform"
        help
index cd822d8454c0536165810004089c3a7ce5d0068d..307237cfe728b152e350b5fb4f1c1abc18d6fb4c 100644 (file)
@@ -27,6 +27,8 @@ $(warning LSE atomics not supported by binutils)
 endif
 
 KBUILD_CFLAGS  += -mgeneral-regs-only $(lseinstr)
+KBUILD_CFLAGS  += -fno-asynchronous-unwind-tables
+KBUILD_CFLAGS  += $(call cc-option, -mpc-relative-literal-loads)
 KBUILD_AFLAGS  += $(lseinstr)
 
 ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)
index 76e7510835b29322957d0d5b05654ce5fc6f4597..f832b8a7453a307b351273a83e320e413e27ce4a 100644 (file)
@@ -9,6 +9,7 @@ dts-dirs += freescale
 dts-dirs += hisilicon
 dts-dirs += marvell
 dts-dirs += mediatek
+dts-dirs += nvidia
 dts-dirs += qcom
 dts-dirs += renesas
 dts-dirs += rockchip
index dd5158eb5872396693bba678cda765a719e25e97..e5b59ca9debb1916764746bb168d969572e17771 100644 (file)
                             <GIC_SPI 89 IRQ_TYPE_LEVEL_HIGH>,
                             <GIC_SPI 90 IRQ_TYPE_LEVEL_HIGH>,
                             <GIC_SPI 91 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 92 IRQ_TYPE_LEVEL_HIGH>,
                             <GIC_SPI 108 IRQ_TYPE_LEVEL_HIGH>,
                             <GIC_SPI 109 IRQ_TYPE_LEVEL_HIGH>,
                             <GIC_SPI 110 IRQ_TYPE_LEVEL_HIGH>,
index da7b6e61325758c547b521c3df675adeb9568d2b..933cba3599189c7f85c6a6adea77eb661be459f5 100644 (file)
@@ -23,9 +23,8 @@ soc0: soc@000000000 {
                };
        };
 
-       dsa: dsa@c7000000 {
+       dsaf0: dsa@c7000000 {
                compatible = "hisilicon,hns-dsaf-v1";
-               dsa_name = "dsaf0";
                mode = "6port-16rss";
                interrupt-parent = <&mbigen_dsa>;
 
@@ -127,7 +126,7 @@ soc0: soc@000000000 {
 
        eth0: ethernet@0{
                compatible = "hisilicon,hns-nic-v1";
-               ae-name = "dsaf0";
+               ae-handle = <&dsaf0>;
                port-id = <0>;
                local-mac-address = [00 00 00 01 00 58];
                status = "disabled";
@@ -135,14 +134,14 @@ soc0: soc@000000000 {
        };
        eth1: ethernet@1{
                compatible = "hisilicon,hns-nic-v1";
-               ae-name = "dsaf0";
+               ae-handle = <&dsaf0>;
                port-id = <1>;
                status = "disabled";
                dma-coherent;
        };
        eth2: ethernet@2{
                compatible = "hisilicon,hns-nic-v1";
-               ae-name = "dsaf0";
+               ae-handle = <&dsaf0>;
                port-id = <2>;
                local-mac-address = [00 00 00 01 00 5a];
                status = "disabled";
@@ -150,7 +149,7 @@ soc0: soc@000000000 {
        };
        eth3: ethernet@3{
                compatible = "hisilicon,hns-nic-v1";
-               ae-name = "dsaf0";
+               ae-handle = <&dsaf0>;
                port-id = <3>;
                local-mac-address = [00 00 00 01 00 5b];
                status = "disabled";
@@ -158,7 +157,7 @@ soc0: soc@000000000 {
        };
        eth4: ethernet@4{
                compatible = "hisilicon,hns-nic-v1";
-               ae-name = "dsaf0";
+               ae-handle = <&dsaf0>;
                port-id = <4>;
                local-mac-address = [00 00 00 01 00 5c];
                status = "disabled";
@@ -166,7 +165,7 @@ soc0: soc@000000000 {
        };
        eth5: ethernet@5{
                compatible = "hisilicon,hns-nic-v1";
-               ae-name = "dsaf0";
+               ae-handle = <&dsaf0>;
                port-id = <5>;
                local-mac-address = [00 00 00 01 00 5d];
                status = "disabled";
@@ -174,7 +173,7 @@ soc0: soc@000000000 {
        };
        eth6: ethernet@6{
                compatible = "hisilicon,hns-nic-v1";
-               ae-name = "dsaf0";
+               ae-handle = <&dsaf0>;
                port-id = <6>;
                local-mac-address = [00 00 00 01 00 5e];
                status = "disabled";
@@ -182,7 +181,7 @@ soc0: soc@000000000 {
        };
        eth7: ethernet@7{
                compatible = "hisilicon,hns-nic-v1";
-               ae-name = "dsaf0";
+               ae-handle = <&dsaf0>;
                port-id = <7>;
                local-mac-address = [00 00 00 01 00 5f];
                status = "disabled";
diff --git a/arch/arm64/boot/dts/nvidia/Makefile b/arch/arm64/boot/dts/nvidia/Makefile
new file mode 100644 (file)
index 0000000..a7e865d
--- /dev/null
@@ -0,0 +1,7 @@
+dtb-$(CONFIG_ARCH_TEGRA_132_SOC) += tegra132-norrin.dtb
+dtb-$(CONFIG_ARCH_TEGRA_210_SOC) += tegra210-p2371-0000.dtb
+dtb-$(CONFIG_ARCH_TEGRA_210_SOC) += tegra210-p2371-2180.dtb
+dtb-$(CONFIG_ARCH_TEGRA_210_SOC) += tegra210-p2571.dtb
+
+always         := $(dtb-y)
+clean-files    := *.dtb
diff --git a/arch/arm64/boot/dts/nvidia/tegra132-norrin.dts b/arch/arm64/boot/dts/nvidia/tegra132-norrin.dts
new file mode 100644 (file)
index 0000000..62f33fc
--- /dev/null
@@ -0,0 +1,1132 @@
+/dts-v1/;
+
+#include <dt-bindings/input/input.h>
+#include "tegra132.dtsi"
+
+/ {
+       model = "NVIDIA Tegra132 Norrin";
+       compatible = "nvidia,norrin", "nvidia,tegra132", "nvidia,tegra124";
+
+       aliases {
+               rtc0 = "/i2c@0,7000d000/as3722@40";
+               rtc1 = "/rtc@0,7000e000";
+       };
+
+       chosen { };
+
+       memory {
+               device_type = "memory";
+               reg = <0x0 0x80000000 0x0 0x80000000>;
+       };
+
+       host1x@0,50000000 {
+               hdmi@0,54280000 {
+                       status = "disabled";
+
+                       vdd-supply = <&vdd_3v3_hdmi>;
+                       pll-supply = <&vdd_hdmi_pll>;
+                       hdmi-supply = <&vdd_5v0_hdmi>;
+
+                       nvidia,ddc-i2c-bus = <&hdmi_ddc>;
+                       nvidia,hpd-gpio =
+                               <&gpio TEGRA_GPIO(N, 7) GPIO_ACTIVE_HIGH>;
+               };
+
+               sor@0,54540000 {
+                       status = "okay";
+
+                       nvidia,dpaux = <&dpaux>;
+                       nvidia,panel = <&panel>;
+               };
+
+               dpaux: dpaux@0,545c0000 {
+                       vdd-supply = <&vdd_3v3_panel>;
+                       status = "okay";
+               };
+       };
+
+       gpu@0,57000000 {
+               status = "okay";
+
+               vdd-supply = <&vdd_gpu>;
+       };
+
+       pinmux@0,70000868 {
+               pinctrl-names = "default";
+               pinctrl-0 = <&pinmux_default>;
+
+               pinmux_default: pinmux@0 {
+                       dap_mclk1_pw4 {
+                               nvidia,pins = "dap_mclk1_pw4";
+                               nvidia,function = "extperiph1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap2_din_pa4 {
+                               nvidia,pins = "dap2_din_pa4";
+                               nvidia,function = "i2s1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       dap2_dout_pa5 {
+                               nvidia,pins = "dap2_dout_pa5",
+                                             "dap2_fs_pa2",
+                                             "dap2_sclk_pa3";
+                               nvidia,function = "i2s1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap3_dout_pp2 {
+                               nvidia,pins = "dap3_dout_pp2";
+                               nvidia,function = "i2s2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       dvfs_pwm_px0 {
+                               nvidia,pins = "dvfs_pwm_px0",
+                                             "dvfs_clk_px2";
+                               nvidia,function = "cldvfs";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       ulpi_clk_py0 {
+                               nvidia,pins = "ulpi_clk_py0",
+                                             "ulpi_nxt_py2",
+                                             "ulpi_stp_py3";
+                               nvidia,function = "spi1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       ulpi_dir_py1 {
+                               nvidia,pins = "ulpi_dir_py1";
+                               nvidia,function = "spi1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       cam_i2c_scl_pbb1 {
+                               nvidia,pins = "cam_i2c_scl_pbb1",
+                                             "cam_i2c_sda_pbb2";
+                               nvidia,function = "i2c3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,lock = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_ENABLE>;
+                       };
+                       gen2_i2c_scl_pt5 {
+                               nvidia,pins = "gen2_i2c_scl_pt5",
+                                             "gen2_i2c_sda_pt6";
+                               nvidia,function = "i2c2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,lock = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_ENABLE>;
+                       };
+                       pj7 {
+                               nvidia,pins = "pj7";
+                               nvidia,function = "uartd";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       spdif_in_pk6 {
+                               nvidia,pins = "spdif_in_pk6";
+                               nvidia,function = "spdif";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk7 {
+                               nvidia,pins = "pk7";
+                               nvidia,function = "uartd";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       pg4 {
+                               nvidia,pins = "pg4",
+                                             "pg5",
+                                             "pg6",
+                                             "pi3";
+                               nvidia,function = "spi4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       pg7 {
+                               nvidia,pins = "pg7";
+                               nvidia,function = "spi4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       ph1 {
+                               nvidia,pins = "ph1";
+                               nvidia,function = "pwm1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk0 {
+                               nvidia,pins = "pk0",
+                                             "kb_row15_ps7",
+                                             "clk_32k_out_pa0";
+                               nvidia,function = "soc";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       sdmmc1_clk_pz0 {
+                               nvidia,pins = "sdmmc1_clk_pz0";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       sdmmc1_cmd_pz1 {
+                               nvidia,pins = "sdmmc1_cmd_pz1",
+                                             "sdmmc1_dat0_py7",
+                                             "sdmmc1_dat1_py6",
+                                             "sdmmc1_dat2_py5",
+                                             "sdmmc1_dat3_py4";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       sdmmc3_clk_pa6 {
+                               nvidia,pins = "sdmmc3_clk_pa6";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       sdmmc3_cmd_pa7 {
+                               nvidia,pins = "sdmmc3_cmd_pa7",
+                                             "sdmmc3_dat0_pb7",
+                                             "sdmmc3_dat1_pb6",
+                                             "sdmmc3_dat2_pb5",
+                                             "sdmmc3_dat3_pb4",
+                                             "kb_col4_pq4",
+                                             "sdmmc3_clk_lb_out_pee4",
+                                             "sdmmc3_clk_lb_in_pee5",
+                                             "sdmmc3_cd_n_pv2";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       sdmmc4_clk_pcc4 {
+                               nvidia,pins = "sdmmc4_clk_pcc4";
+                               nvidia,function = "sdmmc4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       sdmmc4_cmd_pt7 {
+                               nvidia,pins = "sdmmc4_cmd_pt7",
+                                             "sdmmc4_dat0_paa0",
+                                             "sdmmc4_dat1_paa1",
+                                             "sdmmc4_dat2_paa2",
+                                             "sdmmc4_dat3_paa3",
+                                             "sdmmc4_dat4_paa4",
+                                             "sdmmc4_dat5_paa5",
+                                             "sdmmc4_dat6_paa6",
+                                             "sdmmc4_dat7_paa7";
+                               nvidia,function = "sdmmc4";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       mic_det_l {
+                               nvidia,pins = "kb_row7_pr7";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       kb_row10_ps2 {
+                               nvidia,pins = "kb_row10_ps2";
+                               nvidia,function = "uarta";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       kb_row9_ps1 {
+                               nvidia,pins = "kb_row9_ps1";
+                               nvidia,function = "uarta";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       pwr_i2c_scl_pz6 {
+                               nvidia,pins = "pwr_i2c_scl_pz6",
+                                             "pwr_i2c_sda_pz7";
+                               nvidia,function = "i2cpwr";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,lock = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_ENABLE>;
+                       };
+                       jtag_rtck {
+                               nvidia,pins = "jtag_rtck";
+                               nvidia,function = "rtck";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       clk_32k_in {
+                               nvidia,pins = "clk_32k_in";
+                               nvidia,function = "clk";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       core_pwr_req {
+                               nvidia,pins = "core_pwr_req";
+                               nvidia,function = "pwron";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       cpu_pwr_req {
+                               nvidia,pins = "cpu_pwr_req";
+                               nvidia,function = "cpu";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       kb_col0_ap {
+                               nvidia,pins = "kb_col0_pq0";
+                               nvidia,function = "rsvd4";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       en_vdd_sd {
+                               nvidia,pins = "kb_row0_pr0";
+                               nvidia,function = "rsvd4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       lid_open {
+                               nvidia,pins = "kb_row4_pr4";
+                               nvidia,function = "rsvd3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       pwr_int_n {
+                               nvidia,pins = "pwr_int_n";
+                               nvidia,function = "pmi";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       reset_out_n {
+                               nvidia,pins = "reset_out_n";
+                               nvidia,function = "reset_out_n";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       clk3_out_pee0 {
+                               nvidia,pins = "clk3_out_pee0";
+                               nvidia,function = "extperiph3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       gen1_i2c_scl_pc4 {
+                               nvidia,pins = "gen1_i2c_scl_pc4",
+                                             "gen1_i2c_sda_pc5";
+                               nvidia,function = "i2c1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,lock = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_ENABLE>;
+                       };
+                       hdmi_cec_pee3 {
+                               nvidia,pins = "hdmi_cec_pee3";
+                               nvidia,function = "cec";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,lock = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       hdmi_int_pn7 {
+                               nvidia,pins = "hdmi_int_pn7";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       ddc_scl_pv4 {
+                               nvidia,pins = "ddc_scl_pv4",
+                                             "ddc_sda_pv5";
+                               nvidia,function = "i2c4";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,lock = <TEGRA_PIN_DISABLE>;
+                               nvidia,rcv-sel = <TEGRA_PIN_ENABLE>;
+                       };
+                       usb_vbus_en0_pn4 {
+                               nvidia,pins = "usb_vbus_en0_pn4",
+                                             "usb_vbus_en1_pn5",
+                                             "usb_vbus_en2_pff1";
+                               nvidia,function = "usb";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,lock = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       drive_sdio1 {
+                               nvidia,pins = "drive_sdio1";
+                               nvidia,high-speed-mode = <TEGRA_PIN_ENABLE>;
+                               nvidia,schmitt = <TEGRA_PIN_DISABLE>;
+                               nvidia,pull-down-strength = <36>;
+                               nvidia,pull-up-strength = <20>;
+                               nvidia,slew-rate-rising = <TEGRA_PIN_SLEW_RATE_SLOW>;
+                               nvidia,slew-rate-falling = <TEGRA_PIN_SLEW_RATE_SLOW>;
+                       };
+                       drive_sdio3 {
+                               nvidia,pins = "drive_sdio3";
+                               nvidia,high-speed-mode = <TEGRA_PIN_ENABLE>;
+                               nvidia,schmitt = <TEGRA_PIN_DISABLE>;
+                               nvidia,pull-down-strength = <22>;
+                               nvidia,pull-up-strength = <36>;
+                               nvidia,slew-rate-rising = <TEGRA_PIN_SLEW_RATE_FASTEST>;
+                               nvidia,slew-rate-falling = <TEGRA_PIN_SLEW_RATE_FASTEST>;
+                       };
+                       drive_gma {
+                               nvidia,pins = "drive_gma";
+                               nvidia,high-speed-mode = <TEGRA_PIN_ENABLE>;
+                               nvidia,schmitt = <TEGRA_PIN_DISABLE>;
+                               nvidia,pull-down-strength = <2>;
+                               nvidia,pull-up-strength = <1>;
+                               nvidia,slew-rate-rising = <TEGRA_PIN_SLEW_RATE_FASTEST>;
+                               nvidia,slew-rate-falling = <TEGRA_PIN_SLEW_RATE_FASTEST>;
+                               nvidia,drive-type = <1>;
+                       };
+                       ac_ok {
+                               nvidia,pins = "pj0";
+                               nvidia,function = "gmi";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       codec_irq_l {
+                               nvidia,pins = "ph4";
+                               nvidia,function = "gmi";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       lcd_bl_en {
+                               nvidia,pins = "ph2";
+                               nvidia,function = "gmi";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       touch_irq_l {
+                               nvidia,pins = "gpio_w3_aud_pw3";
+                               nvidia,function = "spi6";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       tpm_davint_l {
+                               nvidia,pins = "ph6";
+                               nvidia,function = "gmi";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       ts_irq_l {
+                               nvidia,pins = "pk2";
+                               nvidia,function = "gmi";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       ts_reset_l {
+                               nvidia,pins = "pk4";
+                               nvidia,function = "gmi";
+                               nvidia,pull = <1>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       ts_shdn_l {
+                               nvidia,pins = "pk1";
+                               nvidia,function = "gmi";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       ph7 {
+                               nvidia,pins = "ph7";
+                               nvidia,function = "gmi";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       sensor_irq_l {
+                               nvidia,pins = "pi6";
+                               nvidia,function = "gmi";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       wifi_en {
+                               nvidia,pins = "gpio_x7_aud_px7";
+                               nvidia,function = "rsvd4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+                       chromeos_write_protect {
+                               nvidia,pins = "kb_row1_pr1";
+                               nvidia,function = "rsvd4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       hp_det_l {
+                               nvidia,pins = "pi7";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                       };
+                       soc_warm_reset_l {
+                               nvidia,pins = "pi5";
+                               nvidia,function = "gmi";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                       };
+               };
+       };
+
+       serial@0,70006000 {
+               status = "okay";
+       };
+
+       pwm: pwm@0,7000a000 {
+               status = "okay";
+       };
+
+       /* HDMI DDC */
+       hdmi_ddc: i2c@0,7000c700 {
+               status = "okay";
+               clock-frequency = <100000>;
+       };
+
+       i2c@0,7000d000 {
+               status = "okay";
+               clock-frequency = <400000>;
+
+               as3722: pmic@40 {
+                       compatible = "ams,as3722";
+                       reg = <0x40>;
+                       interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>;
+
+                       ams,system-power-controller;
+
+                       #interrupt-cells = <2>;
+                       interrupt-controller;
+
+                       #gpio-cells = <2>;
+                       gpio-controller;
+
+                       pinctrl-names = "default";
+                       pinctrl-0 = <&as3722_default>;
+
+                       as3722_default: pinmux@0 {
+                               gpio0 {
+                                       pins = "gpio0";
+                                       function = "gpio";
+                                       bias-pull-down;
+                               };
+
+                               gpio1 {
+                                       pins = "gpio1";
+                                       function = "gpio";
+                                       bias-pull-up;
+                               };
+
+                               gpio2_4_7 {
+                                       pins = "gpio2", "gpio4", "gpio7";
+                                       function = "gpio";
+                                       bias-pull-up;
+                               };
+
+                               gpio3 {
+                                       pins = "gpio3";
+                                       function = "gpio";
+                                       bias-high-impedance;
+                               };
+
+                               gpio5 {
+                                       pins = "gpio5";
+                                       function = "clk32k-out";
+                                       bias-pull-down;
+                               };
+
+                               gpio6 {
+                                       pins = "gpio6";
+                                       function = "clk32k-out";
+                                       bias-pull-down;
+                               };
+                       };
+
+                       regulators {
+                               vsup-sd2-supply = <&vdd_5v0_sys>;
+                               vsup-sd3-supply = <&vdd_5v0_sys>;
+                               vsup-sd4-supply = <&vdd_5v0_sys>;
+                               vsup-sd5-supply = <&vdd_5v0_sys>;
+                               vin-ldo0-supply = <&vdd_1v35_lp0>;
+                               vin-ldo1-6-supply = <&vdd_3v3_sys>;
+                               vin-ldo2-5-7-supply = <&vddio_1v8>;
+                               vin-ldo3-4-supply = <&vdd_3v3_sys>;
+                               vin-ldo9-10-supply = <&vdd_5v0_sys>;
+                               vin-ldo11-supply = <&vdd_3v3_run>;
+
+                               sd0 {
+                                       regulator-name = "+VDD_CPU_AP";
+                                       regulator-min-microvolt = <700000>;
+                                       regulator-max-microvolt = <1350000>;
+                                       regulator-max-microamp = <3500000>;
+                                       regulator-always-on;
+                                       regulator-boot-on;
+                                       ams,ext-control = <2>;
+                               };
+
+                               sd1 {
+                                       regulator-name = "+VDD_CORE";
+                                       regulator-min-microvolt = <700000>;
+                                       regulator-max-microvolt = <1350000>;
+                                       regulator-max-microamp = <4000000>;
+                                       regulator-always-on;
+                                       regulator-boot-on;
+                                       ams,ext-control = <1>;
+                               };
+
+                               vdd_1v35_lp0: sd2 {
+                                       regulator-name = "+1.35V_LP0(sd2)";
+                                       regulator-min-microvolt = <1350000>;
+                                       regulator-max-microvolt = <1350000>;
+                                       regulator-always-on;
+                                       regulator-boot-on;
+                               };
+
+                               sd3 {
+                                       regulator-name = "+1.35V_LP0(sd3)";
+                                       regulator-min-microvolt = <1350000>;
+                                       regulator-max-microvolt = <1350000>;
+                                       regulator-always-on;
+                                       regulator-boot-on;
+                               };
+
+                               vdd_1v05_run: sd4 {
+                                       regulator-name = "+1.05V_RUN";
+                                       regulator-min-microvolt = <1050000>;
+                                       regulator-max-microvolt = <1050000>;
+                               };
+
+                               vddio_1v8: sd5 {
+                                       regulator-name = "+1.8V_VDDIO";
+                                       regulator-min-microvolt = <1800000>;
+                                       regulator-max-microvolt = <1800000>;
+                                       regulator-always-on;
+                                       regulator-boot-on;
+                               };
+
+                               vdd_gpu: sd6 {
+                                       regulator-name = "+VDD_GPU_AP";
+                                       regulator-min-microvolt = <800000>;
+                                       regulator-max-microvolt = <1200000>;
+                                       regulator-min-microamp = <3500000>;
+                                       regulator-max-microamp = <3500000>;
+                                       regulator-always-on;
+                                       regulator-boot-on;
+                               };
+
+                               ldo0 {
+                                       regulator-name = "+1.05_RUN_AVDD";
+                                       regulator-min-microvolt = <1050000>;
+                                       regulator-max-microvolt = <1050000>;
+                                       regulator-always-on;
+                                       regulator-boot-on;
+                                       ams,ext-control = <1>;
+                               };
+
+                               ldo1 {
+                                       regulator-name = "+1.8V_RUN_CAM";
+                                       regulator-min-microvolt = <1800000>;
+                                       regulator-max-microvolt = <1800000>;
+                               };
+
+                               ldo2 {
+                                       regulator-name = "+1.2V_GEN_AVDD";
+                                       regulator-min-microvolt = <1200000>;
+                                       regulator-max-microvolt = <1200000>;
+                                       regulator-always-on;
+                                       regulator-boot-on;
+                               };
+
+                               ldo3 {
+                                       regulator-name = "+1.00V_LP0_VDD_RTC";
+                                       regulator-min-microvolt = <1000000>;
+                                       regulator-max-microvolt = <1000000>;
+                                       regulator-always-on;
+                                       regulator-boot-on;
+                                       ams,enable-tracking;
+                               };
+
+                               vdd_run_cam: ldo4 {
+                                       regulator-name = "+2.8V_RUN_CAM";
+                                       regulator-min-microvolt = <2800000>;
+                                       regulator-max-microvolt = <2800000>;
+                               };
+
+                               ldo5 {
+                                       regulator-name = "+1.2V_RUN_CAM_FRONT";
+                                       regulator-min-microvolt = <1200000>;
+                                       regulator-max-microvolt = <1200000>;
+                               };
+
+                               vddio_sdmmc3: ldo6 {
+                                       regulator-name = "+VDDIO_SDMMC3";
+                                       regulator-min-microvolt = <1800000>;
+                                       regulator-max-microvolt = <3300000>;
+                               };
+
+                               ldo7 {
+                                       regulator-name = "+1.05V_RUN_CAM_REAR";
+                                       regulator-min-microvolt = <1050000>;
+                                       regulator-max-microvolt = <1050000>;
+                               };
+
+                               ldo9 {
+                                       regulator-name = "+2.8V_RUN_TOUCH";
+                                       regulator-min-microvolt = <2800000>;
+                                       regulator-max-microvolt = <2800000>;
+                               };
+
+                               ldo10 {
+                                       regulator-name = "+2.8V_RUN_CAM_AF";
+                                       regulator-min-microvolt = <2800000>;
+                                       regulator-max-microvolt = <2800000>;
+                               };
+
+                               ldo11 {
+                                       regulator-name = "+1.8V_RUN_VPP_FUSE";
+                                       regulator-min-microvolt = <1800000>;
+                                       regulator-max-microvolt = <1800000>;
+                               };
+                       };
+               };
+       };
+
+       spi@0,7000d400 {
+               status = "okay";
+
+               ec: cros-ec@0 {
+                       compatible = "google,cros-ec-spi";
+                       spi-max-frequency = <3000000>;
+                       interrupt-parent = <&gpio>;
+                       interrupts = <TEGRA_GPIO(C, 7) IRQ_TYPE_LEVEL_LOW>;
+                       reg = <0>;
+
+                       google,cros-ec-spi-msg-delay = <2000>;
+
+                       i2c_20: i2c-tunnel {
+                               compatible = "google,cros-ec-i2c-tunnel";
+                               #address-cells = <1>;
+                               #size-cells = <0>;
+
+                               google,remote-bus = <0>;
+
+                               charger: bq24735 {
+                                       compatible = "ti,bq24735";
+                                       reg = <0x9>;
+                                       interrupt-parent = <&gpio>;
+                                       interrupts = <TEGRA_GPIO(J, 0)
+                                                       GPIO_ACTIVE_HIGH>;
+                                       ti,ac-detect-gpios = <&gpio
+                                                       TEGRA_GPIO(J, 0)
+                                                       GPIO_ACTIVE_HIGH>;
+                               };
+
+                               battery: smart-battery {
+                                       compatible = "sbs,sbs-battery";
+                                       reg = <0xb>;
+                                       battery-name = "battery";
+                                       sbs,i2c-retry-count = <2>;
+                                       sbs,poll-retry-count = <10>;
+                               /*      power-supplies = <&charger>; */
+                               };
+                       };
+
+                       keyboard-controller {
+                               compatible = "google,cros-ec-keyb";
+                               keypad,num-rows = <8>;
+                               keypad,num-columns = <13>;
+                               google,needs-ghost-filter;
+                               linux,keymap =
+                                       <MATRIX_KEY(0x00, 0x01, KEY_LEFTMETA)
+                                        MATRIX_KEY(0x00, 0x02, KEY_F1)
+                                        MATRIX_KEY(0x00, 0x03, KEY_B)
+                                        MATRIX_KEY(0x00, 0x04, KEY_F10)
+                                        MATRIX_KEY(0x00, 0x06, KEY_N)
+                                        MATRIX_KEY(0x00, 0x08, KEY_EQUAL)
+                                        MATRIX_KEY(0x00, 0x0a, KEY_RIGHTALT)
+
+                                        MATRIX_KEY(0x01, 0x01, KEY_ESC)
+                                        MATRIX_KEY(0x01, 0x02, KEY_F4)
+                                        MATRIX_KEY(0x01, 0x03, KEY_G)
+                                        MATRIX_KEY(0x01, 0x04, KEY_F7)
+                                        MATRIX_KEY(0x01, 0x06, KEY_H)
+                                        MATRIX_KEY(0x01, 0x08, KEY_APOSTROPHE)
+                                        MATRIX_KEY(0x01, 0x09, KEY_F9)
+                                        MATRIX_KEY(0x01, 0x0b, KEY_BACKSPACE)
+
+                                        MATRIX_KEY(0x02, 0x00, KEY_LEFTCTRL)
+                                        MATRIX_KEY(0x02, 0x01, KEY_TAB)
+                                        MATRIX_KEY(0x02, 0x02, KEY_F3)
+                                        MATRIX_KEY(0x02, 0x03, KEY_T)
+                                        MATRIX_KEY(0x02, 0x04, KEY_F6)
+                                        MATRIX_KEY(0x02, 0x05, KEY_RIGHTBRACE)
+                                        MATRIX_KEY(0x02, 0x06, KEY_Y)
+                                        MATRIX_KEY(0x02, 0x07, KEY_102ND)
+                                        MATRIX_KEY(0x02, 0x08, KEY_LEFTBRACE)
+                                        MATRIX_KEY(0x02, 0x09, KEY_F8)
+
+                                        MATRIX_KEY(0x03, 0x01, KEY_GRAVE)
+                                        MATRIX_KEY(0x03, 0x02, KEY_F2)
+                                        MATRIX_KEY(0x03, 0x03, KEY_5)
+                                        MATRIX_KEY(0x03, 0x04, KEY_F5)
+                                        MATRIX_KEY(0x03, 0x06, KEY_6)
+                                        MATRIX_KEY(0x03, 0x08, KEY_MINUS)
+                                        MATRIX_KEY(0x03, 0x0b, KEY_BACKSLASH)
+
+                                        MATRIX_KEY(0x04, 0x00, KEY_RIGHTCTRL)
+                                        MATRIX_KEY(0x04, 0x01, KEY_A)
+                                        MATRIX_KEY(0x04, 0x02, KEY_D)
+                                        MATRIX_KEY(0x04, 0x03, KEY_F)
+                                        MATRIX_KEY(0x04, 0x04, KEY_S)
+                                        MATRIX_KEY(0x04, 0x05, KEY_K)
+                                        MATRIX_KEY(0x04, 0x06, KEY_J)
+                                        MATRIX_KEY(0x04, 0x08, KEY_SEMICOLON)
+                                        MATRIX_KEY(0x04, 0x09, KEY_L)
+                                        MATRIX_KEY(0x04, 0x0a, KEY_BACKSLASH)
+                                        MATRIX_KEY(0x04, 0x0b, KEY_ENTER)
+
+                                        MATRIX_KEY(0x05, 0x01, KEY_Z)
+                                        MATRIX_KEY(0x05, 0x02, KEY_C)
+                                        MATRIX_KEY(0x05, 0x03, KEY_V)
+                                        MATRIX_KEY(0x05, 0x04, KEY_X)
+                                        MATRIX_KEY(0x05, 0x05, KEY_COMMA)
+                                        MATRIX_KEY(0x05, 0x06, KEY_M)
+                                        MATRIX_KEY(0x05, 0x07, KEY_LEFTSHIFT)
+                                        MATRIX_KEY(0x05, 0x08, KEY_SLASH)
+                                        MATRIX_KEY(0x05, 0x09, KEY_DOT)
+                                        MATRIX_KEY(0x05, 0x0b, KEY_SPACE)
+
+                                        MATRIX_KEY(0x06, 0x01, KEY_1)
+                                        MATRIX_KEY(0x06, 0x02, KEY_3)
+                                        MATRIX_KEY(0x06, 0x03, KEY_4)
+                                        MATRIX_KEY(0x06, 0x04, KEY_2)
+                                        MATRIX_KEY(0x06, 0x05, KEY_8)
+                                        MATRIX_KEY(0x06, 0x06, KEY_7)
+                                        MATRIX_KEY(0x06, 0x08, KEY_0)
+                                        MATRIX_KEY(0x06, 0x09, KEY_9)
+                                        MATRIX_KEY(0x06, 0x0a, KEY_LEFTALT)
+                                        MATRIX_KEY(0x06, 0x0b, KEY_DOWN)
+                                        MATRIX_KEY(0x06, 0x0c, KEY_RIGHT)
+
+                                        MATRIX_KEY(0x07, 0x01, KEY_Q)
+                                        MATRIX_KEY(0x07, 0x02, KEY_E)
+                                        MATRIX_KEY(0x07, 0x03, KEY_R)
+                                        MATRIX_KEY(0x07, 0x04, KEY_W)
+                                        MATRIX_KEY(0x07, 0x05, KEY_I)
+                                        MATRIX_KEY(0x07, 0x06, KEY_U)
+                                        MATRIX_KEY(0x07, 0x07, KEY_RIGHTSHIFT)
+                                        MATRIX_KEY(0x07, 0x08, KEY_P)
+                                        MATRIX_KEY(0x07, 0x09, KEY_O)
+                                        MATRIX_KEY(0x07, 0x0b, KEY_UP)
+                                        MATRIX_KEY(0x07, 0x0c, KEY_LEFT)>;
+                       };
+               };
+       };
+
+       pmc@0,7000e400 {
+               nvidia,invert-interrupt;
+               nvidia,suspend-mode = <0>;
+               #wake-cells = <3>;
+               nvidia,cpu-pwr-good-time = <500>;
+               nvidia,cpu-pwr-off-time = <300>;
+               nvidia,core-pwr-good-time = <641 3845>;
+               nvidia,core-pwr-off-time = <61036>;
+               nvidia,core-power-req-active-high;
+               nvidia,sys-clock-req-active-high;
+               nvidia,reset-gpio = <&gpio TEGRA_GPIO(I, 5) GPIO_ACTIVE_LOW>;
+       };
+
+       /* WIFI/BT module */
+       sdhci@0,700b0000 {
+               status = "disabled";
+       };
+
+       /* external SD/MMC */
+       sdhci@0,700b0400 {
+               cd-gpios = <&gpio TEGRA_GPIO(V, 2) GPIO_ACTIVE_LOW>;
+               power-gpios = <&gpio TEGRA_GPIO(R, 0) GPIO_ACTIVE_HIGH>;
+               wp-gpios = <&gpio TEGRA_GPIO(Q, 4) GPIO_ACTIVE_HIGH>;
+               status = "okay";
+               bus-width = <4>;
+               vqmmc-supply = <&vddio_sdmmc3>;
+       };
+
+       /* EMMC 4.51 */
+       sdhci@0,700b0600 {
+               status = "okay";
+               bus-width = <8>;
+               non-removable;
+       };
+
+       usb@0,7d000000 {
+               status = "okay";
+       };
+
+       usb-phy@0,7d000000 {
+               status = "okay";
+               vbus-supply = <&vdd_usb1_vbus>;
+       };
+
+       usb@0,7d004000 {
+               status = "okay";
+       };
+
+       usb-phy@0,7d004000 {
+               status = "okay";
+               vbus-supply = <&vdd_run_cam>;
+       };
+
+       usb@0,7d008000 {
+               status = "okay";
+       };
+
+       usb-phy@0,7d008000 {
+               status = "okay";
+               vbus-supply = <&vdd_usb3_vbus>;
+       };
+
+       backlight: backlight {
+               compatible = "pwm-backlight";
+
+               enable-gpios = <&gpio TEGRA_GPIO(H, 2) GPIO_ACTIVE_HIGH>;
+               power-supply = <&vdd_led>;
+               pwms = <&pwm 1 1000000>;
+
+               brightness-levels = <0 4 8 16 32 64 128 255>;
+               default-brightness-level = <6>;
+
+               backlight-boot-off;
+       };
+
+       clocks {
+               compatible = "simple-bus";
+               #address-cells = <1>;
+               #size-cells = <0>;
+
+               clk32k_in: clock@0 {
+                       compatible = "fixed-clock";
+                       reg=<0>;
+                       #clock-cells = <0>;
+                       clock-frequency = <32768>;
+               };
+       };
+
+       gpio-keys {
+               compatible = "gpio-keys";
+
+               lid {
+                       label = "Lid";
+                       gpios = <&gpio TEGRA_GPIO(R, 4) GPIO_ACTIVE_LOW>;
+                       linux,input-type = <5>;
+                       linux,code = <0>;
+                       debounce-interval = <1>;
+                       gpio-key,wakeup;
+               };
+
+               power {
+                       label = "Power";
+                       gpios = <&gpio TEGRA_GPIO(Q, 0) GPIO_ACTIVE_LOW>;
+                       linux,code = <KEY_POWER>;
+                       debounce-interval = <10>;
+                       gpio-key,wakeup;
+               };
+       };
+
+       panel: panel {
+               compatible = "innolux,n116bge", "simple-panel";
+               backlight = <&backlight>;
+               ddc-i2c-bus = <&dpaux>;
+       };
+
+       regulators {
+               compatible = "simple-bus";
+               #address-cells = <1>;
+               #size-cells = <0>;
+
+               vdd_mux: regulator@0 {
+                       compatible = "regulator-fixed";
+                       reg = <0>;
+                       regulator-name = "+VDD_MUX";
+                       regulator-min-microvolt = <19000000>;
+                       regulator-max-microvolt = <19000000>;
+                       regulator-always-on;
+                       regulator-boot-on;
+               };
+
+               vdd_5v0_sys: regulator@1 {
+                       compatible = "regulator-fixed";
+                       reg = <1>;
+                       regulator-name = "+5V_SYS";
+                       regulator-min-microvolt = <5000000>;
+                       regulator-max-microvolt = <5000000>;
+                       regulator-always-on;
+                       regulator-boot-on;
+                       vin-supply = <&vdd_mux>;
+               };
+
+               vdd_3v3_sys: regulator@2 {
+                       compatible = "regulator-fixed";
+                       reg = <2>;
+                       regulator-name = "+3.3V_SYS";
+                       regulator-min-microvolt = <3300000>;
+                       regulator-max-microvolt = <3300000>;
+                       regulator-always-on;
+                       regulator-boot-on;
+                       vin-supply = <&vdd_mux>;
+               };
+
+               vdd_3v3_run: regulator@3 {
+                       compatible = "regulator-fixed";
+                       reg = <3>;
+                       regulator-name = "+3.3V_RUN";
+                       regulator-min-microvolt = <3300000>;
+                       regulator-max-microvolt = <3300000>;
+                       regulator-always-on;
+                       regulator-boot-on;
+                       gpio = <&as3722 1 GPIO_ACTIVE_HIGH>;
+                       enable-active-high;
+                       vin-supply = <&vdd_3v3_sys>;
+               };
+
+               vdd_3v3_hdmi: regulator@4 {
+                       compatible = "regulator-fixed";
+                       reg = <4>;
+                       regulator-name = "+3.3V_AVDD_HDMI_AP_GATED";
+                       regulator-min-microvolt = <3300000>;
+                       regulator-max-microvolt = <3300000>;
+                       vin-supply = <&vdd_3v3_run>;
+               };
+
+               vdd_led: regulator@5 {
+                       compatible = "regulator-fixed";
+                       reg = <5>;
+                       regulator-name = "+VDD_LED";
+                       regulator-min-microvolt = <3300000>;
+                       regulator-max-microvolt = <3300000>;
+                       gpio = <&gpio TEGRA_GPIO(P, 2) GPIO_ACTIVE_HIGH>;
+                       enable-active-high;
+                       vin-supply = <&vdd_mux>;
+               };
+
+               vdd_usb1_vbus: regulator@6 {
+                       compatible = "regulator-fixed";
+                       reg = <6>;
+                       regulator-name = "+5V_USB_HS";
+                       regulator-min-microvolt = <5000000>;
+                       regulator-max-microvolt = <5000000>;
+                       gpio = <&gpio TEGRA_GPIO(N, 4) GPIO_ACTIVE_HIGH>;
+                       enable-active-high;
+                       gpio-open-drain;
+                       vin-supply = <&vdd_5v0_sys>;
+               };
+
+               vdd_usb3_vbus: regulator@7 {
+                       compatible = "regulator-fixed";
+                       reg = <7>;
+                       regulator-name = "+5V_USB_SS";
+                       regulator-min-microvolt = <5000000>;
+                       regulator-max-microvolt = <5000000>;
+                       gpio = <&gpio TEGRA_GPIO(N, 5) GPIO_ACTIVE_HIGH>;
+                       enable-active-high;
+                       gpio-open-drain;
+                       vin-supply = <&vdd_5v0_sys>;
+               };
+
+               vdd_3v3_panel: regulator@8 {
+                       compatible = "regulator-fixed";
+                       reg = <8>;
+                       regulator-name = "+3.3V_PANEL";
+                       regulator-min-microvolt = <3300000>;
+                       regulator-max-microvolt = <3300000>;
+                       gpio = <&as3722 4 GPIO_ACTIVE_HIGH>;
+                       enable-active-high;
+                       vin-supply = <&vdd_3v3_sys>;
+               };
+
+               vdd_hdmi_pll: regulator@9 {
+                       compatible = "regulator-fixed";
+                       reg = <9>;
+                       regulator-name = "+1.05V_RUN_AVDD_HDMI_PLL_AP_GATE";
+                       regulator-min-microvolt = <1050000>;
+                       regulator-max-microvolt = <1050000>;
+                       gpio = <&gpio TEGRA_GPIO(H, 7) GPIO_ACTIVE_LOW>;
+                       vin-supply = <&vdd_1v05_run>;
+               };
+
+               vdd_5v0_hdmi: regulator@10 {
+                       compatible = "regulator-fixed";
+                       reg = <10>;
+                       regulator-name = "+5V_HDMI_CON";
+                       regulator-min-microvolt = <5000000>;
+                       regulator-max-microvolt = <5000000>;
+                       gpio = <&gpio TEGRA_GPIO(K, 6) GPIO_ACTIVE_HIGH>;
+                       enable-active-high;
+                       vin-supply = <&vdd_5v0_sys>;
+               };
+
+               vdd_5v0_ts: regulator@11 {
+                       compatible = "regulator-fixed";
+                       reg = <11>;
+                       regulator-name = "+5V_VDD_TS";
+                       regulator-min-microvolt = <5000000>;
+                       regulator-max-microvolt = <5000000>;
+                       regulator-always-on;
+                       regulator-boot-on;
+                       gpio = <&gpio TEGRA_GPIO(K, 1) GPIO_ACTIVE_HIGH>;
+                       enable-active-high;
+               };
+       };
+};
diff --git a/arch/arm64/boot/dts/nvidia/tegra132.dtsi b/arch/arm64/boot/dts/nvidia/tegra132.dtsi
new file mode 100644 (file)
index 0000000..e8bb460
--- /dev/null
@@ -0,0 +1,990 @@
+#include <dt-bindings/clock/tegra124-car.h>
+#include <dt-bindings/gpio/tegra-gpio.h>
+#include <dt-bindings/memory/tegra124-mc.h>
+#include <dt-bindings/pinctrl/pinctrl-tegra.h>
+#include <dt-bindings/pinctrl/pinctrl-tegra-xusb.h>
+#include <dt-bindings/interrupt-controller/arm-gic.h>
+
+/ {
+       compatible = "nvidia,tegra132", "nvidia,tegra124";
+       interrupt-parent = <&lic>;
+       #address-cells = <2>;
+       #size-cells = <2>;
+
+       pcie-controller@0,01003000 {
+               compatible = "nvidia,tegra124-pcie";
+               device_type = "pci";
+               reg = <0x0 0x01003000 0x0 0x00000800   /* PADS registers */
+                      0x0 0x01003800 0x0 0x00000800   /* AFI registers */
+                      0x0 0x02000000 0x0 0x10000000>; /* configuration space */
+               reg-names = "pads", "afi", "cs";
+               interrupts = <GIC_SPI 98 IRQ_TYPE_LEVEL_HIGH>, /* controller interrupt */
+                            <GIC_SPI 99 IRQ_TYPE_LEVEL_HIGH>; /* MSI interrupt */
+               interrupt-names = "intr", "msi";
+
+               #interrupt-cells = <1>;
+               interrupt-map-mask = <0 0 0 0>;
+               interrupt-map = <0 0 0 0 &gic GIC_SPI 98 IRQ_TYPE_LEVEL_HIGH>;
+
+               bus-range = <0x00 0xff>;
+               #address-cells = <3>;
+               #size-cells = <2>;
+
+               ranges = <0x82000000 0 0x01000000 0x0 0x01000000 0 0x00001000   /* port 0 configuration space */
+                         0x82000000 0 0x01001000 0x0 0x01001000 0 0x00001000   /* port 1 configuration space */
+                         0x81000000 0 0x0        0x0 0x12000000 0 0x00010000   /* downstream I/O (64 KiB) */
+                         0x82000000 0 0x13000000 0x0 0x13000000 0 0x0d000000   /* non-prefetchable memory (208 MiB) */
+                         0xc2000000 0 0x20000000 0x0 0x20000000 0 0x20000000>; /* prefetchable memory (512 MiB) */
+
+               clocks = <&tegra_car TEGRA124_CLK_PCIE>,
+                        <&tegra_car TEGRA124_CLK_AFI>,
+                        <&tegra_car TEGRA124_CLK_PLL_E>,
+                        <&tegra_car TEGRA124_CLK_CML0>;
+               clock-names = "pex", "afi", "pll_e", "cml";
+               resets = <&tegra_car 70>,
+                        <&tegra_car 72>,
+                        <&tegra_car 74>;
+               reset-names = "pex", "afi", "pcie_x";
+               status = "disabled";
+
+               phys = <&padctl TEGRA_XUSB_PADCTL_PCIE>;
+               phy-names = "pcie";
+
+               pci@1,0 {
+                       device_type = "pci";
+                       assigned-addresses = <0x82000800 0 0x01000000 0 0x1000>;
+                       reg = <0x000800 0 0 0 0>;
+                       status = "disabled";
+
+                       #address-cells = <3>;
+                       #size-cells = <2>;
+                       ranges;
+
+                       nvidia,num-lanes = <2>;
+               };
+
+               pci@2,0 {
+                       device_type = "pci";
+                       assigned-addresses = <0x82001000 0 0x01001000 0 0x1000>;
+                       reg = <0x001000 0 0 0 0>;
+                       status = "disabled";
+
+                       #address-cells = <3>;
+                       #size-cells = <2>;
+                       ranges;
+
+                       nvidia,num-lanes = <1>;
+               };
+       };
+
+       host1x@0,50000000 {
+               compatible = "nvidia,tegra124-host1x", "simple-bus";
+               reg = <0x0 0x50000000 0x0 0x00034000>;
+               interrupts = <GIC_SPI 65 IRQ_TYPE_LEVEL_HIGH>, /* syncpt */
+                            <GIC_SPI 67 IRQ_TYPE_LEVEL_HIGH>; /* general */
+               clocks = <&tegra_car TEGRA124_CLK_HOST1X>;
+               clock-names = "host1x";
+               resets = <&tegra_car 28>;
+               reset-names = "host1x";
+
+               #address-cells = <2>;
+               #size-cells = <2>;
+
+               ranges = <0 0x54000000 0 0x54000000 0 0x01000000>;
+
+               dc@0,54200000 {
+                       compatible = "nvidia,tegra124-dc";
+                       reg = <0x0 0x54200000 0x0 0x00040000>;
+                       interrupts = <GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH>;
+                       clocks = <&tegra_car TEGRA124_CLK_DISP1>,
+                                <&tegra_car TEGRA124_CLK_PLL_P>;
+                       clock-names = "dc", "parent";
+                       resets = <&tegra_car 27>;
+                       reset-names = "dc";
+
+                       iommus = <&mc TEGRA_SWGROUP_DC>;
+
+                       nvidia,head = <0>;
+               };
+
+               dc@0,54240000 {
+                       compatible = "nvidia,tegra124-dc";
+                       reg = <0x0 0x54240000 0x0 0x00040000>;
+                       interrupts = <GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH>;
+                       clocks = <&tegra_car TEGRA124_CLK_DISP2>,
+                                <&tegra_car TEGRA124_CLK_PLL_P>;
+                       clock-names = "dc", "parent";
+                       resets = <&tegra_car 26>;
+                       reset-names = "dc";
+
+                       iommus = <&mc TEGRA_SWGROUP_DCB>;
+
+                       nvidia,head = <1>;
+               };
+
+               hdmi@0,54280000 {
+                       compatible = "nvidia,tegra124-hdmi";
+                       reg = <0x0 0x54280000 0x0 0x00040000>;
+                       interrupts = <GIC_SPI 75 IRQ_TYPE_LEVEL_HIGH>;
+                       clocks = <&tegra_car TEGRA124_CLK_HDMI>,
+                                <&tegra_car TEGRA124_CLK_PLL_D2_OUT0>;
+                       clock-names = "hdmi", "parent";
+                       resets = <&tegra_car 51>;
+                       reset-names = "hdmi";
+                       status = "disabled";
+               };
+
+               sor@0,54540000 {
+                       compatible = "nvidia,tegra124-sor";
+                       reg = <0x0 0x54540000 0x0 0x00040000>;
+                       interrupts = <GIC_SPI 76 IRQ_TYPE_LEVEL_HIGH>;
+                       clocks = <&tegra_car TEGRA124_CLK_SOR0>,
+                                <&tegra_car TEGRA124_CLK_PLL_D_OUT0>,
+                                <&tegra_car TEGRA124_CLK_PLL_DP>,
+                                <&tegra_car TEGRA124_CLK_CLK_M>;
+                       clock-names = "sor", "parent", "dp", "safe";
+                       resets = <&tegra_car 182>;
+                       reset-names = "sor";
+                       status = "disabled";
+               };
+
+               dpaux: dpaux@0,545c0000 {
+                       compatible = "nvidia,tegra124-dpaux";
+                       reg = <0x0 0x545c0000 0x0 0x00040000>;
+                       interrupts = <GIC_SPI 159 IRQ_TYPE_LEVEL_HIGH>;
+                       clocks = <&tegra_car TEGRA124_CLK_DPAUX>,
+                                <&tegra_car TEGRA124_CLK_PLL_DP>;
+                       clock-names = "dpaux", "parent";
+                       resets = <&tegra_car 181>;
+                       reset-names = "dpaux";
+                       status = "disabled";
+               };
+       };
+
+       gic: interrupt-controller@0,50041000 {
+               compatible = "arm,cortex-a15-gic";
+               #interrupt-cells = <3>;
+               interrupt-controller;
+               reg = <0x0 0x50041000 0x0 0x1000>,
+                     <0x0 0x50042000 0x0 0x2000>,
+                     <0x0 0x50044000 0x0 0x2000>,
+                     <0x0 0x50046000 0x0 0x2000>;
+               interrupts = <GIC_PPI 9
+                       (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>;
+               interrupt-parent = <&gic>;
+       };
+
+       gpu@0,57000000 {
+               compatible = "nvidia,gk20a";
+               reg = <0x0 0x57000000 0x0 0x01000000>,
+                     <0x0 0x58000000 0x0 0x01000000>;
+               interrupts = <GIC_SPI 157 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 158 IRQ_TYPE_LEVEL_HIGH>;
+               interrupt-names = "stall", "nonstall";
+               clocks = <&tegra_car TEGRA124_CLK_GPU>,
+                        <&tegra_car TEGRA124_CLK_PLL_P_OUT5>;
+               clock-names = "gpu", "pwr";
+               resets = <&tegra_car 184>;
+               reset-names = "gpu";
+               status = "disabled";
+       };
+
+       lic: interrupt-controller@60004000 {
+               compatible = "nvidia,tegra124-ictlr", "nvidia,tegra30-ictlr";
+               reg = <0x0 0x60004000 0x0 0x100>,
+                     <0x0 0x60004100 0x0 0x100>,
+                     <0x0 0x60004200 0x0 0x100>,
+                     <0x0 0x60004300 0x0 0x100>,
+                     <0x0 0x60004400 0x0 0x100>;
+               interrupt-controller;
+               #interrupt-cells = <3>;
+               interrupt-parent = <&gic>;
+       };
+
+       timer@0,60005000 {
+               compatible = "nvidia,tegra124-timer", "nvidia,tegra20-timer";
+               reg = <0x0 0x60005000 0x0 0x400>;
+               interrupts = <GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 41 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 122 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA124_CLK_TIMER>;
+               clock-names = "timer";
+       };
+
+       tegra_car: clock@0,60006000 {
+               compatible = "nvidia,tegra132-car";
+               reg = <0x0 0x60006000 0x0 0x1000>;
+               #clock-cells = <1>;
+               #reset-cells = <1>;
+               nvidia,external-memory-controller = <&emc>;
+       };
+
+       flow-controller@0,60007000 {
+               compatible = "nvidia,tegra124-flowctrl";
+               reg = <0x0 0x60007000 0x0 0x1000>;
+       };
+
+       actmon@0,6000c800 {
+               compatible = "nvidia,tegra124-actmon";
+               reg = <0x0 0x6000c800 0x0 0x400>;
+               interrupts = <GIC_SPI 45 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA124_CLK_ACTMON>,
+                        <&tegra_car TEGRA124_CLK_EMC>;
+               clock-names = "actmon", "emc";
+               resets = <&tegra_car 119>;
+               reset-names = "actmon";
+       };
+
+       gpio: gpio@0,6000d000 {
+               compatible = "nvidia,tegra124-gpio", "nvidia,tegra30-gpio";
+               reg = <0x0 0x6000d000 0x0 0x1000>;
+               interrupts = <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 35 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 87 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 89 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 125 IRQ_TYPE_LEVEL_HIGH>;
+               #gpio-cells = <2>;
+               gpio-controller;
+               #interrupt-cells = <2>;
+               interrupt-controller;
+       };
+
+       apbdma: dma@0,60020000 {
+               compatible = "nvidia,tegra124-apbdma", "nvidia,tegra148-apbdma";
+               reg = <0x0 0x60020000 0x0 0x1400>;
+               interrupts = <GIC_SPI 104 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 105 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 106 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 107 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 108 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 109 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 110 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 111 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 112 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 113 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 114 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 115 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 116 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 117 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 118 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 119 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 128 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 129 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 130 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 132 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 133 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 134 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 135 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 137 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 138 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 139 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 140 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 141 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 142 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 143 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA124_CLK_APBDMA>;
+               clock-names = "dma";
+               resets = <&tegra_car 34>;
+               reset-names = "dma";
+               #dma-cells = <1>;
+       };
+
+       apbmisc@0,70000800 {
+               compatible = "nvidia,tegra124-apbmisc", "nvidia,tegra20-apbmisc";
+               reg = <0x0 0x70000800 0x0 0x64>,   /* Chip revision */
+                     <0x0 0x7000e864 0x0 0x04>;   /* Strapping options */
+       };
+
+       pinmux: pinmux@0,70000868 {
+               compatible = "nvidia,tegra124-pinmux";
+               reg = <0x0 0x70000868 0x0 0x164>, /* Pad control registers */
+                     <0x0 0x70003000 0x0 0x434>, /* Mux registers */
+                     <0x0 0x70000820 0x0 0x008>; /* MIPI pad control */
+       };
+
+       /*
+        * There are two serial driver i.e. 8250 based simple serial
+        * driver and APB DMA based serial driver for higher baudrate
+        * and performace. To enable the 8250 based driver, the compatible
+        * is "nvidia,tegra124-uart", "nvidia,tegra20-uart" and to enable
+        * the APB DMA based serial driver, the comptible is
+        * "nvidia,tegra124-hsuart", "nvidia,tegra30-hsuart".
+        */
+       uarta: serial@0,70006000 {
+               compatible = "nvidia,tegra124-uart", "nvidia,tegra20-uart";
+               reg = <0x0 0x70006000 0x0 0x40>;
+               reg-shift = <2>;
+               interrupts = <GIC_SPI 36 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA124_CLK_UARTA>;
+               clock-names = "serial";
+               resets = <&tegra_car 6>;
+               reset-names = "serial";
+               dmas = <&apbdma 8>, <&apbdma 8>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       uartb: serial@0,70006040 {
+               compatible = "nvidia,tegra124-uart", "nvidia,tegra20-uart";
+               reg = <0x0 0x70006040 0x0 0x40>;
+               reg-shift = <2>;
+               interrupts = <GIC_SPI 37 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA124_CLK_UARTB>;
+               clock-names = "serial";
+               resets = <&tegra_car 7>;
+               reset-names = "serial";
+               dmas = <&apbdma 9>, <&apbdma 9>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       uartc: serial@0,70006200 {
+               compatible = "nvidia,tegra124-uart", "nvidia,tegra20-uart";
+               reg = <0x0 0x70006200 0x0 0x40>;
+               reg-shift = <2>;
+               interrupts = <GIC_SPI 46 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA124_CLK_UARTC>;
+               clock-names = "serial";
+               resets = <&tegra_car 55>;
+               reset-names = "serial";
+               dmas = <&apbdma 10>, <&apbdma 10>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       uartd: serial@0,70006300 {
+               compatible = "nvidia,tegra124-uart", "nvidia,tegra20-uart";
+               reg = <0x0 0x70006300 0x0 0x40>;
+               reg-shift = <2>;
+               interrupts = <GIC_SPI 90 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA124_CLK_UARTD>;
+               clock-names = "serial";
+               resets = <&tegra_car 65>;
+               reset-names = "serial";
+               dmas = <&apbdma 19>, <&apbdma 19>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       pwm: pwm@0,7000a000 {
+               compatible = "nvidia,tegra124-pwm", "nvidia,tegra20-pwm";
+               reg = <0x0 0x7000a000 0x0 0x100>;
+               #pwm-cells = <2>;
+               clocks = <&tegra_car TEGRA124_CLK_PWM>;
+               clock-names = "pwm";
+               resets = <&tegra_car 17>;
+               reset-names = "pwm";
+               status = "disabled";
+       };
+
+       i2c@0,7000c000 {
+               compatible = "nvidia,tegra124-i2c", "nvidia,tegra114-i2c";
+               reg = <0x0 0x7000c000 0x0 0x100>;
+               interrupts = <GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA124_CLK_I2C1>;
+               clock-names = "div-clk";
+               resets = <&tegra_car 12>;
+               reset-names = "i2c";
+               dmas = <&apbdma 21>, <&apbdma 21>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       i2c@0,7000c400 {
+               compatible = "nvidia,tegra124-i2c", "nvidia,tegra114-i2c";
+               reg = <0x0 0x7000c400 0x0 0x100>;
+               interrupts = <GIC_SPI 84 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA124_CLK_I2C2>;
+               clock-names = "div-clk";
+               resets = <&tegra_car 54>;
+               reset-names = "i2c";
+               dmas = <&apbdma 22>, <&apbdma 22>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       i2c@0,7000c500 {
+               compatible = "nvidia,tegra124-i2c", "nvidia,tegra114-i2c";
+               reg = <0x0 0x7000c500 0x0 0x100>;
+               interrupts = <GIC_SPI 92 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA124_CLK_I2C3>;
+               clock-names = "div-clk";
+               resets = <&tegra_car 67>;
+               reset-names = "i2c";
+               dmas = <&apbdma 23>, <&apbdma 23>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       i2c@0,7000c700 {
+               compatible = "nvidia,tegra124-i2c", "nvidia,tegra114-i2c";
+               reg = <0x0 0x7000c700 0x0 0x100>;
+               interrupts = <GIC_SPI 120 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA124_CLK_I2C4>;
+               clock-names = "div-clk";
+               resets = <&tegra_car 103>;
+               reset-names = "i2c";
+               dmas = <&apbdma 26>, <&apbdma 26>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       i2c@0,7000d000 {
+               compatible = "nvidia,tegra124-i2c", "nvidia,tegra114-i2c";
+               reg = <0x0 0x7000d000 0x0 0x100>;
+               interrupts = <GIC_SPI 53 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA124_CLK_I2C5>;
+               clock-names = "div-clk";
+               resets = <&tegra_car 47>;
+               reset-names = "i2c";
+               dmas = <&apbdma 24>, <&apbdma 24>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       i2c@0,7000d100 {
+               compatible = "nvidia,tegra124-i2c", "nvidia,tegra114-i2c";
+               reg = <0x0 0x7000d100 0x0 0x100>;
+               interrupts = <GIC_SPI 63 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA124_CLK_I2C6>;
+               clock-names = "div-clk";
+               resets = <&tegra_car 166>;
+               reset-names = "i2c";
+               dmas = <&apbdma 30>, <&apbdma 30>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       spi@0,7000d400 {
+               compatible = "nvidia,tegra124-spi", "nvidia,tegra114-spi";
+               reg = <0x0 0x7000d400 0x0 0x200>;
+               interrupts = <GIC_SPI 59 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA124_CLK_SBC1>;
+               clock-names = "spi";
+               resets = <&tegra_car 41>;
+               reset-names = "spi";
+               dmas = <&apbdma 15>, <&apbdma 15>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       spi@0,7000d600 {
+               compatible = "nvidia,tegra124-spi", "nvidia,tegra114-spi";
+               reg = <0x0 0x7000d600 0x0 0x200>;
+               interrupts = <GIC_SPI 82 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA124_CLK_SBC2>;
+               clock-names = "spi";
+               resets = <&tegra_car 44>;
+               reset-names = "spi";
+               dmas = <&apbdma 16>, <&apbdma 16>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       spi@0,7000d800 {
+               compatible = "nvidia,tegra124-spi", "nvidia,tegra114-spi";
+               reg = <0x0 0x7000d800 0x0 0x200>;
+               interrupts = <GIC_SPI 83 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA124_CLK_SBC3>;
+               clock-names = "spi";
+               resets = <&tegra_car 46>;
+               reset-names = "spi";
+               dmas = <&apbdma 17>, <&apbdma 17>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       spi@0,7000da00 {
+               compatible = "nvidia,tegra124-spi", "nvidia,tegra114-spi";
+               reg = <0x0 0x7000da00 0x0 0x200>;
+               interrupts = <GIC_SPI 93 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA124_CLK_SBC4>;
+               clock-names = "spi";
+               resets = <&tegra_car 68>;
+               reset-names = "spi";
+               dmas = <&apbdma 18>, <&apbdma 18>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       spi@0,7000dc00 {
+               compatible = "nvidia,tegra124-spi", "nvidia,tegra114-spi";
+               reg = <0x0 0x7000dc00 0x0 0x200>;
+               interrupts = <GIC_SPI 94 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA124_CLK_SBC5>;
+               clock-names = "spi";
+               resets = <&tegra_car 104>;
+               reset-names = "spi";
+               dmas = <&apbdma 27>, <&apbdma 27>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       spi@0,7000de00 {
+               compatible = "nvidia,tegra124-spi", "nvidia,tegra114-spi";
+               reg = <0x0 0x7000de00 0x0 0x200>;
+               interrupts = <GIC_SPI 79 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA124_CLK_SBC6>;
+               clock-names = "spi";
+               resets = <&tegra_car 105>;
+               reset-names = "spi";
+               dmas = <&apbdma 28>, <&apbdma 28>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       rtc@0,7000e000 {
+               compatible = "nvidia,tegra124-rtc", "nvidia,tegra20-rtc";
+               reg = <0x0 0x7000e000 0x0 0x100>;
+               interrupts = <GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA124_CLK_RTC>;
+               clock-names = "rtc";
+       };
+
+       pmc@0,7000e400 {
+               compatible = "nvidia,tegra124-pmc";
+               reg = <0x0 0x7000e400 0x0 0x400>;
+               clocks = <&tegra_car TEGRA124_CLK_PCLK>, <&clk32k_in>;
+               clock-names = "pclk", "clk32k_in";
+       };
+
+       fuse@0,7000f800 {
+               compatible = "nvidia,tegra124-efuse";
+               reg = <0x0 0x7000f800 0x0 0x400>;
+               clocks = <&tegra_car TEGRA124_CLK_FUSE>;
+               clock-names = "fuse";
+               resets = <&tegra_car 39>;
+               reset-names = "fuse";
+       };
+
+       mc: memory-controller@0,70019000 {
+               compatible = "nvidia,tegra132-mc";
+               reg = <0x0 0x70019000 0x0 0x1000>;
+               clocks = <&tegra_car TEGRA124_CLK_MC>;
+               clock-names = "mc";
+
+               interrupts = <GIC_SPI 77 IRQ_TYPE_LEVEL_HIGH>;
+
+               #iommu-cells = <1>;
+       };
+
+       emc: emc@0,7001b000 {
+               compatible = "nvidia,tegra132-emc", "nvidia,tegra124-emc";
+               reg = <0x0 0x7001b000 0x0 0x1000>;
+
+               nvidia,memory-controller = <&mc>;
+       };
+
+       sata@0,70020000 {
+               compatible = "nvidia,tegra124-ahci";
+               reg = <0x0 0x70027000 0x0 0x2000>, /* AHCI */
+                     <0x0 0x70020000 0x0 0x7000>; /* SATA */
+               interrupts = <GIC_SPI 23 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA124_CLK_SATA>,
+                        <&tegra_car TEGRA124_CLK_SATA_OOB>,
+                        <&tegra_car TEGRA124_CLK_CML1>,
+                        <&tegra_car TEGRA124_CLK_PLL_E>;
+               clock-names = "sata", "sata-oob", "cml1", "pll_e";
+               resets = <&tegra_car 124>,
+                        <&tegra_car 123>,
+                        <&tegra_car 129>;
+               reset-names = "sata", "sata-oob", "sata-cold";
+               phys = <&padctl TEGRA_XUSB_PADCTL_SATA>;
+               phy-names = "sata-phy";
+               status = "disabled";
+       };
+
+       hda@0,70030000 {
+               compatible = "nvidia,tegra132-hda", "nvidia,tegra124-hda",
+                            "nvidia,tegra30-hda";
+               reg = <0x0 0x70030000 0x0 0x10000>;
+               interrupts = <GIC_SPI 81 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA124_CLK_HDA>,
+                        <&tegra_car TEGRA124_CLK_HDA2HDMI>,
+                        <&tegra_car TEGRA124_CLK_HDA2CODEC_2X>;
+               clock-names = "hda", "hda2hdmi", "hda2codec_2x";
+               resets = <&tegra_car 125>, /* hda */
+                        <&tegra_car 128>, /* hda2hdmi */
+                        <&tegra_car 111>; /* hda2codec_2x */
+               reset-names = "hda", "hda2hdmi", "hda2codec_2x";
+               status = "disabled";
+       };
+
+       padctl: padctl@0,7009f000 {
+               compatible = "nvidia,tegra132-xusb-padctl",
+                            "nvidia,tegra124-xusb-padctl";
+               reg = <0x0 0x7009f000 0x0 0x1000>;
+               resets = <&tegra_car 142>;
+               reset-names = "padctl";
+
+               #phy-cells = <1>;
+
+               phys {
+                       pcie-0 {
+                               status = "disabled";
+                       };
+
+                       sata-0 {
+                               status = "disabled";
+                       };
+
+                       usb3-0 {
+                               status = "disabled";
+                       };
+
+                       usb3-1 {
+                               status = "disabled";
+                       };
+
+                       utmi-0 {
+                               status = "disabled";
+                       };
+
+                       utmi-1 {
+                               status = "disabled";
+                       };
+
+                       utmi-2 {
+                               status = "disabled";
+                       };
+               };
+       };
+
+       sdhci@0,700b0000 {
+               compatible = "nvidia,tegra124-sdhci";
+               reg = <0x0 0x700b0000 0x0 0x200>;
+               interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA124_CLK_SDMMC1>;
+               clock-names = "sdhci";
+               resets = <&tegra_car 14>;
+               reset-names = "sdhci";
+               status = "disabled";
+       };
+
+       sdhci@0,700b0200 {
+               compatible = "nvidia,tegra124-sdhci";
+               reg = <0x0 0x700b0200 0x0 0x200>;
+               interrupts = <GIC_SPI 15 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA124_CLK_SDMMC2>;
+               clock-names = "sdhci";
+               resets = <&tegra_car 9>;
+               reset-names = "sdhci";
+               status = "disabled";
+       };
+
+       sdhci@0,700b0400 {
+               compatible = "nvidia,tegra124-sdhci";
+               reg = <0x0 0x700b0400 0x0 0x200>;
+               interrupts = <GIC_SPI 19 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA124_CLK_SDMMC3>;
+               clock-names = "sdhci";
+               resets = <&tegra_car 69>;
+               reset-names = "sdhci";
+               status = "disabled";
+       };
+
+       sdhci@0,700b0600 {
+               compatible = "nvidia,tegra124-sdhci";
+               reg = <0x0 0x700b0600 0x0 0x200>;
+               interrupts = <GIC_SPI 31 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA124_CLK_SDMMC4>;
+               clock-names = "sdhci";
+               resets = <&tegra_car 15>;
+               reset-names = "sdhci";
+               status = "disabled";
+       };
+
+       soctherm: thermal-sensor@0,700e2000 {
+               compatible = "nvidia,tegra124-soctherm";
+               reg = <0x0 0x700e2000 0x0 0x1000>;
+               interrupts = <GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA124_CLK_TSENSOR>,
+                       <&tegra_car TEGRA124_CLK_SOC_THERM>;
+               clock-names = "tsensor", "soctherm";
+               resets = <&tegra_car 78>;
+               reset-names = "soctherm";
+               #thermal-sensor-cells = <1>;
+       };
+
+       ahub@0,70300000 {
+               compatible = "nvidia,tegra124-ahub";
+               reg = <0x0 0x70300000 0x0 0x200>,
+                     <0x0 0x70300800 0x0 0x800>,
+                     <0x0 0x70300200 0x0 0x600>;
+               interrupts = <GIC_SPI 103 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA124_CLK_D_AUDIO>,
+                        <&tegra_car TEGRA124_CLK_APBIF>;
+               clock-names = "d_audio", "apbif";
+               resets = <&tegra_car 106>, /* d_audio */
+                        <&tegra_car 107>, /* apbif */
+                        <&tegra_car 30>,  /* i2s0 */
+                        <&tegra_car 11>,  /* i2s1 */
+                        <&tegra_car 18>,  /* i2s2 */
+                        <&tegra_car 101>, /* i2s3 */
+                        <&tegra_car 102>, /* i2s4 */
+                        <&tegra_car 108>, /* dam0 */
+                        <&tegra_car 109>, /* dam1 */
+                        <&tegra_car 110>, /* dam2 */
+                        <&tegra_car 10>,  /* spdif */
+                        <&tegra_car 153>, /* amx */
+                        <&tegra_car 185>, /* amx1 */
+                        <&tegra_car 154>, /* adx */
+                        <&tegra_car 180>, /* adx1 */
+                        <&tegra_car 186>, /* afc0 */
+                        <&tegra_car 187>, /* afc1 */
+                        <&tegra_car 188>, /* afc2 */
+                        <&tegra_car 189>, /* afc3 */
+                        <&tegra_car 190>, /* afc4 */
+                        <&tegra_car 191>; /* afc5 */
+               reset-names = "d_audio", "apbif", "i2s0", "i2s1", "i2s2",
+                             "i2s3", "i2s4", "dam0", "dam1", "dam2",
+                             "spdif", "amx", "amx1", "adx", "adx1",
+                             "afc0", "afc1", "afc2", "afc3", "afc4", "afc5";
+               dmas = <&apbdma 1>, <&apbdma 1>,
+                      <&apbdma 2>, <&apbdma 2>,
+                      <&apbdma 3>, <&apbdma 3>,
+                      <&apbdma 4>, <&apbdma 4>,
+                      <&apbdma 6>, <&apbdma 6>,
+                      <&apbdma 7>, <&apbdma 7>,
+                      <&apbdma 12>, <&apbdma 12>,
+                      <&apbdma 13>, <&apbdma 13>,
+                      <&apbdma 14>, <&apbdma 14>,
+                      <&apbdma 29>, <&apbdma 29>;
+               dma-names = "rx0", "tx0", "rx1", "tx1", "rx2", "tx2",
+                           "rx3", "tx3", "rx4", "tx4", "rx5", "tx5",
+                           "rx6", "tx6", "rx7", "tx7", "rx8", "tx8",
+                           "rx9", "tx9";
+               ranges;
+               #address-cells = <2>;
+               #size-cells = <2>;
+
+               tegra_i2s0: i2s@0,70301000 {
+                       compatible = "nvidia,tegra124-i2s";
+                       reg = <0x0 0x70301000 0x0 0x100>;
+                       nvidia,ahub-cif-ids = <4 4>;
+                       clocks = <&tegra_car TEGRA124_CLK_I2S0>;
+                       clock-names = "i2s";
+                       resets = <&tegra_car 30>;
+                       reset-names = "i2s";
+                       status = "disabled";
+               };
+
+               tegra_i2s1: i2s@0,70301100 {
+                       compatible = "nvidia,tegra124-i2s";
+                       reg = <0x0 0x70301100 0x0 0x100>;
+                       nvidia,ahub-cif-ids = <5 5>;
+                       clocks = <&tegra_car TEGRA124_CLK_I2S1>;
+                       clock-names = "i2s";
+                       resets = <&tegra_car 11>;
+                       reset-names = "i2s";
+                       status = "disabled";
+               };
+
+               tegra_i2s2: i2s@0,70301200 {
+                       compatible = "nvidia,tegra124-i2s";
+                       reg = <0x0 0x70301200 0x0 0x100>;
+                       nvidia,ahub-cif-ids = <6 6>;
+                       clocks = <&tegra_car TEGRA124_CLK_I2S2>;
+                       clock-names = "i2s";
+                       resets = <&tegra_car 18>;
+                       reset-names = "i2s";
+                       status = "disabled";
+               };
+
+               tegra_i2s3: i2s@0,70301300 {
+                       compatible = "nvidia,tegra124-i2s";
+                       reg = <0x0 0x70301300 0x0 0x100>;
+                       nvidia,ahub-cif-ids = <7 7>;
+                       clocks = <&tegra_car TEGRA124_CLK_I2S3>;
+                       clock-names = "i2s";
+                       resets = <&tegra_car 101>;
+                       reset-names = "i2s";
+                       status = "disabled";
+               };
+
+               tegra_i2s4: i2s@0,70301400 {
+                       compatible = "nvidia,tegra124-i2s";
+                       reg = <0x0 0x70301400 0x0 0x100>;
+                       nvidia,ahub-cif-ids = <8 8>;
+                       clocks = <&tegra_car TEGRA124_CLK_I2S4>;
+                       clock-names = "i2s";
+                       resets = <&tegra_car 102>;
+                       reset-names = "i2s";
+                       status = "disabled";
+               };
+       };
+
+       usb@0,7d000000 {
+               compatible = "nvidia,tegra124-ehci", "nvidia,tegra30-ehci", "usb-ehci";
+               reg = <0x0 0x7d000000 0x0 0x4000>;
+               interrupts = <GIC_SPI 20 IRQ_TYPE_LEVEL_HIGH>;
+               phy_type = "utmi";
+               clocks = <&tegra_car TEGRA124_CLK_USBD>;
+               clock-names = "usb";
+               resets = <&tegra_car 22>;
+               reset-names = "usb";
+               nvidia,phy = <&phy1>;
+               status = "disabled";
+       };
+
+       phy1: usb-phy@0,7d000000 {
+               compatible = "nvidia,tegra124-usb-phy", "nvidia,tegra30-usb-phy";
+               reg = <0x0 0x7d000000 0x0 0x4000>,
+                     <0x0 0x7d000000 0x0 0x4000>;
+               phy_type = "utmi";
+               clocks = <&tegra_car TEGRA124_CLK_USBD>,
+                        <&tegra_car TEGRA124_CLK_PLL_U>,
+                        <&tegra_car TEGRA124_CLK_USBD>;
+               clock-names = "reg", "pll_u", "utmi-pads";
+               resets = <&tegra_car 22>, <&tegra_car 22>;
+               reset-names = "usb", "utmi-pads";
+               nvidia,hssync-start-delay = <0>;
+               nvidia,idle-wait-delay = <17>;
+               nvidia,elastic-limit = <16>;
+               nvidia,term-range-adj = <6>;
+               nvidia,xcvr-setup = <9>;
+               nvidia,xcvr-lsfslew = <0>;
+               nvidia,xcvr-lsrslew = <3>;
+               nvidia,hssquelch-level = <2>;
+               nvidia,hsdiscon-level = <5>;
+               nvidia,xcvr-hsslew = <12>;
+               nvidia,has-utmi-pad-registers;
+               status = "disabled";
+       };
+
+       usb@0,7d004000 {
+               compatible = "nvidia,tegra124-ehci", "nvidia,tegra30-ehci", "usb-ehci";
+               reg = <0x0 0x7d004000 0x0 0x4000>;
+               interrupts = <GIC_SPI 21 IRQ_TYPE_LEVEL_HIGH>;
+               phy_type = "utmi";
+               clocks = <&tegra_car TEGRA124_CLK_USB2>;
+               clock-names = "usb";
+               resets = <&tegra_car 58>;
+               reset-names = "usb";
+               nvidia,phy = <&phy2>;
+               status = "disabled";
+       };
+
+       phy2: usb-phy@0,7d004000 {
+               compatible = "nvidia,tegra124-usb-phy", "nvidia,tegra30-usb-phy";
+               reg = <0x0 0x7d004000 0x0 0x4000>,
+                     <0x0 0x7d000000 0x0 0x4000>;
+               phy_type = "utmi";
+               clocks = <&tegra_car TEGRA124_CLK_USB2>,
+                        <&tegra_car TEGRA124_CLK_PLL_U>,
+                        <&tegra_car TEGRA124_CLK_USBD>;
+               clock-names = "reg", "pll_u", "utmi-pads";
+               resets = <&tegra_car 58>, <&tegra_car 22>;
+               reset-names = "usb", "utmi-pads";
+               nvidia,hssync-start-delay = <0>;
+               nvidia,idle-wait-delay = <17>;
+               nvidia,elastic-limit = <16>;
+               nvidia,term-range-adj = <6>;
+               nvidia,xcvr-setup = <9>;
+               nvidia,xcvr-lsfslew = <0>;
+               nvidia,xcvr-lsrslew = <3>;
+               nvidia,hssquelch-level = <2>;
+               nvidia,hsdiscon-level = <5>;
+               nvidia,xcvr-hsslew = <12>;
+               status = "disabled";
+       };
+
+       usb@0,7d008000 {
+               compatible = "nvidia,tegra124-ehci", "nvidia,tegra30-ehci", "usb-ehci";
+               reg = <0x0 0x7d008000 0x0 0x4000>;
+               interrupts = <GIC_SPI 97 IRQ_TYPE_LEVEL_HIGH>;
+               phy_type = "utmi";
+               clocks = <&tegra_car TEGRA124_CLK_USB3>;
+               clock-names = "usb";
+               resets = <&tegra_car 59>;
+               reset-names = "usb";
+               nvidia,phy = <&phy3>;
+               status = "disabled";
+       };
+
+       phy3: usb-phy@0,7d008000 {
+               compatible = "nvidia,tegra124-usb-phy", "nvidia,tegra30-usb-phy";
+               reg = <0x0 0x7d008000 0x0 0x4000>,
+                     <0x0 0x7d000000 0x0 0x4000>;
+               phy_type = "utmi";
+               clocks = <&tegra_car TEGRA124_CLK_USB3>,
+                        <&tegra_car TEGRA124_CLK_PLL_U>,
+                        <&tegra_car TEGRA124_CLK_USBD>;
+               clock-names = "reg", "pll_u", "utmi-pads";
+               resets = <&tegra_car 59>, <&tegra_car 22>;
+               reset-names = "usb", "utmi-pads";
+               nvidia,hssync-start-delay = <0>;
+               nvidia,idle-wait-delay = <17>;
+               nvidia,elastic-limit = <16>;
+               nvidia,term-range-adj = <6>;
+               nvidia,xcvr-setup = <9>;
+               nvidia,xcvr-lsfslew = <0>;
+               nvidia,xcvr-lsrslew = <3>;
+               nvidia,hssquelch-level = <2>;
+               nvidia,hsdiscon-level = <5>;
+               nvidia,xcvr-hsslew = <12>;
+               status = "disabled";
+       };
+
+       cpus {
+               #address-cells = <1>;
+               #size-cells = <0>;
+
+               cpu@0 {
+                       device_type = "cpu";
+                       compatible = "nvidia,denver", "arm,armv8";
+                       reg = <0>;
+               };
+
+               cpu@1 {
+                       device_type = "cpu";
+                       compatible = "nvidia,denver", "arm,armv8";
+                       reg = <1>;
+               };
+       };
+
+       timer {
+               compatible = "arm,armv7-timer";
+               interrupts = <GIC_PPI 13
+                               (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+                            <GIC_PPI 14
+                               (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+                            <GIC_PPI 11
+                               (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+                            <GIC_PPI 10
+                               (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>;
+               interrupt-parent = <&gic>;
+       };
+};
diff --git a/arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi b/arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi
new file mode 100644 (file)
index 0000000..2b7f889
--- /dev/null
@@ -0,0 +1,45 @@
+#include "tegra210.dtsi"
+
+/ {
+       model = "NVIDIA Jetson TX1";
+       compatible = "nvidia,p2180", "nvidia,tegra210";
+
+       aliases {
+               rtc1 = "/rtc@0,7000e000";
+               serial0 = &uarta;
+       };
+
+       memory {
+               device_type = "memory";
+               reg = <0x0 0x80000000 0x1 0x0>;
+       };
+
+       /* debug port */
+       serial@0,70006000 {
+               status = "okay";
+       };
+
+       pmc@0,7000e400 {
+               nvidia,invert-interrupt;
+       };
+
+       /* eMMC */
+       sdhci@0,700b0600 {
+               status = "okay";
+               bus-width = <8>;
+               non-removable;
+       };
+
+       clocks {
+               compatible = "simple-bus";
+               #address-cells = <1>;
+               #size-cells = <0>;
+
+               clk32k_in: clock@0 {
+                       compatible = "fixed-clock";
+                       reg = <0>;
+                       #clock-cells = <0>;
+                       clock-frequency = <32768>;
+               };
+       };
+};
diff --git a/arch/arm64/boot/dts/nvidia/tegra210-p2371-0000.dts b/arch/arm64/boot/dts/nvidia/tegra210-p2371-0000.dts
new file mode 100644 (file)
index 0000000..1ddd851
--- /dev/null
@@ -0,0 +1,9 @@
+/dts-v1/;
+
+#include "tegra210-p2530.dtsi"
+#include "tegra210-p2595.dtsi"
+
+/ {
+       model = "NVIDIA Tegra210 P2371 (P2530/P2595) reference design";
+       compatible = "nvidia,p2371-0000", "nvidia,tegra210";
+};
diff --git a/arch/arm64/boot/dts/nvidia/tegra210-p2371-2180.dts b/arch/arm64/boot/dts/nvidia/tegra210-p2371-2180.dts
new file mode 100644 (file)
index 0000000..683b339
--- /dev/null
@@ -0,0 +1,9 @@
+/dts-v1/;
+
+#include "tegra210-p2180.dtsi"
+#include "tegra210-p2597.dtsi"
+
+/ {
+       model = "NVIDIA Jetson TX1 Developer Kit";
+       compatible = "nvidia,p2371-2180", "nvidia,tegra210";
+};
diff --git a/arch/arm64/boot/dts/nvidia/tegra210-p2530.dtsi b/arch/arm64/boot/dts/nvidia/tegra210-p2530.dtsi
new file mode 100644 (file)
index 0000000..ece0dec
--- /dev/null
@@ -0,0 +1,50 @@
+#include "tegra210.dtsi"
+
+/ {
+       model = "NVIDIA Tegra210 P2530 main board";
+       compatible = "nvidia,p2530", "nvidia,tegra210";
+
+       aliases {
+               rtc1 = "/rtc@0,7000e000";
+               serial0 = &uarta;
+       };
+
+       memory {
+               device_type = "memory";
+               reg = <0x0 0x80000000 0x0 0xc0000000>;
+       };
+
+       /* debug port */
+       serial@0,70006000 {
+               status = "okay";
+       };
+
+       i2c@0,7000d000 {
+               status = "okay";
+               clock-frequency = <400000>;
+       };
+
+       pmc@0,7000e400 {
+               nvidia,invert-interrupt;
+       };
+
+       /* eMMC */
+       sdhci@0,700b0600 {
+               status = "okay";
+               bus-width = <8>;
+               non-removable;
+       };
+
+       clocks {
+               compatible = "simple-bus";
+               #address-cells = <1>;
+               #size-cells = <0>;
+
+               clk32k_in: clock@0 {
+                       compatible = "fixed-clock";
+                       reg = <0>;
+                       #clock-cells = <0>;
+                       clock-frequency = <32768>;
+               };
+       };
+};
diff --git a/arch/arm64/boot/dts/nvidia/tegra210-p2571.dts b/arch/arm64/boot/dts/nvidia/tegra210-p2571.dts
new file mode 100644 (file)
index 0000000..58d27dd
--- /dev/null
@@ -0,0 +1,1302 @@
+/dts-v1/;
+
+#include <dt-bindings/input/input.h>
+#include "tegra210-p2530.dtsi"
+
+/ {
+       model = "NVIDIA Tegra210 P2571 reference design";
+       compatible = "nvidia,p2571", "nvidia,tegra210";
+
+       pinmux: pinmux@0,700008d4 {
+               pinctrl-names = "boot";
+               pinctrl-0 = <&state_boot>;
+
+               state_boot: pinmux {
+                       pex_l0_rst_n_pa0 {
+                               nvidia,pins = "pex_l0_rst_n_pa0";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       pex_l0_clkreq_n_pa1 {
+                               nvidia,pins = "pex_l0_clkreq_n_pa1";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       pex_wake_n_pa2 {
+                               nvidia,pins = "pex_wake_n_pa2";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       pex_l1_rst_n_pa3 {
+                               nvidia,pins = "pex_l1_rst_n_pa3";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       pex_l1_clkreq_n_pa4 {
+                               nvidia,pins = "pex_l1_clkreq_n_pa4";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       sata_led_active_pa5 {
+                               nvidia,pins = "sata_led_active_pa5";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pa6 {
+                               nvidia,pins = "pa6";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap1_fs_pb0 {
+                               nvidia,pins = "dap1_fs_pb0";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap1_din_pb1 {
+                               nvidia,pins = "dap1_din_pb1";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap1_dout_pb2 {
+                               nvidia,pins = "dap1_dout_pb2";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap1_sclk_pb3 {
+                               nvidia,pins = "dap1_sclk_pb3";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi2_mosi_pb4 {
+                               nvidia,pins = "spi2_mosi_pb4";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi2_miso_pb5 {
+                               nvidia,pins = "spi2_miso_pb5";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi2_sck_pb6 {
+                               nvidia,pins = "spi2_sck_pb6";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi2_cs0_pb7 {
+                               nvidia,pins = "spi2_cs0_pb7";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi1_mosi_pc0 {
+                               nvidia,pins = "spi1_mosi_pc0";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi1_miso_pc1 {
+                               nvidia,pins = "spi1_miso_pc1";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi1_sck_pc2 {
+                               nvidia,pins = "spi1_sck_pc2";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi1_cs0_pc3 {
+                               nvidia,pins = "spi1_cs0_pc3";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi1_cs1_pc4 {
+                               nvidia,pins = "spi1_cs1_pc4";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi4_sck_pc5 {
+                               nvidia,pins = "spi4_sck_pc5";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi4_cs0_pc6 {
+                               nvidia,pins = "spi4_cs0_pc6";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi4_mosi_pc7 {
+                               nvidia,pins = "spi4_mosi_pc7";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi4_miso_pd0 {
+                               nvidia,pins = "spi4_miso_pd0";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart3_tx_pd1 {
+                               nvidia,pins = "uart3_tx_pd1";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart3_rx_pd2 {
+                               nvidia,pins = "uart3_rx_pd2";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart3_rts_pd3 {
+                               nvidia,pins = "uart3_rts_pd3";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart3_cts_pd4 {
+                               nvidia,pins = "uart3_cts_pd4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dmic1_clk_pe0 {
+                               nvidia,pins = "dmic1_clk_pe0";
+                               nvidia,function = "i2s3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dmic1_dat_pe1 {
+                               nvidia,pins = "dmic1_dat_pe1";
+                               nvidia,function = "i2s3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dmic2_clk_pe2 {
+                               nvidia,pins = "dmic2_clk_pe2";
+                               nvidia,function = "i2s3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dmic2_dat_pe3 {
+                               nvidia,pins = "dmic2_dat_pe3";
+                               nvidia,function = "i2s3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dmic3_clk_pe4 {
+                               nvidia,pins = "dmic3_clk_pe4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dmic3_dat_pe5 {
+                               nvidia,pins = "dmic3_dat_pe5";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pe6 {
+                               nvidia,pins = "pe6";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pe7 {
+                               nvidia,pins = "pe7";
+                               nvidia,function = "pwm3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       gen3_i2c_scl_pf0 {
+                               nvidia,pins = "gen3_i2c_scl_pf0";
+                               nvidia,function = "i2c3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       gen3_i2c_sda_pf1 {
+                               nvidia,pins = "gen3_i2c_sda_pf1";
+                               nvidia,function = "i2c3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart2_tx_pg0 {
+                               nvidia,pins = "uart2_tx_pg0";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart2_rx_pg1 {
+                               nvidia,pins = "uart2_rx_pg1";
+                               nvidia,function = "uartb";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart2_rts_pg2 {
+                               nvidia,pins = "uart2_rts_pg2";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart2_cts_pg3 {
+                               nvidia,pins = "uart2_cts_pg3";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       wifi_en_ph0 {
+                               nvidia,pins = "wifi_en_ph0";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       wifi_rst_ph1 {
+                               nvidia,pins = "wifi_rst_ph1";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       wifi_wake_ap_ph2 {
+                               nvidia,pins = "wifi_wake_ap_ph2";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       ap_wake_bt_ph3 {
+                               nvidia,pins = "ap_wake_bt_ph3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       bt_rst_ph4 {
+                               nvidia,pins = "bt_rst_ph4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       bt_wake_ap_ph5 {
+                               nvidia,pins = "bt_wake_ap_ph5";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       ph6 {
+                               nvidia,pins = "ph6";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       ap_wake_nfc_ph7 {
+                               nvidia,pins = "ap_wake_nfc_ph7";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       nfc_en_pi0 {
+                               nvidia,pins = "nfc_en_pi0";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       nfc_int_pi1 {
+                               nvidia,pins = "nfc_int_pi1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       gps_en_pi2 {
+                               nvidia,pins = "gps_en_pi2";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       gps_rst_pi3 {
+                               nvidia,pins = "gps_rst_pi3";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart4_tx_pi4 {
+                               nvidia,pins = "uart4_tx_pi4";
+                               nvidia,function = "uartd";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart4_rx_pi5 {
+                               nvidia,pins = "uart4_rx_pi5";
+                               nvidia,function = "uartd";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart4_rts_pi6 {
+                               nvidia,pins = "uart4_rts_pi6";
+                               nvidia,function = "uartd";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart4_cts_pi7 {
+                               nvidia,pins = "uart4_cts_pi7";
+                               nvidia,function = "uartd";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       gen1_i2c_sda_pj0 {
+                               nvidia,pins = "gen1_i2c_sda_pj0";
+                               nvidia,function = "i2c1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       gen1_i2c_scl_pj1 {
+                               nvidia,pins = "gen1_i2c_scl_pj1";
+                               nvidia,function = "i2c1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       gen2_i2c_scl_pj2 {
+                               nvidia,pins = "gen2_i2c_scl_pj2";
+                               nvidia,function = "i2c2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       gen2_i2c_sda_pj3 {
+                               nvidia,pins = "gen2_i2c_sda_pj3";
+                               nvidia,function = "i2c2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       dap4_fs_pj4 {
+                               nvidia,pins = "dap4_fs_pj4";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap4_din_pj5 {
+                               nvidia,pins = "dap4_din_pj5";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap4_dout_pj6 {
+                               nvidia,pins = "dap4_dout_pj6";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap4_sclk_pj7 {
+                               nvidia,pins = "dap4_sclk_pj7";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk0 {
+                               nvidia,pins = "pk0";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk1 {
+                               nvidia,pins = "pk1";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk2 {
+                               nvidia,pins = "pk2";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk3 {
+                               nvidia,pins = "pk3";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk4 {
+                               nvidia,pins = "pk4";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk5 {
+                               nvidia,pins = "pk5";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk6 {
+                               nvidia,pins = "pk6";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk7 {
+                               nvidia,pins = "pk7";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pl0 {
+                               nvidia,pins = "pl0";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pl1 {
+                               nvidia,pins = "pl1";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc1_clk_pm0 {
+                               nvidia,pins = "sdmmc1_clk_pm0";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc1_cmd_pm1 {
+                               nvidia,pins = "sdmmc1_cmd_pm1";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc1_dat3_pm2 {
+                               nvidia,pins = "sdmmc1_dat3_pm2";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc1_dat2_pm3 {
+                               nvidia,pins = "sdmmc1_dat2_pm3";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc1_dat1_pm4 {
+                               nvidia,pins = "sdmmc1_dat1_pm4";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc1_dat0_pm5 {
+                               nvidia,pins = "sdmmc1_dat0_pm5";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc3_clk_pp0 {
+                               nvidia,pins = "sdmmc3_clk_pp0";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc3_cmd_pp1 {
+                               nvidia,pins = "sdmmc3_cmd_pp1";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc3_dat3_pp2 {
+                               nvidia,pins = "sdmmc3_dat3_pp2";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc3_dat2_pp3 {
+                               nvidia,pins = "sdmmc3_dat2_pp3";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc3_dat1_pp4 {
+                               nvidia,pins = "sdmmc3_dat1_pp4";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc3_dat0_pp5 {
+                               nvidia,pins = "sdmmc3_dat0_pp5";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam1_mclk_ps0 {
+                               nvidia,pins = "cam1_mclk_ps0";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam2_mclk_ps1 {
+                               nvidia,pins = "cam2_mclk_ps1";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam_i2c_scl_ps2 {
+                               nvidia,pins = "cam_i2c_scl_ps2";
+                               nvidia,function = "i2cvi";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam_i2c_sda_ps3 {
+                               nvidia,pins = "cam_i2c_sda_ps3";
+                               nvidia,function = "i2cvi";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam_rst_ps4 {
+                               nvidia,pins = "cam_rst_ps4";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam_af_en_ps5 {
+                               nvidia,pins = "cam_af_en_ps5";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam_flash_en_ps6 {
+                               nvidia,pins = "cam_flash_en_ps6";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam1_pwdn_ps7 {
+                               nvidia,pins = "cam1_pwdn_ps7";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam2_pwdn_pt0 {
+                               nvidia,pins = "cam2_pwdn_pt0";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam1_strobe_pt1 {
+                               nvidia,pins = "cam1_strobe_pt1";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart1_tx_pu0 {
+                               nvidia,pins = "uart1_tx_pu0";
+                               nvidia,function = "uarta";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart1_rx_pu1 {
+                               nvidia,pins = "uart1_rx_pu1";
+                               nvidia,function = "uarta";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart1_rts_pu2 {
+                               nvidia,pins = "uart1_rts_pu2";
+                               nvidia,function = "uarta";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart1_cts_pu3 {
+                               nvidia,pins = "uart1_cts_pu3";
+                               nvidia,function = "uarta";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       lcd_bl_pwm_pv0 {
+                               nvidia,pins = "lcd_bl_pwm_pv0";
+                               nvidia,function = "pwm0";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       lcd_bl_en_pv1 {
+                               nvidia,pins = "lcd_bl_en_pv1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       lcd_rst_pv2 {
+                               nvidia,pins = "lcd_rst_pv2";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       lcd_gpio1_pv3 {
+                               nvidia,pins = "lcd_gpio1_pv3";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       lcd_gpio2_pv4 {
+                               nvidia,pins = "lcd_gpio2_pv4";
+                               nvidia,function = "pwm1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       ap_ready_pv5 {
+                               nvidia,pins = "ap_ready_pv5";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       touch_rst_pv6 {
+                               nvidia,pins = "touch_rst_pv6";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       touch_clk_pv7 {
+                               nvidia,pins = "touch_clk_pv7";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       modem_wake_ap_px0 {
+                               nvidia,pins = "modem_wake_ap_px0";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       touch_int_px1 {
+                               nvidia,pins = "touch_int_px1";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       motion_int_px2 {
+                               nvidia,pins = "motion_int_px2";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       als_prox_int_px3 {
+                               nvidia,pins = "als_prox_int_px3";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       temp_alert_px4 {
+                               nvidia,pins = "temp_alert_px4";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       button_power_on_px5 {
+                               nvidia,pins = "button_power_on_px5";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       button_vol_up_px6 {
+                               nvidia,pins = "button_vol_up_px6";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       button_vol_down_px7 {
+                               nvidia,pins = "button_vol_down_px7";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       button_slide_sw_py0 {
+                               nvidia,pins = "button_slide_sw_py0";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       button_home_py1 {
+                               nvidia,pins = "button_home_py1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       lcd_te_py2 {
+                               nvidia,pins = "lcd_te_py2";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pwr_i2c_scl_py3 {
+                               nvidia,pins = "pwr_i2c_scl_py3";
+                               nvidia,function = "i2cpmu";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       pwr_i2c_sda_py4 {
+                               nvidia,pins = "pwr_i2c_sda_py4";
+                               nvidia,function = "i2cpmu";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       clk_32k_out_py5 {
+                               nvidia,pins = "clk_32k_out_py5";
+                               nvidia,function = "soc";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pz0 {
+                               nvidia,pins = "pz0";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pz1 {
+                               nvidia,pins = "pz1";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pz2 {
+                               nvidia,pins = "pz2";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pz3 {
+                               nvidia,pins = "pz3";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pz4 {
+                               nvidia,pins = "pz4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pz5 {
+                               nvidia,pins = "pz5";
+                               nvidia,function = "soc";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap2_fs_paa0 {
+                               nvidia,pins = "dap2_fs_paa0";
+                               nvidia,function = "i2s2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap2_sclk_paa1 {
+                               nvidia,pins = "dap2_sclk_paa1";
+                               nvidia,function = "i2s2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap2_din_paa2 {
+                               nvidia,pins = "dap2_din_paa2";
+                               nvidia,function = "i2s2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap2_dout_paa3 {
+                               nvidia,pins = "dap2_dout_paa3";
+                               nvidia,function = "i2s2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       aud_mclk_pbb0 {
+                               nvidia,pins = "aud_mclk_pbb0";
+                               nvidia,function = "aud";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dvfs_pwm_pbb1 {
+                               nvidia,pins = "dvfs_pwm_pbb1";
+                               nvidia,function = "cldvfs";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dvfs_clk_pbb2 {
+                               nvidia,pins = "dvfs_clk_pbb2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       gpio_x1_aud_pbb3 {
+                               nvidia,pins = "gpio_x1_aud_pbb3";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       gpio_x3_aud_pbb4 {
+                               nvidia,pins = "gpio_x3_aud_pbb4";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       hdmi_cec_pcc0 {
+                               nvidia,pins = "hdmi_cec_pcc0";
+                               nvidia,function = "cec";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       hdmi_int_dp_hpd_pcc1 {
+                               nvidia,pins = "hdmi_int_dp_hpd_pcc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       spdif_out_pcc2 {
+                               nvidia,pins = "spdif_out_pcc2";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spdif_in_pcc3 {
+                               nvidia,pins = "spdif_in_pcc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       usb_vbus_en0_pcc4 {
+                               nvidia,pins = "usb_vbus_en0_pcc4";
+                               nvidia,function = "usb";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       usb_vbus_en1_pcc5 {
+                               nvidia,pins = "usb_vbus_en1_pcc5";
+                               nvidia,function = "usb";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       dp_hpd0_pcc6 {
+                               nvidia,pins = "dp_hpd0_pcc6";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pcc7 {
+                               nvidia,pins = "pcc7";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi2_cs1_pdd0 {
+                               nvidia,pins = "spi2_cs1_pdd0";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       qspi_sck_pee0 {
+                               nvidia,pins = "qspi_sck_pee0";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       qspi_cs_n_pee1 {
+                               nvidia,pins = "qspi_cs_n_pee1";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       qspi_io0_pee2 {
+                               nvidia,pins = "qspi_io0_pee2";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       qspi_io1_pee3 {
+                               nvidia,pins = "qspi_io1_pee3";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       qspi_io2_pee4 {
+                               nvidia,pins = "qspi_io2_pee4";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       qspi_io3_pee5 {
+                               nvidia,pins = "qspi_io3_pee5";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       core_pwr_req {
+                               nvidia,pins = "core_pwr_req";
+                               nvidia,function = "core";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cpu_pwr_req {
+                               nvidia,pins = "cpu_pwr_req";
+                               nvidia,function = "cpu";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pwr_int_n {
+                               nvidia,pins = "pwr_int_n";
+                               nvidia,function = "pmi";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       clk_32k_in {
+                               nvidia,pins = "clk_32k_in";
+                               nvidia,function = "clk";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       jtag_rtck {
+                               nvidia,pins = "jtag_rtck";
+                               nvidia,function = "jtag";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       clk_req {
+                               nvidia,pins = "clk_req";
+                               nvidia,function = "sys";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       shutdown {
+                               nvidia,pins = "shutdown";
+                               nvidia,function = "shutdown";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+               };
+       };
+};
diff --git a/arch/arm64/boot/dts/nvidia/tegra210-p2595.dtsi b/arch/arm64/boot/dts/nvidia/tegra210-p2595.dtsi
new file mode 100644 (file)
index 0000000..f3f9139
--- /dev/null
@@ -0,0 +1,1272 @@
+/ {
+       model = "NVIDIA Tegra210 P2595 I/O board";
+       compatible = "nvidia,p2595", "nvidia,tegra210";
+
+       pinmux: pinmux@0,700008d4 {
+               pinctrl-names = "boot";
+               pinctrl-0 = <&state_boot>;
+
+               state_boot: pinmux {
+                       pex_l0_rst_n_pa0 {
+                               nvidia,pins = "pex_l0_rst_n_pa0";
+                               nvidia,function = "pe0";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       pex_l0_clkreq_n_pa1 {
+                               nvidia,pins = "pex_l0_clkreq_n_pa1";
+                               nvidia,function = "pe0";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       pex_wake_n_pa2 {
+                               nvidia,pins = "pex_wake_n_pa2";
+                               nvidia,function = "pe";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       pex_l1_rst_n_pa3 {
+                               nvidia,pins = "pex_l1_rst_n_pa3";
+                               nvidia,function = "pe1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       pex_l1_clkreq_n_pa4 {
+                               nvidia,pins = "pex_l1_clkreq_n_pa4";
+                               nvidia,function = "pe1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       sata_led_active_pa5 {
+                               nvidia,pins = "sata_led_active_pa5";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pa6 {
+                               nvidia,pins = "pa6";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap1_fs_pb0 {
+                               nvidia,pins = "dap1_fs_pb0";
+                               nvidia,function = "i2s1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap1_din_pb1 {
+                               nvidia,pins = "dap1_din_pb1";
+                               nvidia,function = "i2s1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap1_dout_pb2 {
+                               nvidia,pins = "dap1_dout_pb2";
+                               nvidia,function = "i2s1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap1_sclk_pb3 {
+                               nvidia,pins = "dap1_sclk_pb3";
+                               nvidia,function = "i2s1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi2_mosi_pb4 {
+                               nvidia,pins = "spi2_mosi_pb4";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi2_miso_pb5 {
+                               nvidia,pins = "spi2_miso_pb5";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi2_sck_pb6 {
+                               nvidia,pins = "spi2_sck_pb6";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi2_cs0_pb7 {
+                               nvidia,pins = "spi2_cs0_pb7";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi1_mosi_pc0 {
+                               nvidia,pins = "spi1_mosi_pc0";
+                               nvidia,function = "spi1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi1_miso_pc1 {
+                               nvidia,pins = "spi1_miso_pc1";
+                               nvidia,function = "spi1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi1_sck_pc2 {
+                               nvidia,pins = "spi1_sck_pc2";
+                               nvidia,function = "spi1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi1_cs0_pc3 {
+                               nvidia,pins = "spi1_cs0_pc3";
+                               nvidia,function = "spi1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi1_cs1_pc4 {
+                               nvidia,pins = "spi1_cs1_pc4";
+                               nvidia,function = "spi1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi4_sck_pc5 {
+                               nvidia,pins = "spi4_sck_pc5";
+                               nvidia,function = "spi4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi4_cs0_pc6 {
+                               nvidia,pins = "spi4_cs0_pc6";
+                               nvidia,function = "spi4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi4_mosi_pc7 {
+                               nvidia,pins = "spi4_mosi_pc7";
+                               nvidia,function = "spi4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi4_miso_pd0 {
+                               nvidia,pins = "spi4_miso_pd0";
+                               nvidia,function = "spi4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart3_tx_pd1 {
+                               nvidia,pins = "uart3_tx_pd1";
+                               nvidia,function = "uartc";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart3_rx_pd2 {
+                               nvidia,pins = "uart3_rx_pd2";
+                               nvidia,function = "uartc";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart3_rts_pd3 {
+                               nvidia,pins = "uart3_rts_pd3";
+                               nvidia,function = "uartc";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart3_cts_pd4 {
+                               nvidia,pins = "uart3_cts_pd4";
+                               nvidia,function = "uartc";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dmic1_clk_pe0 {
+                               nvidia,pins = "dmic1_clk_pe0";
+                               nvidia,function = "dmic1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dmic1_dat_pe1 {
+                               nvidia,pins = "dmic1_dat_pe1";
+                               nvidia,function = "dmic1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dmic2_clk_pe2 {
+                               nvidia,pins = "dmic2_clk_pe2";
+                               nvidia,function = "dmic2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dmic2_dat_pe3 {
+                               nvidia,pins = "dmic2_dat_pe3";
+                               nvidia,function = "dmic2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dmic3_clk_pe4 {
+                               nvidia,pins = "dmic3_clk_pe4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dmic3_dat_pe5 {
+                               nvidia,pins = "dmic3_dat_pe5";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pe6 {
+                               nvidia,pins = "pe6";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pe7 {
+                               nvidia,pins = "pe7";
+                               nvidia,function = "pwm3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       gen3_i2c_scl_pf0 {
+                               nvidia,pins = "gen3_i2c_scl_pf0";
+                               nvidia,function = "i2c3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       gen3_i2c_sda_pf1 {
+                               nvidia,pins = "gen3_i2c_sda_pf1";
+                               nvidia,function = "i2c3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart2_tx_pg0 {
+                               nvidia,pins = "uart2_tx_pg0";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart2_rx_pg1 {
+                               nvidia,pins = "uart2_rx_pg1";
+                               nvidia,function = "uartb";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart2_rts_pg2 {
+                               nvidia,pins = "uart2_rts_pg2";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart2_cts_pg3 {
+                               nvidia,pins = "uart2_cts_pg3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       wifi_en_ph0 {
+                               nvidia,pins = "wifi_en_ph0";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       wifi_rst_ph1 {
+                               nvidia,pins = "wifi_rst_ph1";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       wifi_wake_ap_ph2 {
+                               nvidia,pins = "wifi_wake_ap_ph2";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       ap_wake_bt_ph3 {
+                               nvidia,pins = "ap_wake_bt_ph3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       bt_rst_ph4 {
+                               nvidia,pins = "bt_rst_ph4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       bt_wake_ap_ph5 {
+                               nvidia,pins = "bt_wake_ap_ph5";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       ph6 {
+                               nvidia,pins = "ph6";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       ap_wake_nfc_ph7 {
+                               nvidia,pins = "ap_wake_nfc_ph7";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       nfc_en_pi0 {
+                               nvidia,pins = "nfc_en_pi0";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       nfc_int_pi1 {
+                               nvidia,pins = "nfc_int_pi1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       gps_en_pi2 {
+                               nvidia,pins = "gps_en_pi2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       gps_rst_pi3 {
+                               nvidia,pins = "gps_rst_pi3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart4_tx_pi4 {
+                               nvidia,pins = "uart4_tx_pi4";
+                               nvidia,function = "uartd";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart4_rx_pi5 {
+                               nvidia,pins = "uart4_rx_pi5";
+                               nvidia,function = "uartd";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart4_rts_pi6 {
+                               nvidia,pins = "uart4_rts_pi6";
+                               nvidia,function = "uartd";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart4_cts_pi7 {
+                               nvidia,pins = "uart4_cts_pi7";
+                               nvidia,function = "uartd";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       gen1_i2c_sda_pj0 {
+                               nvidia,pins = "gen1_i2c_sda_pj0";
+                               nvidia,function = "i2c1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       gen1_i2c_scl_pj1 {
+                               nvidia,pins = "gen1_i2c_scl_pj1";
+                               nvidia,function = "i2c1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       gen2_i2c_scl_pj2 {
+                               nvidia,pins = "gen2_i2c_scl_pj2";
+                               nvidia,function = "i2c2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       gen2_i2c_sda_pj3 {
+                               nvidia,pins = "gen2_i2c_sda_pj3";
+                               nvidia,function = "i2c2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       dap4_fs_pj4 {
+                               nvidia,pins = "dap4_fs_pj4";
+                               nvidia,function = "i2s4b";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap4_din_pj5 {
+                               nvidia,pins = "dap4_din_pj5";
+                               nvidia,function = "i2s4b";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap4_dout_pj6 {
+                               nvidia,pins = "dap4_dout_pj6";
+                               nvidia,function = "i2s4b";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap4_sclk_pj7 {
+                               nvidia,pins = "dap4_sclk_pj7";
+                               nvidia,function = "i2s4b";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk0 {
+                               nvidia,pins = "pk0";
+                               nvidia,function = "i2s5b";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk1 {
+                               nvidia,pins = "pk1";
+                               nvidia,function = "i2s5b";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk2 {
+                               nvidia,pins = "pk2";
+                               nvidia,function = "i2s5b";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk3 {
+                               nvidia,pins = "pk3";
+                               nvidia,function = "i2s5b";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk4 {
+                               nvidia,pins = "pk4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk5 {
+                               nvidia,pins = "pk5";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk6 {
+                               nvidia,pins = "pk6";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk7 {
+                               nvidia,pins = "pk7";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pl0 {
+                               nvidia,pins = "pl0";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pl1 {
+                               nvidia,pins = "pl1";
+                               nvidia,function = "soc";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc1_clk_pm0 {
+                               nvidia,pins = "sdmmc1_clk_pm0";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc1_cmd_pm1 {
+                               nvidia,pins = "sdmmc1_cmd_pm1";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc1_dat3_pm2 {
+                               nvidia,pins = "sdmmc1_dat3_pm2";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc1_dat2_pm3 {
+                               nvidia,pins = "sdmmc1_dat2_pm3";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc1_dat1_pm4 {
+                               nvidia,pins = "sdmmc1_dat1_pm4";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc1_dat0_pm5 {
+                               nvidia,pins = "sdmmc1_dat0_pm5";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc3_clk_pp0 {
+                               nvidia,pins = "sdmmc3_clk_pp0";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc3_cmd_pp1 {
+                               nvidia,pins = "sdmmc3_cmd_pp1";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc3_dat3_pp2 {
+                               nvidia,pins = "sdmmc3_dat3_pp2";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc3_dat2_pp3 {
+                               nvidia,pins = "sdmmc3_dat2_pp3";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc3_dat1_pp4 {
+                               nvidia,pins = "sdmmc3_dat1_pp4";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc3_dat0_pp5 {
+                               nvidia,pins = "sdmmc3_dat0_pp5";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam1_mclk_ps0 {
+                               nvidia,pins = "cam1_mclk_ps0";
+                               nvidia,function = "extperiph3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam2_mclk_ps1 {
+                               nvidia,pins = "cam2_mclk_ps1";
+                               nvidia,function = "extperiph3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam_i2c_scl_ps2 {
+                               nvidia,pins = "cam_i2c_scl_ps2";
+                               nvidia,function = "i2cvi";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam_i2c_sda_ps3 {
+                               nvidia,pins = "cam_i2c_sda_ps3";
+                               nvidia,function = "i2cvi";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam_rst_ps4 {
+                               nvidia,pins = "cam_rst_ps4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam_af_en_ps5 {
+                               nvidia,pins = "cam_af_en_ps5";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam_flash_en_ps6 {
+                               nvidia,pins = "cam_flash_en_ps6";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam1_pwdn_ps7 {
+                               nvidia,pins = "cam1_pwdn_ps7";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam2_pwdn_pt0 {
+                               nvidia,pins = "cam2_pwdn_pt0";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam1_strobe_pt1 {
+                               nvidia,pins = "cam1_strobe_pt1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart1_tx_pu0 {
+                               nvidia,pins = "uart1_tx_pu0";
+                               nvidia,function = "uarta";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart1_rx_pu1 {
+                               nvidia,pins = "uart1_rx_pu1";
+                               nvidia,function = "uarta";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart1_rts_pu2 {
+                               nvidia,pins = "uart1_rts_pu2";
+                               nvidia,function = "uarta";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart1_cts_pu3 {
+                               nvidia,pins = "uart1_cts_pu3";
+                               nvidia,function = "uarta";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       lcd_bl_pwm_pv0 {
+                               nvidia,pins = "lcd_bl_pwm_pv0";
+                               nvidia,function = "pwm0";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       lcd_bl_en_pv1 {
+                               nvidia,pins = "lcd_bl_en_pv1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       lcd_rst_pv2 {
+                               nvidia,pins = "lcd_rst_pv2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       lcd_gpio1_pv3 {
+                               nvidia,pins = "lcd_gpio1_pv3";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       lcd_gpio2_pv4 {
+                               nvidia,pins = "lcd_gpio2_pv4";
+                               nvidia,function = "pwm1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       ap_ready_pv5 {
+                               nvidia,pins = "ap_ready_pv5";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       touch_rst_pv6 {
+                               nvidia,pins = "touch_rst_pv6";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       touch_clk_pv7 {
+                               nvidia,pins = "touch_clk_pv7";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       modem_wake_ap_px0 {
+                               nvidia,pins = "modem_wake_ap_px0";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       touch_int_px1 {
+                               nvidia,pins = "touch_int_px1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       motion_int_px2 {
+                               nvidia,pins = "motion_int_px2";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       als_prox_int_px3 {
+                               nvidia,pins = "als_prox_int_px3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       temp_alert_px4 {
+                               nvidia,pins = "temp_alert_px4";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       button_power_on_px5 {
+                               nvidia,pins = "button_power_on_px5";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       button_vol_up_px6 {
+                               nvidia,pins = "button_vol_up_px6";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       button_vol_down_px7 {
+                               nvidia,pins = "button_vol_down_px7";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       button_slide_sw_py0 {
+                               nvidia,pins = "button_slide_sw_py0";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       button_home_py1 {
+                               nvidia,pins = "button_home_py1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       lcd_te_py2 {
+                               nvidia,pins = "lcd_te_py2";
+                               nvidia,function = "displaya";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pwr_i2c_scl_py3 {
+                               nvidia,pins = "pwr_i2c_scl_py3";
+                               nvidia,function = "i2cpmu";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       pwr_i2c_sda_py4 {
+                               nvidia,pins = "pwr_i2c_sda_py4";
+                               nvidia,function = "i2cpmu";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       clk_32k_out_py5 {
+                               nvidia,pins = "clk_32k_out_py5";
+                               nvidia,function = "soc";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pz0 {
+                               nvidia,pins = "pz0";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pz1 {
+                               nvidia,pins = "pz1";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pz2 {
+                               nvidia,pins = "pz2";
+                               nvidia,function = "rsvd2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pz3 {
+                               nvidia,pins = "pz3";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pz4 {
+                               nvidia,pins = "pz4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pz5 {
+                               nvidia,pins = "pz5";
+                               nvidia,function = "soc";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap2_fs_paa0 {
+                               nvidia,pins = "dap2_fs_paa0";
+                               nvidia,function = "i2s2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap2_sclk_paa1 {
+                               nvidia,pins = "dap2_sclk_paa1";
+                               nvidia,function = "i2s2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap2_din_paa2 {
+                               nvidia,pins = "dap2_din_paa2";
+                               nvidia,function = "i2s2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap2_dout_paa3 {
+                               nvidia,pins = "dap2_dout_paa3";
+                               nvidia,function = "i2s2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       aud_mclk_pbb0 {
+                               nvidia,pins = "aud_mclk_pbb0";
+                               nvidia,function = "aud";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dvfs_pwm_pbb1 {
+                               nvidia,pins = "dvfs_pwm_pbb1";
+                               nvidia,function = "cldvfs";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dvfs_clk_pbb2 {
+                               nvidia,pins = "dvfs_clk_pbb2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       gpio_x1_aud_pbb3 {
+                               nvidia,pins = "gpio_x1_aud_pbb3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       gpio_x3_aud_pbb4 {
+                               nvidia,pins = "gpio_x3_aud_pbb4";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       hdmi_cec_pcc0 {
+                               nvidia,pins = "hdmi_cec_pcc0";
+                               nvidia,function = "cec";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       hdmi_int_dp_hpd_pcc1 {
+                               nvidia,pins = "hdmi_int_dp_hpd_pcc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       spdif_out_pcc2 {
+                               nvidia,pins = "spdif_out_pcc2";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spdif_in_pcc3 {
+                               nvidia,pins = "spdif_in_pcc3";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       usb_vbus_en0_pcc4 {
+                               nvidia,pins = "usb_vbus_en0_pcc4";
+                               nvidia,function = "usb";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       usb_vbus_en1_pcc5 {
+                               nvidia,pins = "usb_vbus_en1_pcc5";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       dp_hpd0_pcc6 {
+                               nvidia,pins = "dp_hpd0_pcc6";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pcc7 {
+                               nvidia,pins = "pcc7";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi2_cs1_pdd0 {
+                               nvidia,pins = "spi2_cs1_pdd0";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       qspi_sck_pee0 {
+                               nvidia,pins = "qspi_sck_pee0";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       qspi_cs_n_pee1 {
+                               nvidia,pins = "qspi_cs_n_pee1";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       qspi_io0_pee2 {
+                               nvidia,pins = "qspi_io0_pee2";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       qspi_io1_pee3 {
+                               nvidia,pins = "qspi_io1_pee3";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       qspi_io2_pee4 {
+                               nvidia,pins = "qspi_io2_pee4";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       qspi_io3_pee5 {
+                               nvidia,pins = "qspi_io3_pee5";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       core_pwr_req {
+                               nvidia,pins = "core_pwr_req";
+                               nvidia,function = "core";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cpu_pwr_req {
+                               nvidia,pins = "cpu_pwr_req";
+                               nvidia,function = "cpu";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pwr_int_n {
+                               nvidia,pins = "pwr_int_n";
+                               nvidia,function = "pmi";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       clk_32k_in {
+                               nvidia,pins = "clk_32k_in";
+                               nvidia,function = "clk";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       jtag_rtck {
+                               nvidia,pins = "jtag_rtck";
+                               nvidia,function = "jtag";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       clk_req {
+                               nvidia,pins = "clk_req";
+                               nvidia,function = "sys";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       shutdown {
+                               nvidia,pins = "shutdown";
+                               nvidia,function = "shutdown";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+               };
+       };
+};
diff --git a/arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi b/arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi
new file mode 100644 (file)
index 0000000..be3eccb
--- /dev/null
@@ -0,0 +1,1270 @@
+/ {
+       model = "NVIDIA Tegra210 P2597 I/O board";
+       compatible = "nvidia,p2597", "nvidia,tegra210";
+
+       pinmux: pinmux@0,700008d4 {
+               pinctrl-names = "boot";
+               pinctrl-0 = <&state_boot>;
+
+               state_boot: pinmux {
+                       pex_l0_rst_n_pa0 {
+                               nvidia,pins = "pex_l0_rst_n_pa0";
+                               nvidia,function = "pe0";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       pex_l0_clkreq_n_pa1 {
+                               nvidia,pins = "pex_l0_clkreq_n_pa1";
+                               nvidia,function = "pe0";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       pex_wake_n_pa2 {
+                               nvidia,pins = "pex_wake_n_pa2";
+                               nvidia,function = "pe";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       pex_l1_rst_n_pa3 {
+                               nvidia,pins = "pex_l1_rst_n_pa3";
+                               nvidia,function = "pe1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       pex_l1_clkreq_n_pa4 {
+                               nvidia,pins = "pex_l1_clkreq_n_pa4";
+                               nvidia,function = "pe1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       sata_led_active_pa5 {
+                               nvidia,pins = "sata_led_active_pa5";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pa6 {
+                               nvidia,pins = "pa6";
+                               nvidia,function = "sata";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap1_fs_pb0 {
+                               nvidia,pins = "dap1_fs_pb0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap1_din_pb1 {
+                               nvidia,pins = "dap1_din_pb1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap1_dout_pb2 {
+                               nvidia,pins = "dap1_dout_pb2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap1_sclk_pb3 {
+                               nvidia,pins = "dap1_sclk_pb3";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi2_mosi_pb4 {
+                               nvidia,pins = "spi2_mosi_pb4";
+                               nvidia,function = "spi2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi2_miso_pb5 {
+                               nvidia,pins = "spi2_miso_pb5";
+                               nvidia,function = "spi2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi2_sck_pb6 {
+                               nvidia,pins = "spi2_sck_pb6";
+                               nvidia,function = "spi2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi2_cs0_pb7 {
+                               nvidia,pins = "spi2_cs0_pb7";
+                               nvidia,function = "spi2";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi1_mosi_pc0 {
+                               nvidia,pins = "spi1_mosi_pc0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi1_miso_pc1 {
+                               nvidia,pins = "spi1_miso_pc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi1_sck_pc2 {
+                               nvidia,pins = "spi1_sck_pc2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi1_cs0_pc3 {
+                               nvidia,pins = "spi1_cs0_pc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi1_cs1_pc4 {
+                               nvidia,pins = "spi1_cs1_pc4";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi4_sck_pc5 {
+                               nvidia,pins = "spi4_sck_pc5";
+                               nvidia,function = "spi4";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi4_cs0_pc6 {
+                               nvidia,pins = "spi4_cs0_pc6";
+                               nvidia,function = "spi4";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi4_mosi_pc7 {
+                               nvidia,pins = "spi4_mosi_pc7";
+                               nvidia,function = "spi4";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi4_miso_pd0 {
+                               nvidia,pins = "spi4_miso_pd0";
+                               nvidia,function = "spi4";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart3_tx_pd1 {
+                               nvidia,pins = "uart3_tx_pd1";
+                               nvidia,function = "uartc";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart3_rx_pd2 {
+                               nvidia,pins = "uart3_rx_pd2";
+                               nvidia,function = "uartc";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart3_rts_pd3 {
+                               nvidia,pins = "uart3_rts_pd3";
+                               nvidia,function = "uartc";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart3_cts_pd4 {
+                               nvidia,pins = "uart3_cts_pd4";
+                               nvidia,function = "uartc";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dmic1_clk_pe0 {
+                               nvidia,pins = "dmic1_clk_pe0";
+                               nvidia,function = "i2s3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dmic1_dat_pe1 {
+                               nvidia,pins = "dmic1_dat_pe1";
+                               nvidia,function = "i2s3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dmic2_clk_pe2 {
+                               nvidia,pins = "dmic2_clk_pe2";
+                               nvidia,function = "i2s3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dmic2_dat_pe3 {
+                               nvidia,pins = "dmic2_dat_pe3";
+                               nvidia,function = "i2s3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dmic3_clk_pe4 {
+                               nvidia,pins = "dmic3_clk_pe4";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dmic3_dat_pe5 {
+                               nvidia,pins = "dmic3_dat_pe5";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pe6 {
+                               nvidia,pins = "pe6";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pe7 {
+                               nvidia,pins = "pe7";
+                               nvidia,function = "pwm3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       gen3_i2c_scl_pf0 {
+                               nvidia,pins = "gen3_i2c_scl_pf0";
+                               nvidia,function = "i2c3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       gen3_i2c_sda_pf1 {
+                               nvidia,pins = "gen3_i2c_sda_pf1";
+                               nvidia,function = "i2c3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart2_tx_pg0 {
+                               nvidia,pins = "uart2_tx_pg0";
+                               nvidia,function = "uartb";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart2_rx_pg1 {
+                               nvidia,pins = "uart2_rx_pg1";
+                               nvidia,function = "uartb";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart2_rts_pg2 {
+                               nvidia,pins = "uart2_rts_pg2";
+                               nvidia,function = "uartb";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart2_cts_pg3 {
+                               nvidia,pins = "uart2_cts_pg3";
+                               nvidia,function = "uartb";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       wifi_en_ph0 {
+                               nvidia,pins = "wifi_en_ph0";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       wifi_rst_ph1 {
+                               nvidia,pins = "wifi_rst_ph1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       wifi_wake_ap_ph2 {
+                               nvidia,pins = "wifi_wake_ap_ph2";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       ap_wake_bt_ph3 {
+                               nvidia,pins = "ap_wake_bt_ph3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       bt_rst_ph4 {
+                               nvidia,pins = "bt_rst_ph4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       bt_wake_ap_ph5 {
+                               nvidia,pins = "bt_wake_ap_ph5";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       ph6 {
+                               nvidia,pins = "ph6";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       ap_wake_nfc_ph7 {
+                               nvidia,pins = "ap_wake_nfc_ph7";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       nfc_en_pi0 {
+                               nvidia,pins = "nfc_en_pi0";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       nfc_int_pi1 {
+                               nvidia,pins = "nfc_int_pi1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       gps_en_pi2 {
+                               nvidia,pins = "gps_en_pi2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       gps_rst_pi3 {
+                               nvidia,pins = "gps_rst_pi3";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart4_tx_pi4 {
+                               nvidia,pins = "uart4_tx_pi4";
+                               nvidia,function = "uartd";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart4_rx_pi5 {
+                               nvidia,pins = "uart4_rx_pi5";
+                               nvidia,function = "uartd";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart4_rts_pi6 {
+                               nvidia,pins = "uart4_rts_pi6";
+                               nvidia,function = "uartd";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart4_cts_pi7 {
+                               nvidia,pins = "uart4_cts_pi7";
+                               nvidia,function = "uartd";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       gen1_i2c_sda_pj0 {
+                               nvidia,pins = "gen1_i2c_sda_pj0";
+                               nvidia,function = "i2c1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       gen1_i2c_scl_pj1 {
+                               nvidia,pins = "gen1_i2c_scl_pj1";
+                               nvidia,function = "i2c1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       gen2_i2c_scl_pj2 {
+                               nvidia,pins = "gen2_i2c_scl_pj2";
+                               nvidia,function = "i2c2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       gen2_i2c_sda_pj3 {
+                               nvidia,pins = "gen2_i2c_sda_pj3";
+                               nvidia,function = "i2c2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       dap4_fs_pj4 {
+                               nvidia,pins = "dap4_fs_pj4";
+                               nvidia,function = "i2s4b";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap4_din_pj5 {
+                               nvidia,pins = "dap4_din_pj5";
+                               nvidia,function = "i2s4b";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap4_dout_pj6 {
+                               nvidia,pins = "dap4_dout_pj6";
+                               nvidia,function = "i2s4b";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap4_sclk_pj7 {
+                               nvidia,pins = "dap4_sclk_pj7";
+                               nvidia,function = "i2s4b";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk0 {
+                               nvidia,pins = "pk0";
+                               nvidia,function = "i2s5b";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk1 {
+                               nvidia,pins = "pk1";
+                               nvidia,function = "i2s5b";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk2 {
+                               nvidia,pins = "pk2";
+                               nvidia,function = "i2s5b";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk3 {
+                               nvidia,pins = "pk3";
+                               nvidia,function = "i2s5b";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk4 {
+                               nvidia,pins = "pk4";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk5 {
+                               nvidia,pins = "pk5";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk6 {
+                               nvidia,pins = "pk6";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pk7 {
+                               nvidia,pins = "pk7";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pl0 {
+                               nvidia,pins = "pl0";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pl1 {
+                               nvidia,pins = "pl1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc1_clk_pm0 {
+                               nvidia,pins = "sdmmc1_clk_pm0";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc1_cmd_pm1 {
+                               nvidia,pins = "sdmmc1_cmd_pm1";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc1_dat3_pm2 {
+                               nvidia,pins = "sdmmc1_dat3_pm2";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc1_dat2_pm3 {
+                               nvidia,pins = "sdmmc1_dat2_pm3";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc1_dat1_pm4 {
+                               nvidia,pins = "sdmmc1_dat1_pm4";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc1_dat0_pm5 {
+                               nvidia,pins = "sdmmc1_dat0_pm5";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc3_clk_pp0 {
+                               nvidia,pins = "sdmmc3_clk_pp0";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc3_cmd_pp1 {
+                               nvidia,pins = "sdmmc3_cmd_pp1";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc3_dat3_pp2 {
+                               nvidia,pins = "sdmmc3_dat3_pp2";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc3_dat2_pp3 {
+                               nvidia,pins = "sdmmc3_dat2_pp3";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc3_dat1_pp4 {
+                               nvidia,pins = "sdmmc3_dat1_pp4";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       sdmmc3_dat0_pp5 {
+                               nvidia,pins = "sdmmc3_dat0_pp5";
+                               nvidia,function = "sdmmc3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam1_mclk_ps0 {
+                               nvidia,pins = "cam1_mclk_ps0";
+                               nvidia,function = "extperiph3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam2_mclk_ps1 {
+                               nvidia,pins = "cam2_mclk_ps1";
+                               nvidia,function = "extperiph3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam_i2c_scl_ps2 {
+                               nvidia,pins = "cam_i2c_scl_ps2";
+                               nvidia,function = "i2cvi";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam_i2c_sda_ps3 {
+                               nvidia,pins = "cam_i2c_sda_ps3";
+                               nvidia,function = "i2cvi";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam_rst_ps4 {
+                               nvidia,pins = "cam_rst_ps4";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam_af_en_ps5 {
+                               nvidia,pins = "cam_af_en_ps5";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam_flash_en_ps6 {
+                               nvidia,pins = "cam_flash_en_ps6";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam1_pwdn_ps7 {
+                               nvidia,pins = "cam1_pwdn_ps7";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam2_pwdn_pt0 {
+                               nvidia,pins = "cam2_pwdn_pt0";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cam1_strobe_pt1 {
+                               nvidia,pins = "cam1_strobe_pt1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart1_tx_pu0 {
+                               nvidia,pins = "uart1_tx_pu0";
+                               nvidia,function = "uarta";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart1_rx_pu1 {
+                               nvidia,pins = "uart1_rx_pu1";
+                               nvidia,function = "uarta";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart1_rts_pu2 {
+                               nvidia,pins = "uart1_rts_pu2";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       uart1_cts_pu3 {
+                               nvidia,pins = "uart1_cts_pu3";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       lcd_bl_pwm_pv0 {
+                               nvidia,pins = "lcd_bl_pwm_pv0";
+                               nvidia,function = "pwm0";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       lcd_bl_en_pv1 {
+                               nvidia,pins = "lcd_bl_en_pv1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       lcd_rst_pv2 {
+                               nvidia,pins = "lcd_rst_pv2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       lcd_gpio1_pv3 {
+                               nvidia,pins = "lcd_gpio1_pv3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       lcd_gpio2_pv4 {
+                               nvidia,pins = "lcd_gpio2_pv4";
+                               nvidia,function = "pwm1";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       ap_ready_pv5 {
+                               nvidia,pins = "ap_ready_pv5";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       touch_rst_pv6 {
+                               nvidia,pins = "touch_rst_pv6";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       touch_clk_pv7 {
+                               nvidia,pins = "touch_clk_pv7";
+                               nvidia,function = "touch";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       modem_wake_ap_px0 {
+                               nvidia,pins = "modem_wake_ap_px0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       touch_int_px1 {
+                               nvidia,pins = "touch_int_px1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       motion_int_px2 {
+                               nvidia,pins = "motion_int_px2";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       als_prox_int_px3 {
+                               nvidia,pins = "als_prox_int_px3";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       temp_alert_px4 {
+                               nvidia,pins = "temp_alert_px4";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       button_power_on_px5 {
+                               nvidia,pins = "button_power_on_px5";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       button_vol_up_px6 {
+                               nvidia,pins = "button_vol_up_px6";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       button_vol_down_px7 {
+                               nvidia,pins = "button_vol_down_px7";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       button_slide_sw_py0 {
+                               nvidia,pins = "button_slide_sw_py0";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       button_home_py1 {
+                               nvidia,pins = "button_home_py1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       lcd_te_py2 {
+                               nvidia,pins = "lcd_te_py2";
+                               nvidia,function = "displaya";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pwr_i2c_scl_py3 {
+                               nvidia,pins = "pwr_i2c_scl_py3";
+                               nvidia,function = "i2cpmu";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       pwr_i2c_sda_py4 {
+                               nvidia,pins = "pwr_i2c_sda_py4";
+                               nvidia,function = "i2cpmu";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       clk_32k_out_py5 {
+                               nvidia,pins = "clk_32k_out_py5";
+                               nvidia,function = "soc";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pz0 {
+                               nvidia,pins = "pz0";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pz1 {
+                               nvidia,pins = "pz1";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pz2 {
+                               nvidia,pins = "pz2";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pz3 {
+                               nvidia,pins = "pz3";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pz4 {
+                               nvidia,pins = "pz4";
+                               nvidia,function = "sdmmc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pz5 {
+                               nvidia,pins = "pz5";
+                               nvidia,function = "soc";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap2_fs_paa0 {
+                               nvidia,pins = "dap2_fs_paa0";
+                               nvidia,function = "i2s2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap2_sclk_paa1 {
+                               nvidia,pins = "dap2_sclk_paa1";
+                               nvidia,function = "i2s2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap2_din_paa2 {
+                               nvidia,pins = "dap2_din_paa2";
+                               nvidia,function = "i2s2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dap2_dout_paa3 {
+                               nvidia,pins = "dap2_dout_paa3";
+                               nvidia,function = "i2s2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       aud_mclk_pbb0 {
+                               nvidia,pins = "aud_mclk_pbb0";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dvfs_pwm_pbb1 {
+                               nvidia,pins = "dvfs_pwm_pbb1";
+                               nvidia,function = "cldvfs";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       dvfs_clk_pbb2 {
+                               nvidia,pins = "dvfs_clk_pbb2";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       gpio_x1_aud_pbb3 {
+                               nvidia,pins = "gpio_x1_aud_pbb3";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       gpio_x3_aud_pbb4 {
+                               nvidia,pins = "gpio_x3_aud_pbb4";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       hdmi_cec_pcc0 {
+                               nvidia,pins = "hdmi_cec_pcc0";
+                               nvidia,function = "cec";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       hdmi_int_dp_hpd_pcc1 {
+                               nvidia,pins = "hdmi_int_dp_hpd_pcc1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       spdif_out_pcc2 {
+                               nvidia,pins = "spdif_out_pcc2";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       spdif_in_pcc3 {
+                               nvidia,pins = "spdif_in_pcc3";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       usb_vbus_en0_pcc4 {
+                               nvidia,pins = "usb_vbus_en0_pcc4";
+                               nvidia,function = "usb";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       usb_vbus_en1_pcc5 {
+                               nvidia,pins = "usb_vbus_en1_pcc5";
+                               nvidia,function = "usb";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+                       };
+                       dp_hpd0_pcc6 {
+                               nvidia,pins = "dp_hpd0_pcc6";
+                               nvidia,function = "dp";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pcc7 {
+                               nvidia,pins = "pcc7";
+                               nvidia,function = "rsvd0";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                               nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+                       };
+                       spi2_cs1_pdd0 {
+                               nvidia,pins = "spi2_cs1_pdd0";
+                               nvidia,function = "spi2";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       qspi_sck_pee0 {
+                               nvidia,pins = "qspi_sck_pee0";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       qspi_cs_n_pee1 {
+                               nvidia,pins = "qspi_cs_n_pee1";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       qspi_io0_pee2 {
+                               nvidia,pins = "qspi_io0_pee2";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       qspi_io1_pee3 {
+                               nvidia,pins = "qspi_io1_pee3";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       qspi_io2_pee4 {
+                               nvidia,pins = "qspi_io2_pee4";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       qspi_io3_pee5 {
+                               nvidia,pins = "qspi_io3_pee5";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       core_pwr_req {
+                               nvidia,pins = "core_pwr_req";
+                               nvidia,function = "core";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       cpu_pwr_req {
+                               nvidia,pins = "cpu_pwr_req";
+                               nvidia,function = "cpu";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       pwr_int_n {
+                               nvidia,pins = "pwr_int_n";
+                               nvidia,function = "pmi";
+                               nvidia,pull = <TEGRA_PIN_PULL_UP>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       clk_32k_in {
+                               nvidia,pins = "clk_32k_in";
+                               nvidia,function = "clk";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       jtag_rtck {
+                               nvidia,pins = "jtag_rtck";
+                               nvidia,function = "jtag";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       clk_req {
+                               nvidia,pins = "clk_req";
+                               nvidia,function = "rsvd1";
+                               nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+                               nvidia,tristate = <TEGRA_PIN_ENABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+                       shutdown {
+                               nvidia,pins = "shutdown";
+                               nvidia,function = "shutdown";
+                               nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+                               nvidia,tristate = <TEGRA_PIN_DISABLE>;
+                               nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+                               nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+                       };
+               };
+       };
+
+       /* MMC/SD */
+       sdhci@0,700b0000 {
+               status = "okay";
+               bus-width = <4>;
+               no-1-8-v;
+
+               cd-gpios = <&gpio TEGRA_GPIO(Z, 1) GPIO_ACTIVE_LOW>;
+       };
+};
diff --git a/arch/arm64/boot/dts/nvidia/tegra210.dtsi b/arch/arm64/boot/dts/nvidia/tegra210.dtsi
new file mode 100644 (file)
index 0000000..bc23f4d
--- /dev/null
@@ -0,0 +1,805 @@
+#include <dt-bindings/clock/tegra210-car.h>
+#include <dt-bindings/gpio/tegra-gpio.h>
+#include <dt-bindings/memory/tegra210-mc.h>
+#include <dt-bindings/pinctrl/pinctrl-tegra.h>
+#include <dt-bindings/interrupt-controller/arm-gic.h>
+
+/ {
+       compatible = "nvidia,tegra210";
+       interrupt-parent = <&lic>;
+       #address-cells = <2>;
+       #size-cells = <2>;
+
+       host1x@0,50000000 {
+               compatible = "nvidia,tegra210-host1x", "simple-bus";
+               reg = <0x0 0x50000000 0x0 0x00034000>;
+               interrupts = <GIC_SPI 65 IRQ_TYPE_LEVEL_HIGH>, /* syncpt */
+                            <GIC_SPI 67 IRQ_TYPE_LEVEL_HIGH>; /* general */
+               clocks = <&tegra_car TEGRA210_CLK_HOST1X>;
+               clock-names = "host1x";
+               resets = <&tegra_car 28>;
+               reset-names = "host1x";
+
+               #address-cells = <2>;
+               #size-cells = <2>;
+
+               ranges = <0x0 0x54000000 0x0 0x54000000 0x0 0x01000000>;
+
+               dpaux1: dpaux@0,54040000 {
+                       compatible = "nvidia,tegra210-dpaux";
+                       reg = <0x0 0x54040000 0x0 0x00040000>;
+                       interrupts = <GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>;
+                       clocks = <&tegra_car TEGRA210_CLK_DPAUX1>,
+                                <&tegra_car TEGRA210_CLK_PLL_DP>;
+                       clock-names = "dpaux", "parent";
+                       resets = <&tegra_car 207>;
+                       reset-names = "dpaux";
+                       status = "disabled";
+               };
+
+               vi@0,54080000 {
+                       compatible = "nvidia,tegra210-vi";
+                       reg = <0x0 0x54080000 0x0 0x00040000>;
+                       interrupts = <GIC_SPI 69 IRQ_TYPE_LEVEL_HIGH>;
+                       status = "disabled";
+               };
+
+               tsec@0,54100000 {
+                       compatible = "nvidia,tegra210-tsec";
+                       reg = <0x0 0x54100000 0x0 0x00040000>;
+               };
+
+               dc@0,54200000 {
+                       compatible = "nvidia,tegra210-dc";
+                       reg = <0x0 0x54200000 0x0 0x00040000>;
+                       interrupts = <GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH>;
+                       clocks = <&tegra_car TEGRA210_CLK_DISP1>,
+                                <&tegra_car TEGRA210_CLK_PLL_P>;
+                       clock-names = "dc", "parent";
+                       resets = <&tegra_car 27>;
+                       reset-names = "dc";
+
+                       iommus = <&mc TEGRA_SWGROUP_DC>;
+
+                       nvidia,head = <0>;
+               };
+
+               dc@0,54240000 {
+                       compatible = "nvidia,tegra210-dc";
+                       reg = <0x0 0x54240000 0x0 0x00040000>;
+                       interrupts = <GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH>;
+                       clocks = <&tegra_car TEGRA210_CLK_DISP2>,
+                                <&tegra_car TEGRA210_CLK_PLL_P>;
+                       clock-names = "dc", "parent";
+                       resets = <&tegra_car 26>;
+                       reset-names = "dc";
+
+                       iommus = <&mc TEGRA_SWGROUP_DCB>;
+
+                       nvidia,head = <1>;
+               };
+
+               dsi@0,54300000 {
+                       compatible = "nvidia,tegra210-dsi";
+                       reg = <0x0 0x54300000 0x0 0x00040000>;
+                       clocks = <&tegra_car TEGRA210_CLK_DSIA>,
+                                <&tegra_car TEGRA210_CLK_DSIALP>,
+                                <&tegra_car TEGRA210_CLK_PLL_D_OUT0>;
+                       clock-names = "dsi", "lp", "parent";
+                       resets = <&tegra_car 48>;
+                       reset-names = "dsi";
+                       nvidia,mipi-calibrate = <&mipi 0x0c0>; /* DSIA & DSIB pads */
+
+                       status = "disabled";
+
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+               };
+
+               vic@0,54340000 {
+                       compatible = "nvidia,tegra210-vic";
+                       reg = <0x0 0x54340000 0x0 0x00040000>;
+                       status = "disabled";
+               };
+
+               nvjpg@0,54380000 {
+                       compatible = "nvidia,tegra210-nvjpg";
+                       reg = <0x0 0x54380000 0x0 0x00040000>;
+                       status = "disabled";
+               };
+
+               dsi@0,54400000 {
+                       compatible = "nvidia,tegra210-dsi";
+                       reg = <0x0 0x54400000 0x0 0x00040000>;
+                       clocks = <&tegra_car TEGRA210_CLK_DSIB>,
+                                <&tegra_car TEGRA210_CLK_DSIBLP>,
+                                <&tegra_car TEGRA210_CLK_PLL_D_OUT0>;
+                       clock-names = "dsi", "lp", "parent";
+                       resets = <&tegra_car 82>;
+                       reset-names = "dsi";
+                       nvidia,mipi-calibrate = <&mipi 0x300>; /* DSIC & DSID pads */
+
+                       status = "disabled";
+
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+               };
+
+               nvdec@0,54480000 {
+                       compatible = "nvidia,tegra210-nvdec";
+                       reg = <0x0 0x54480000 0x0 0x00040000>;
+                       status = "disabled";
+               };
+
+               nvenc@0,544c0000 {
+                       compatible = "nvidia,tegra210-nvenc";
+                       reg = <0x0 0x544c0000 0x0 0x00040000>;
+                       status = "disabled";
+               };
+
+               tsec@0,54500000 {
+                       compatible = "nvidia,tegra210-tsec";
+                       reg = <0x0 0x54500000 0x0 0x00040000>;
+                       status = "disabled";
+               };
+
+               sor@0,54540000 {
+                       compatible = "nvidia,tegra210-sor";
+                       reg = <0x0 0x54540000 0x0 0x00040000>;
+                       interrupts = <GIC_SPI 76 IRQ_TYPE_LEVEL_HIGH>;
+                       clocks = <&tegra_car TEGRA210_CLK_SOR0>,
+                                <&tegra_car TEGRA210_CLK_PLL_D_OUT0>,
+                                <&tegra_car TEGRA210_CLK_PLL_DP>,
+                                <&tegra_car TEGRA210_CLK_SOR_SAFE>;
+                       clock-names = "sor", "parent", "dp", "safe";
+                       resets = <&tegra_car 182>;
+                       reset-names = "sor";
+                       status = "disabled";
+               };
+
+               sor@0,54580000 {
+                       compatible = "nvidia,tegra210-sor1";
+                       reg = <0x0 0x54580000 0x0 0x00040000>;
+                       interrupts = <GIC_SPI 75 IRQ_TYPE_LEVEL_HIGH>;
+                       clocks = <&tegra_car TEGRA210_CLK_SOR1>,
+                                <&tegra_car TEGRA210_CLK_PLL_D2_OUT0>,
+                                <&tegra_car TEGRA210_CLK_PLL_DP>,
+                                <&tegra_car TEGRA210_CLK_SOR_SAFE>;
+                       clock-names = "sor", "parent", "dp", "safe";
+                       resets = <&tegra_car 183>;
+                       reset-names = "sor";
+                       status = "disabled";
+               };
+
+               dpaux: dpaux@0,545c0000 {
+                       compatible = "nvidia,tegra124-dpaux";
+                       reg = <0x0 0x545c0000 0x0 0x00040000>;
+                       interrupts = <GIC_SPI 159 IRQ_TYPE_LEVEL_HIGH>;
+                       clocks = <&tegra_car TEGRA210_CLK_DPAUX>,
+                                <&tegra_car TEGRA210_CLK_PLL_DP>;
+                       clock-names = "dpaux", "parent";
+                       resets = <&tegra_car 181>;
+                       reset-names = "dpaux";
+                       status = "disabled";
+               };
+
+               isp@0,54600000 {
+                       compatible = "nvidia,tegra210-isp";
+                       reg = <0x0 0x54600000 0x0 0x00040000>;
+                       interrupts = <GIC_SPI 71 IRQ_TYPE_LEVEL_HIGH>;
+                       status = "disabled";
+               };
+
+               isp@0,54680000 {
+                       compatible = "nvidia,tegra210-isp";
+                       reg = <0x0 0x54680000 0x0 0x00040000>;
+                       interrupts = <GIC_SPI 70 IRQ_TYPE_LEVEL_HIGH>;
+                       status = "disabled";
+               };
+
+               i2c@0,546c0000 {
+                       compatible = "nvidia,tegra210-i2c-vi";
+                       reg = <0x0 0x546c0000 0x0 0x00040000>;
+                       interrupts = <GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>;
+                       status = "disabled";
+               };
+       };
+
+       gic: interrupt-controller@0,50041000 {
+               compatible = "arm,gic-400";
+               #interrupt-cells = <3>;
+               interrupt-controller;
+               reg = <0x0 0x50041000 0x0 0x1000>,
+                     <0x0 0x50042000 0x0 0x2000>,
+                     <0x0 0x50044000 0x0 0x2000>,
+                     <0x0 0x50046000 0x0 0x2000>;
+               interrupts = <GIC_PPI 9
+                       (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>;
+               interrupt-parent = <&gic>;
+       };
+
+       gpu@0,57000000 {
+               compatible = "nvidia,gm20b";
+               reg = <0x0 0x57000000 0x0 0x01000000>,
+                     <0x0 0x58000000 0x0 0x01000000>;
+               interrupts = <GIC_SPI 157 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 158 IRQ_TYPE_LEVEL_HIGH>;
+               interrupt-names = "stall", "nonstall";
+               clocks = <&tegra_car TEGRA210_CLK_GPU>,
+                        <&tegra_car TEGRA210_CLK_PLL_P_OUT5>;
+               clock-names = "gpu", "pwr";
+               resets = <&tegra_car 184>;
+               reset-names = "gpu";
+               status = "disabled";
+       };
+
+       lic: interrupt-controller@0,60004000 {
+               compatible = "nvidia,tegra210-ictlr";
+               reg = <0x0 0x60004000 0x0 0x40>, /* primary controller */
+                     <0x0 0x60004100 0x0 0x40>, /* secondary controller */
+                     <0x0 0x60004200 0x0 0x40>, /* tertiary controller */
+                     <0x0 0x60004300 0x0 0x40>, /* quaternary controller */
+                     <0x0 0x60004400 0x0 0x40>, /* quinary controller */
+                     <0x0 0x60004500 0x0 0x40>; /* senary controller */
+               interrupt-controller;
+               #interrupt-cells = <3>;
+               interrupt-parent = <&gic>;
+       };
+
+       timer@0,60005000 {
+               compatible = "nvidia,tegra210-timer", "nvidia,tegra20-timer";
+               reg = <0x0 0x60005000 0x0 0x400>;
+               interrupts = <GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 41 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 122 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA210_CLK_TIMER>;
+               clock-names = "timer";
+       };
+
+       tegra_car: clock@0,60006000 {
+               compatible = "nvidia,tegra210-car";
+               reg = <0x0 0x60006000 0x0 0x1000>;
+               #clock-cells = <1>;
+               #reset-cells = <1>;
+       };
+
+       flow-controller@0,60007000 {
+               compatible = "nvidia,tegra210-flowctrl";
+               reg = <0x0 0x60007000 0x0 0x1000>;
+       };
+
+       gpio: gpio@0,6000d000 {
+               compatible = "nvidia,tegra210-gpio", "nvidia,tegra124-gpio", "nvidia,tegra30-gpio";
+               reg = <0x0 0x6000d000 0x0 0x1000>;
+               interrupts = <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 35 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 87 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 89 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 125 IRQ_TYPE_LEVEL_HIGH>;
+               #gpio-cells = <2>;
+               gpio-controller;
+               #interrupt-cells = <2>;
+               interrupt-controller;
+       };
+
+       apbdma: dma@0,60020000 {
+               compatible = "nvidia,tegra210-apbdma", "nvidia,tegra148-apbdma";
+               reg = <0x0 0x60020000 0x0 0x1400>;
+               interrupts = <GIC_SPI 104 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 105 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 106 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 107 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 108 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 109 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 110 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 111 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 112 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 113 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 114 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 115 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 116 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 117 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 118 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 119 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 128 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 129 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 130 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 132 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 133 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 134 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 135 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 137 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 138 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 139 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 140 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 141 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 142 IRQ_TYPE_LEVEL_HIGH>,
+                            <GIC_SPI 143 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA210_CLK_APBDMA>;
+               clock-names = "dma";
+               resets = <&tegra_car 34>;
+               reset-names = "dma";
+               #dma-cells = <1>;
+       };
+
+       apbmisc@0,70000800 {
+               compatible = "nvidia,tegra210-apbmisc", "nvidia,tegra20-apbmisc";
+               reg = <0x0 0x70000800 0x0 0x64>,   /* Chip revision */
+                     <0x0 0x7000e864 0x0 0x04>;   /* Strapping options */
+       };
+
+       pinmux: pinmux@0,700008d4 {
+               compatible = "nvidia,tegra210-pinmux";
+               reg = <0x0 0x700008d4 0x0 0x29c>, /* Pad control registers */
+                     <0x0 0x70003000 0x0 0x294>; /* Mux registers */
+       };
+
+       /*
+        * There are two serial driver i.e. 8250 based simple serial
+        * driver and APB DMA based serial driver for higher baudrate
+        * and performace. To enable the 8250 based driver, the compatible
+        * is "nvidia,tegra124-uart", "nvidia,tegra20-uart" and to enable
+        * the APB DMA based serial driver, the comptible is
+        * "nvidia,tegra124-hsuart", "nvidia,tegra30-hsuart".
+        */
+       uarta: serial@0,70006000 {
+               compatible = "nvidia,tegra210-uart", "nvidia,tegra20-uart";
+               reg = <0x0 0x70006000 0x0 0x40>;
+               reg-shift = <2>;
+               interrupts = <GIC_SPI 36 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA210_CLK_UARTA>;
+               clock-names = "serial";
+               resets = <&tegra_car 6>;
+               reset-names = "serial";
+               dmas = <&apbdma 8>, <&apbdma 8>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       uartb: serial@0,70006040 {
+               compatible = "nvidia,tegra210-uart", "nvidia,tegra20-uart";
+               reg = <0x0 0x70006040 0x0 0x40>;
+               reg-shift = <2>;
+               interrupts = <GIC_SPI 37 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA210_CLK_UARTB>;
+               clock-names = "serial";
+               resets = <&tegra_car 7>;
+               reset-names = "serial";
+               dmas = <&apbdma 9>, <&apbdma 9>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       uartc: serial@0,70006200 {
+               compatible = "nvidia,tegra210-uart", "nvidia,tegra20-uart";
+               reg = <0x0 0x70006200 0x0 0x40>;
+               reg-shift = <2>;
+               interrupts = <GIC_SPI 46 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA210_CLK_UARTC>;
+               clock-names = "serial";
+               resets = <&tegra_car 55>;
+               reset-names = "serial";
+               dmas = <&apbdma 10>, <&apbdma 10>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       uartd: serial@0,70006300 {
+               compatible = "nvidia,tegra210-uart", "nvidia,tegra20-uart";
+               reg = <0x0 0x70006300 0x0 0x40>;
+               reg-shift = <2>;
+               interrupts = <GIC_SPI 90 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA210_CLK_UARTD>;
+               clock-names = "serial";
+               resets = <&tegra_car 65>;
+               reset-names = "serial";
+               dmas = <&apbdma 19>, <&apbdma 19>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       pwm: pwm@0,7000a000 {
+               compatible = "nvidia,tegra210-pwm", "nvidia,tegra20-pwm";
+               reg = <0x0 0x7000a000 0x0 0x100>;
+               #pwm-cells = <2>;
+               clocks = <&tegra_car TEGRA210_CLK_PWM>;
+               clock-names = "pwm";
+               resets = <&tegra_car 17>;
+               reset-names = "pwm";
+               status = "disabled";
+       };
+
+       i2c@0,7000c000 {
+               compatible = "nvidia,tegra210-i2c", "nvidia,tegra114-i2c";
+               reg = <0x0 0x7000c000 0x0 0x100>;
+               interrupts = <GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA210_CLK_I2C1>;
+               clock-names = "div-clk";
+               resets = <&tegra_car 12>;
+               reset-names = "i2c";
+               dmas = <&apbdma 21>, <&apbdma 21>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       i2c@0,7000c400 {
+               compatible = "nvidia,tegra210-i2c", "nvidia,tegra114-i2c";
+               reg = <0x0 0x7000c400 0x0 0x100>;
+               interrupts = <GIC_SPI 84 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA210_CLK_I2C2>;
+               clock-names = "div-clk";
+               resets = <&tegra_car 54>;
+               reset-names = "i2c";
+               dmas = <&apbdma 22>, <&apbdma 22>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       i2c@0,7000c500 {
+               compatible = "nvidia,tegra210-i2c", "nvidia,tegra114-i2c";
+               reg = <0x0 0x7000c500 0x0 0x100>;
+               interrupts = <GIC_SPI 92 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA210_CLK_I2C3>;
+               clock-names = "div-clk";
+               resets = <&tegra_car 67>;
+               reset-names = "i2c";
+               dmas = <&apbdma 23>, <&apbdma 23>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       i2c@0,7000c700 {
+               compatible = "nvidia,tegra210-i2c", "nvidia,tegra114-i2c";
+               reg = <0x0 0x7000c700 0x0 0x100>;
+               interrupts = <GIC_SPI 120 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA210_CLK_I2C4>;
+               clock-names = "div-clk";
+               resets = <&tegra_car 103>;
+               reset-names = "i2c";
+               dmas = <&apbdma 26>, <&apbdma 26>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       i2c@0,7000d000 {
+               compatible = "nvidia,tegra210-i2c", "nvidia,tegra114-i2c";
+               reg = <0x0 0x7000d000 0x0 0x100>;
+               interrupts = <GIC_SPI 53 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA210_CLK_I2C5>;
+               clock-names = "div-clk";
+               resets = <&tegra_car 47>;
+               reset-names = "i2c";
+               dmas = <&apbdma 24>, <&apbdma 24>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       i2c@0,7000d100 {
+               compatible = "nvidia,tegra210-i2c", "nvidia,tegra114-i2c";
+               reg = <0x0 0x7000d100 0x0 0x100>;
+               interrupts = <GIC_SPI 63 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA210_CLK_I2C6>;
+               clock-names = "div-clk";
+               resets = <&tegra_car 166>;
+               reset-names = "i2c";
+               dmas = <&apbdma 30>, <&apbdma 30>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       spi@0,7000d400 {
+               compatible = "nvidia,tegra210-spi", "nvidia,tegra114-spi";
+               reg = <0x0 0x7000d400 0x0 0x200>;
+               interrupts = <GIC_SPI 59 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA210_CLK_SBC1>;
+               clock-names = "spi";
+               resets = <&tegra_car 41>;
+               reset-names = "spi";
+               dmas = <&apbdma 15>, <&apbdma 15>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       spi@0,7000d600 {
+               compatible = "nvidia,tegra210-spi", "nvidia,tegra114-spi";
+               reg = <0x0 0x7000d600 0x0 0x200>;
+               interrupts = <GIC_SPI 82 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA210_CLK_SBC2>;
+               clock-names = "spi";
+               resets = <&tegra_car 44>;
+               reset-names = "spi";
+               dmas = <&apbdma 16>, <&apbdma 16>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       spi@0,7000d800 {
+               compatible = "nvidia,tegra210-spi", "nvidia,tegra114-spi";
+               reg = <0x0 0x7000d800 0x0 0x200>;
+               interrupts = <GIC_SPI 83 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA210_CLK_SBC3>;
+               clock-names = "spi";
+               resets = <&tegra_car 46>;
+               reset-names = "spi";
+               dmas = <&apbdma 17>, <&apbdma 17>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       spi@0,7000da00 {
+               compatible = "nvidia,tegra210-spi", "nvidia,tegra114-spi";
+               reg = <0x0 0x7000da00 0x0 0x200>;
+               interrupts = <GIC_SPI 93 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA210_CLK_SBC4>;
+               clock-names = "spi";
+               resets = <&tegra_car 68>;
+               reset-names = "spi";
+               dmas = <&apbdma 18>, <&apbdma 18>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       rtc@0,7000e000 {
+               compatible = "nvidia,tegra210-rtc", "nvidia,tegra20-rtc";
+               reg = <0x0 0x7000e000 0x0 0x100>;
+               interrupts = <GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA210_CLK_RTC>;
+               clock-names = "rtc";
+       };
+
+       pmc: pmc@0,7000e400 {
+               compatible = "nvidia,tegra210-pmc";
+               reg = <0x0 0x7000e400 0x0 0x400>;
+               clocks = <&tegra_car TEGRA210_CLK_PCLK>, <&clk32k_in>;
+               clock-names = "pclk", "clk32k_in";
+
+               #power-domain-cells = <1>;
+       };
+
+       fuse@0,7000f800 {
+               compatible = "nvidia,tegra210-efuse";
+               reg = <0x0 0x7000f800 0x0 0x400>;
+               clocks = <&tegra_car TEGRA210_CLK_FUSE>;
+               clock-names = "fuse";
+               resets = <&tegra_car 39>;
+               reset-names = "fuse";
+       };
+
+       mc: memory-controller@0,70019000 {
+               compatible = "nvidia,tegra210-mc";
+               reg = <0x0 0x70019000 0x0 0x1000>;
+               clocks = <&tegra_car TEGRA210_CLK_MC>;
+               clock-names = "mc";
+
+               interrupts = <GIC_SPI 77 IRQ_TYPE_LEVEL_HIGH>;
+
+               #iommu-cells = <1>;
+       };
+
+       hda@0,70030000 {
+               compatible = "nvidia,tegra210-hda", "nvidia,tegra30-hda";
+               reg = <0x0 0x70030000 0x0 0x10000>;
+               interrupts = <GIC_SPI 81 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA210_CLK_HDA>,
+                        <&tegra_car TEGRA210_CLK_HDA2HDMI>,
+                        <&tegra_car TEGRA210_CLK_HDA2CODEC_2X>;
+               clock-names = "hda", "hda2hdmi", "hda2codec_2x";
+               resets = <&tegra_car 125>, /* hda */
+                        <&tegra_car 128>, /* hda2hdmi */
+                        <&tegra_car 111>; /* hda2codec_2x */
+               reset-names = "hda", "hda2hdmi", "hda2codec_2x";
+               status = "disabled";
+       };
+
+       sdhci@0,700b0000 {
+               compatible = "nvidia,tegra210-sdhci", "nvidia,tegra124-sdhci";
+               reg = <0x0 0x700b0000 0x0 0x200>;
+               interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA210_CLK_SDMMC1>;
+               clock-names = "sdhci";
+               resets = <&tegra_car 14>;
+               reset-names = "sdhci";
+               status = "disabled";
+       };
+
+       sdhci@0,700b0200 {
+               compatible = "nvidia,tegra210-sdhci", "nvidia,tegra124-sdhci";
+               reg = <0x0 0x700b0200 0x0 0x200>;
+               interrupts = <GIC_SPI 15 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA210_CLK_SDMMC2>;
+               clock-names = "sdhci";
+               resets = <&tegra_car 9>;
+               reset-names = "sdhci";
+               status = "disabled";
+       };
+
+       sdhci@0,700b0400 {
+               compatible = "nvidia,tegra210-sdhci", "nvidia,tegra124-sdhci";
+               reg = <0x0 0x700b0400 0x0 0x200>;
+               interrupts = <GIC_SPI 19 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA210_CLK_SDMMC3>;
+               clock-names = "sdhci";
+               resets = <&tegra_car 69>;
+               reset-names = "sdhci";
+               status = "disabled";
+       };
+
+       sdhci@0,700b0600 {
+               compatible = "nvidia,tegra210-sdhci", "nvidia,tegra124-sdhci";
+               reg = <0x0 0x700b0600 0x0 0x200>;
+               interrupts = <GIC_SPI 31 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&tegra_car TEGRA210_CLK_SDMMC4>;
+               clock-names = "sdhci";
+               resets = <&tegra_car 15>;
+               reset-names = "sdhci";
+               status = "disabled";
+       };
+
+       mipi: mipi@0,700e3000 {
+               compatible = "nvidia,tegra210-mipi";
+               reg = <0x0 0x700e3000 0x0 0x100>;
+               clocks = <&tegra_car TEGRA210_CLK_MIPI_CAL>;
+               clock-names = "mipi-cal";
+               #nvidia,mipi-calibrate-cells = <1>;
+       };
+
+       spi@0,70410000 {
+               compatible = "nvidia,tegra210-qspi";
+               reg = <0x0 0x70410000 0x0 0x1000>;
+               interrupts = <GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               clocks = <&tegra_car TEGRA210_CLK_QSPI>;
+               clock-names = "qspi";
+               resets = <&tegra_car 211>;
+               reset-names = "qspi";
+               dmas = <&apbdma 5>, <&apbdma 5>;
+               dma-names = "rx", "tx";
+               status = "disabled";
+       };
+
+       usb@0,7d000000 {
+               compatible = "nvidia,tegra210-ehci", "nvidia,tegra30-ehci", "usb-ehci";
+               reg = <0x0 0x7d000000 0x0 0x4000>;
+               interrupts = <GIC_SPI 20 IRQ_TYPE_LEVEL_HIGH>;
+               phy_type = "utmi";
+               clocks = <&tegra_car TEGRA210_CLK_USBD>;
+               clock-names = "usb";
+               resets = <&tegra_car 22>;
+               reset-names = "usb";
+               nvidia,phy = <&phy1>;
+               status = "disabled";
+       };
+
+       phy1: usb-phy@0,7d000000 {
+               compatible = "nvidia,tegra210-usb-phy", "nvidia,tegra30-usb-phy";
+               reg = <0x0 0x7d000000 0x0 0x4000>,
+                     <0x0 0x7d000000 0x0 0x4000>;
+               phy_type = "utmi";
+               clocks = <&tegra_car TEGRA210_CLK_USBD>,
+                        <&tegra_car TEGRA210_CLK_PLL_U>,
+                        <&tegra_car TEGRA210_CLK_USBD>;
+               clock-names = "reg", "pll_u", "utmi-pads";
+               resets = <&tegra_car 22>, <&tegra_car 22>;
+               reset-names = "usb", "utmi-pads";
+               nvidia,hssync-start-delay = <0>;
+               nvidia,idle-wait-delay = <17>;
+               nvidia,elastic-limit = <16>;
+               nvidia,term-range-adj = <6>;
+               nvidia,xcvr-setup = <9>;
+               nvidia,xcvr-lsfslew = <0>;
+               nvidia,xcvr-lsrslew = <3>;
+               nvidia,hssquelch-level = <2>;
+               nvidia,hsdiscon-level = <5>;
+               nvidia,xcvr-hsslew = <12>;
+               nvidia,has-utmi-pad-registers;
+               status = "disabled";
+       };
+
+       usb@0,7d004000 {
+               compatible = "nvidia,tegra210-ehci", "nvidia,tegra30-ehci", "usb-ehci";
+               reg = <0x0 0x7d004000 0x0 0x4000>;
+               interrupts = <GIC_SPI 21 IRQ_TYPE_LEVEL_HIGH>;
+               phy_type = "utmi";
+               clocks = <&tegra_car TEGRA210_CLK_USB2>;
+               clock-names = "usb";
+               resets = <&tegra_car 58>;
+               reset-names = "usb";
+               nvidia,phy = <&phy2>;
+               status = "disabled";
+       };
+
+       phy2: usb-phy@0,7d004000 {
+               compatible = "nvidia,tegra210-usb-phy", "nvidia,tegra30-usb-phy";
+               reg = <0x0 0x7d004000 0x0 0x4000>,
+                     <0x0 0x7d000000 0x0 0x4000>;
+               phy_type = "utmi";
+               clocks = <&tegra_car TEGRA210_CLK_USB2>,
+                        <&tegra_car TEGRA210_CLK_PLL_U>,
+                        <&tegra_car TEGRA210_CLK_USBD>;
+               clock-names = "reg", "pll_u", "utmi-pads";
+               resets = <&tegra_car 58>, <&tegra_car 22>;
+               reset-names = "usb", "utmi-pads";
+               nvidia,hssync-start-delay = <0>;
+               nvidia,idle-wait-delay = <17>;
+               nvidia,elastic-limit = <16>;
+               nvidia,term-range-adj = <6>;
+               nvidia,xcvr-setup = <9>;
+               nvidia,xcvr-lsfslew = <0>;
+               nvidia,xcvr-lsrslew = <3>;
+               nvidia,hssquelch-level = <2>;
+               nvidia,hsdiscon-level = <5>;
+               nvidia,xcvr-hsslew = <12>;
+               status = "disabled";
+       };
+
+       cpus {
+               #address-cells = <1>;
+               #size-cells = <0>;
+
+               cpu@0 {
+                       device_type = "cpu";
+                       compatible = "arm,cortex-a57";
+                       reg = <0>;
+               };
+
+               cpu@1 {
+                       device_type = "cpu";
+                       compatible = "arm,cortex-a57";
+                       reg = <1>;
+               };
+
+               cpu@2 {
+                       device_type = "cpu";
+                       compatible = "arm,cortex-a57";
+                       reg = <2>;
+               };
+
+               cpu@3 {
+                       device_type = "cpu";
+                       compatible = "arm,cortex-a57";
+                       reg = <3>;
+               };
+       };
+
+       timer {
+               compatible = "arm,armv8-timer";
+               interrupts = <GIC_PPI 13
+                               (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+                            <GIC_PPI 14
+                               (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+                            <GIC_PPI 11
+                               (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+                            <GIC_PPI 10
+                               (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>;
+               interrupt-parent = <&gic>;
+       };
+};
index 18ca9fb9e65f667c0679b3dbdb80166442723bb7..86581f793e398ba29eb1e12e77498405cdeb2e6d 100644 (file)
@@ -16,7 +16,6 @@ CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=14
 CONFIG_MEMCG=y
 CONFIG_MEMCG_SWAP=y
-CONFIG_MEMCG_KMEM=y
 CONFIG_CGROUP_HUGETLB=y
 # CONFIG_UTS_NS is not set
 # CONFIG_IPC_NS is not set
@@ -37,15 +36,13 @@ CONFIG_ARCH_EXYNOS7=y
 CONFIG_ARCH_LAYERSCAPE=y
 CONFIG_ARCH_HISI=y
 CONFIG_ARCH_MEDIATEK=y
+CONFIG_ARCH_QCOM=y
 CONFIG_ARCH_ROCKCHIP=y
 CONFIG_ARCH_SEATTLE=y
 CONFIG_ARCH_RENESAS=y
 CONFIG_ARCH_R8A7795=y
 CONFIG_ARCH_STRATIX10=y
 CONFIG_ARCH_TEGRA=y
-CONFIG_ARCH_TEGRA_132_SOC=y
-CONFIG_ARCH_TEGRA_210_SOC=y
-CONFIG_ARCH_QCOM=y
 CONFIG_ARCH_SPRD=y
 CONFIG_ARCH_THUNDER=y
 CONFIG_ARCH_UNIPHIER=y
@@ -54,14 +51,19 @@ CONFIG_ARCH_XGENE=y
 CONFIG_ARCH_ZYNQMP=y
 CONFIG_PCI=y
 CONFIG_PCI_MSI=y
+CONFIG_PCI_IOV=y
+CONFIG_PCI_RCAR_GEN2_PCIE=y
 CONFIG_PCI_HOST_GENERIC=y
 CONFIG_PCI_XGENE=y
-CONFIG_SMP=y
+CONFIG_PCI_LAYERSCAPE=y
+CONFIG_PCI_HISI=y
+CONFIG_PCIE_QCOM=y
 CONFIG_SCHED_MC=y
 CONFIG_PREEMPT=y
 CONFIG_KSM=y
 CONFIG_TRANSPARENT_HUGEPAGE=y
 CONFIG_CMA=y
+CONFIG_XEN=y
 CONFIG_CMDLINE="console=ttyAMA0"
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
 CONFIG_COMPAT=y
@@ -100,7 +102,11 @@ CONFIG_PATA_OF_PLATFORM=y
 CONFIG_NETDEVICES=y
 CONFIG_TUN=y
 CONFIG_VIRTIO_NET=y
+CONFIG_AMD_XGBE=y
 CONFIG_NET_XGENE=y
+CONFIG_E1000E=y
+CONFIG_IGB=y
+CONFIG_IGBVF=y
 CONFIG_SKY2=y
 CONFIG_RAVB=y
 CONFIG_SMC91X=y
@@ -117,25 +123,23 @@ CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_SERIAL_8250_DW=y
 CONFIG_SERIAL_8250_MT6577=y
 CONFIG_SERIAL_8250_UNIPHIER=y
+CONFIG_SERIAL_OF_PLATFORM=y
 CONFIG_SERIAL_AMBA_PL011=y
 CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
 CONFIG_SERIAL_SAMSUNG=y
-CONFIG_SERIAL_SAMSUNG_UARTS_4=y
-CONFIG_SERIAL_SAMSUNG_UARTS=4
 CONFIG_SERIAL_SAMSUNG_CONSOLE=y
+CONFIG_SERIAL_TEGRA=y
 CONFIG_SERIAL_SH_SCI=y
 CONFIG_SERIAL_SH_SCI_NR_UARTS=11
 CONFIG_SERIAL_SH_SCI_CONSOLE=y
-CONFIG_SERIAL_TEGRA=y
 CONFIG_SERIAL_MSM=y
 CONFIG_SERIAL_MSM_CONSOLE=y
-CONFIG_SERIAL_OF_PLATFORM=y
 CONFIG_SERIAL_XILINX_PS_UART=y
 CONFIG_SERIAL_XILINX_PS_UART_CONSOLE=y
 CONFIG_VIRTIO_CONSOLE=y
 # CONFIG_HW_RANDOM is not set
-CONFIG_I2C=y
 CONFIG_I2C_QUP=y
+CONFIG_I2C_UNIPHIER_F=y
 CONFIG_I2C_RCAR=y
 CONFIG_SPI=y
 CONFIG_SPI_PL022=y
@@ -176,8 +180,6 @@ CONFIG_MMC_SDHCI_PLTFM=y
 CONFIG_MMC_SDHCI_TEGRA=y
 CONFIG_MMC_SPI=y
 CONFIG_MMC_DW=y
-CONFIG_MMC_DW_IDMAC=y
-CONFIG_MMC_DW_PLTFM=y
 CONFIG_MMC_DW_EXYNOS=y
 CONFIG_NEW_LEDS=y
 CONFIG_LEDS_CLASS=y
@@ -187,28 +189,33 @@ CONFIG_LEDS_TRIGGER_HEARTBEAT=y
 CONFIG_LEDS_TRIGGER_CPU=y
 CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_EFI=y
+CONFIG_RTC_DRV_PL031=y
 CONFIG_RTC_DRV_XGENE=y
 CONFIG_DMADEVICES=y
-CONFIG_RCAR_DMAC=y
 CONFIG_QCOM_BAM_DMA=y
 CONFIG_TEGRA20_APB_DMA=y
+CONFIG_RCAR_DMAC=y
+CONFIG_VFIO=y
+CONFIG_VFIO_PCI=y
 CONFIG_VIRTIO_PCI=y
 CONFIG_VIRTIO_BALLOON=y
 CONFIG_VIRTIO_MMIO=y
+CONFIG_XEN_GNTDEV=y
+CONFIG_XEN_GRANT_DEV_ALLOC=y
 CONFIG_COMMON_CLK_CS2000_CP=y
 CONFIG_COMMON_CLK_QCOM=y
 CONFIG_MSM_GCC_8916=y
 CONFIG_HWSPINLOCK_QCOM=y
-# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_ARM_SMMU=y
 CONFIG_QCOM_SMEM=y
 CONFIG_QCOM_SMD=y
 CONFIG_QCOM_SMD_RPM=y
+CONFIG_ARCH_TEGRA_132_SOC=y
+CONFIG_ARCH_TEGRA_210_SOC=y
+CONFIG_HISILICON_IRQ_MBIGEN=y
 CONFIG_PHY_XGENE=y
 CONFIG_EXT2_FS=y
 CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-# CONFIG_EXT3_FS_XATTR is not set
-CONFIG_EXT4_FS=y
 CONFIG_FANOTIFY=y
 CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
 CONFIG_QUOTA=y
@@ -239,6 +246,7 @@ CONFIG_LOCKUP_DETECTOR=y
 # CONFIG_FTRACE is not set
 CONFIG_MEMTEST=y
 CONFIG_SECURITY=y
+CONFIG_CRYPTO_ECHAINIV=y
 CONFIG_CRYPTO_ANSI_CPRNG=y
 CONFIG_ARM64_CRYPTO=y
 CONFIG_CRYPTO_SHA1_ARM64_CE=y
index 007a69fc4f408d5f2f7e58f4070b6cb354a5e022..5f3ab8c1db55cca8dbf4c9e1fc315e90335b6d60 100644 (file)
@@ -121,6 +121,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
                return -EFAULT;
 
        asm volatile("// futex_atomic_cmpxchg_inatomic\n"
+ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, CONFIG_ARM64_PAN)
 "      prfm    pstl1strm, %2\n"
 "1:    ldxr    %w1, %2\n"
 "      sub     %w3, %w1, %w4\n"
@@ -137,6 +138,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 "      .align  3\n"
 "      .quad   1b, 4b, 2b, 4b\n"
 "      .popsection\n"
+ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, CONFIG_ARM64_PAN)
        : "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp)
        : "r" (oldval), "r" (newval), "Ir" (-EFAULT)
        : "memory");
index 738a95f93e493e3002ac8749857f8599c5ae3404..bef6e9243c636b2aab05688f79de4f37a00154b3 100644 (file)
 #define CPTR_EL2_TCPAC (1 << 31)
 #define CPTR_EL2_TTA   (1 << 20)
 #define CPTR_EL2_TFP   (1 << CPTR_EL2_TFP_SHIFT)
+#define CPTR_EL2_DEFAULT       0x000033ff
 
 /* Hyp Debug Configuration Register bits */
 #define MDCR_EL2_TDRA          (1 << 11)
index 3066328cd86b69a91274e0cb841059b428666140..779a5872a2c5fb5f9aa9b49af6f77391aefc2336 100644 (file)
@@ -127,10 +127,14 @@ static inline unsigned long *vcpu_spsr(const struct kvm_vcpu *vcpu)
 
 static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu)
 {
-       u32 mode = *vcpu_cpsr(vcpu) & PSR_MODE_MASK;
+       u32 mode;
 
-       if (vcpu_mode_is_32bit(vcpu))
+       if (vcpu_mode_is_32bit(vcpu)) {
+               mode = *vcpu_cpsr(vcpu) & COMPAT_PSR_MODE_MASK;
                return mode > COMPAT_PSR_MODE_USR;
+       }
+
+       mode = *vcpu_cpsr(vcpu) & PSR_MODE_MASK;
 
        return mode != PSR_MODE_EL0t;
 }
index 9b2f5a9d019df493fa6021ee3ca6b4779401d8c4..ae615b9d9a551bab47bb4900e867c8d15db02bb1 100644 (file)
@@ -39,6 +39,7 @@
 
 #ifndef __ASSEMBLY__
 
+#include <linux/personality.h> /* for READ_IMPLIES_EXEC */
 #include <asm/pgtable-types.h>
 
 extern void __cpu_clear_user_page(void *p, unsigned long user);
index 2d545d7aa80ba715f65cc82b671213fa5505b495..bf464de33f52f77d2520db0a12e53b3749ced6cc 100644 (file)
@@ -67,11 +67,11 @@ extern void __pgd_error(const char *file, int line, unsigned long val);
 #define PROT_DEFAULT           (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
 #define PROT_SECT_DEFAULT      (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
 
-#define PROT_DEVICE_nGnRnE     (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRnE))
-#define PROT_DEVICE_nGnRE      (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRE))
-#define PROT_NORMAL_NC         (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL_NC))
-#define PROT_NORMAL_WT         (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL_WT))
-#define PROT_NORMAL            (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL))
+#define PROT_DEVICE_nGnRnE     (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE))
+#define PROT_DEVICE_nGnRE      (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE))
+#define PROT_NORMAL_NC         (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC))
+#define PROT_NORMAL_WT         (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_WT))
+#define PROT_NORMAL            (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL))
 
 #define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE))
 #define PROT_SECT_NORMAL       (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
@@ -81,7 +81,7 @@ extern void __pgd_error(const char *file, int line, unsigned long val);
 
 #define PAGE_KERNEL            __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE)
 #define PAGE_KERNEL_RO         __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
-#define PAGE_KERNEL_ROX        __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
+#define PAGE_KERNEL_ROX                __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
 #define PAGE_KERNEL_EXEC       __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE)
 #define PAGE_KERNEL_EXEC_CONT  __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT)
 
@@ -153,6 +153,7 @@ extern struct page *empty_zero_page;
 #define pte_write(pte)         (!!(pte_val(pte) & PTE_WRITE))
 #define pte_exec(pte)          (!(pte_val(pte) & PTE_UXN))
 #define pte_cont(pte)          (!!(pte_val(pte) & PTE_CONT))
+#define pte_user(pte)          (!!(pte_val(pte) & PTE_USER))
 
 #ifdef CONFIG_ARM64_HW_AFDBM
 #define pte_hw_dirty(pte)      (pte_write(pte) && !(pte_val(pte) & PTE_RDONLY))
@@ -163,8 +164,6 @@ extern struct page *empty_zero_page;
 #define pte_dirty(pte)         (pte_sw_dirty(pte) || pte_hw_dirty(pte))
 
 #define pte_valid(pte)         (!!(pte_val(pte) & PTE_VALID))
-#define pte_valid_user(pte) \
-       ((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER))
 #define pte_valid_not_user(pte) \
        ((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID)
 #define pte_valid_young(pte) \
@@ -278,13 +277,13 @@ extern void __sync_icache_dcache(pte_t pteval, unsigned long addr);
 static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
                              pte_t *ptep, pte_t pte)
 {
-       if (pte_valid_user(pte)) {
-               if (!pte_special(pte) && pte_exec(pte))
-                       __sync_icache_dcache(pte, addr);
+       if (pte_valid(pte)) {
                if (pte_sw_dirty(pte) && pte_write(pte))
                        pte_val(pte) &= ~PTE_RDONLY;
                else
                        pte_val(pte) |= PTE_RDONLY;
+               if (pte_user(pte) && pte_exec(pte) && !pte_special(pte))
+                       __sync_icache_dcache(pte, addr);
        }
 
        /*
index ffe9c2b6431bd5c0cb1e9982afe84e13a82e2043..917d98108b3f05d9b1013020f9f576a3db776bc6 100644 (file)
@@ -514,9 +514,14 @@ CPU_LE(    movk    x0, #0x30d0, lsl #16    )       // Clear EE and E0E on LE systems
 #endif
 
        /* EL2 debug */
+       mrs     x0, id_aa64dfr0_el1             // Check ID_AA64DFR0_EL1 PMUVer
+       sbfx    x0, x0, #8, #4
+       cmp     x0, #1
+       b.lt    4f                              // Skip if no PMU present
        mrs     x0, pmcr_el0                    // Disable debug access traps
        ubfx    x0, x0, #11, #5                 // to EL2 and allow access to
        msr     mdcr_el2, x0                    // all PMU counters from EL1
+4:
 
        /* Stage-2 translation */
        msr     vttbr_el2, xzr
index bc2abb8b1599576ae2dec02bce0c46c48fc707dd..999633bd7294aab399183bd3bcf33c64ce8a6e2c 100644 (file)
 
 #ifdef CONFIG_EFI
 
+/*
+ * Prevent the symbol aliases below from being emitted into the kallsyms
+ * table, by forcing them to be absolute symbols (which are conveniently
+ * ignored by scripts/kallsyms) rather than section relative symbols.
+ * The distinction is only relevant for partial linking, and only for symbols
+ * that are defined within a section declaration (which is not the case for
+ * the definitions below) so the resulting values will be identical.
+ */
+#define KALLSYMS_HIDE(sym)     ABSOLUTE(sym)
+
 /*
  * The EFI stub has its own symbol namespace prefixed by __efistub_, to
  * isolate it from the kernel proper. The following symbols are legally
  * linked at. The routines below are all implemented in assembler in a
  * position independent manner
  */
-__efistub_memcmp               = __pi_memcmp;
-__efistub_memchr               = __pi_memchr;
-__efistub_memcpy               = __pi_memcpy;
-__efistub_memmove              = __pi_memmove;
-__efistub_memset               = __pi_memset;
-__efistub_strlen               = __pi_strlen;
-__efistub_strcmp               = __pi_strcmp;
-__efistub_strncmp              = __pi_strncmp;
-__efistub___flush_dcache_area  = __pi___flush_dcache_area;
+__efistub_memcmp               = KALLSYMS_HIDE(__pi_memcmp);
+__efistub_memchr               = KALLSYMS_HIDE(__pi_memchr);
+__efistub_memcpy               = KALLSYMS_HIDE(__pi_memcpy);
+__efistub_memmove              = KALLSYMS_HIDE(__pi_memmove);
+__efistub_memset               = KALLSYMS_HIDE(__pi_memset);
+__efistub_strlen               = KALLSYMS_HIDE(__pi_strlen);
+__efistub_strcmp               = KALLSYMS_HIDE(__pi_strcmp);
+__efistub_strncmp              = KALLSYMS_HIDE(__pi_strncmp);
+__efistub___flush_dcache_area  = KALLSYMS_HIDE(__pi___flush_dcache_area);
 
 #ifdef CONFIG_KASAN
-__efistub___memcpy             = __pi_memcpy;
-__efistub___memmove            = __pi_memmove;
-__efistub___memset             = __pi_memset;
+__efistub___memcpy             = KALLSYMS_HIDE(__pi_memcpy);
+__efistub___memmove            = KALLSYMS_HIDE(__pi_memmove);
+__efistub___memset             = KALLSYMS_HIDE(__pi_memset);
 #endif
 
-__efistub__text                        = _text;
-__efistub__end                 = _end;
-__efistub__edata               = _edata;
+__efistub__text                        = KALLSYMS_HIDE(_text);
+__efistub__end                 = KALLSYMS_HIDE(_end);
+__efistub__edata               = KALLSYMS_HIDE(_edata);
 
 #endif
 
index ca8f5a5e2f965748fca28ced482c9c195270e5ab..f0e7bdfae134a727ec7c0ac76466020fdb65e0fb 100644 (file)
@@ -36,7 +36,11 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
        write_sysreg(val, hcr_el2);
        /* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */
        write_sysreg(1 << 15, hstr_el2);
-       write_sysreg(CPTR_EL2_TTA | CPTR_EL2_TFP, cptr_el2);
+
+       val = CPTR_EL2_DEFAULT;
+       val |= CPTR_EL2_TTA | CPTR_EL2_TFP;
+       write_sysreg(val, cptr_el2);
+
        write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
 }
 
@@ -45,7 +49,7 @@ static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
        write_sysreg(HCR_RW, hcr_el2);
        write_sysreg(0, hstr_el2);
        write_sysreg(read_sysreg(mdcr_el2) & MDCR_EL2_HPMN_MASK, mdcr_el2);
-       write_sysreg(0, cptr_el2);
+       write_sysreg(CPTR_EL2_DEFAULT, cptr_el2);
 }
 
 static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu)
index 648112e90ed546d2d052ccf7d9f66866d2390d06..4d1ac81870d27e6f272abde088e0f5e8290c80d1 100644 (file)
 
 #define PSTATE_FAULT_BITS_64   (PSR_MODE_EL1h | PSR_A_BIT | PSR_F_BIT | \
                                 PSR_I_BIT | PSR_D_BIT)
-#define EL1_EXCEPT_SYNC_OFFSET 0x200
+
+#define CURRENT_EL_SP_EL0_VECTOR       0x0
+#define CURRENT_EL_SP_ELx_VECTOR       0x200
+#define LOWER_EL_AArch64_VECTOR                0x400
+#define LOWER_EL_AArch32_VECTOR                0x600
 
 static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset)
 {
@@ -97,6 +101,34 @@ static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt,
                *fsr = 0x14;
 }
 
+enum exception_type {
+       except_type_sync        = 0,
+       except_type_irq         = 0x80,
+       except_type_fiq         = 0x100,
+       except_type_serror      = 0x180,
+};
+
+static u64 get_except_vector(struct kvm_vcpu *vcpu, enum exception_type type)
+{
+       u64 exc_offset;
+
+       switch (*vcpu_cpsr(vcpu) & (PSR_MODE_MASK | PSR_MODE32_BIT)) {
+       case PSR_MODE_EL1t:
+               exc_offset = CURRENT_EL_SP_EL0_VECTOR;
+               break;
+       case PSR_MODE_EL1h:
+               exc_offset = CURRENT_EL_SP_ELx_VECTOR;
+               break;
+       case PSR_MODE_EL0t:
+               exc_offset = LOWER_EL_AArch64_VECTOR;
+               break;
+       default:
+               exc_offset = LOWER_EL_AArch32_VECTOR;
+       }
+
+       return vcpu_sys_reg(vcpu, VBAR_EL1) + exc_offset + type;
+}
+
 static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr)
 {
        unsigned long cpsr = *vcpu_cpsr(vcpu);
@@ -108,8 +140,8 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr
        *vcpu_spsr(vcpu) = cpsr;
        *vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu);
 
+       *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync);
        *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64;
-       *vcpu_pc(vcpu) = vcpu_sys_reg(vcpu, VBAR_EL1) + EL1_EXCEPT_SYNC_OFFSET;
 
        vcpu_sys_reg(vcpu, FAR_EL1) = addr;
 
@@ -143,8 +175,8 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
        *vcpu_spsr(vcpu) = cpsr;
        *vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu);
 
+       *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync);
        *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64;
-       *vcpu_pc(vcpu) = vcpu_sys_reg(vcpu, VBAR_EL1) + EL1_EXCEPT_SYNC_OFFSET;
 
        /*
         * Build an unknown exception, depending on the instruction
index eec3598b4184077b83b5a1f24321891cb110f5bb..2e90371cfb378b0e064667506a2b74c9275cacfb 100644 (file)
@@ -1007,10 +1007,9 @@ static int emulate_cp(struct kvm_vcpu *vcpu,
                if (likely(r->access(vcpu, params, r))) {
                        /* Skip instruction, since it was emulated */
                        kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+                       /* Handled */
+                       return 0;
                }
-
-               /* Handled */
-               return 0;
        }
 
        /* Not handled */
@@ -1043,7 +1042,7 @@ static void unhandled_cp_access(struct kvm_vcpu *vcpu,
 }
 
 /**
- * kvm_handle_cp_64 -- handles a mrrc/mcrr trap on a guest CP15 access
+ * kvm_handle_cp_64 -- handles a mrrc/mcrr trap on a guest CP14/CP15 access
  * @vcpu: The VCPU pointer
  * @run:  The kvm_run struct
  */
@@ -1095,7 +1094,7 @@ out:
 }
 
 /**
- * kvm_handle_cp15_32 -- handles a mrc/mcr trap on a guest CP15 access
+ * kvm_handle_cp_32 -- handles a mrc/mcr trap on a guest CP14/CP15 access
  * @vcpu: The VCPU pointer
  * @run:  The kvm_run struct
  */
index 5a22a119a74c87b4b5b54e114701b3c6eed233e6..0adbebbc28037110afd911a5cfe14693101e19ea 100644 (file)
@@ -46,7 +46,7 @@ enum address_markers_idx {
        PCI_START_NR,
        PCI_END_NR,
        MODULES_START_NR,
-       MODUELS_END_NR,
+       MODULES_END_NR,
        KERNEL_SPACE_NR,
 };
 
index cf038c7d9fa994c7d86e05920ffa8961aecc4ad8..cab7a5be40aa85cbd933635d48208d2af54bcf1c 100644 (file)
@@ -120,6 +120,7 @@ static void __init cpu_set_ttbr1(unsigned long ttbr1)
 void __init kasan_init(void)
 {
        struct memblock_region *reg;
+       int i;
 
        /*
         * We are going to perform proper setup of shadow memory.
@@ -155,6 +156,14 @@ void __init kasan_init(void)
                                pfn_to_nid(virt_to_pfn(start)));
        }
 
+       /*
+        * KAsan may reuse the contents of kasan_zero_pte directly, so we
+        * should make sure that it maps the zero page read-only.
+        */
+       for (i = 0; i < PTRS_PER_PTE; i++)
+               set_pte(&kasan_zero_pte[i],
+                       pfn_pte(virt_to_pfn(kasan_zero_page), PAGE_KERNEL_RO));
+
        memset(kasan_zero_page, 0, PAGE_SIZE);
        cpu_set_ttbr1(__pa(swapper_pg_dir));
        flush_tlb_all();
index 3571c7309c5e79f0d2d3e20986a581ffb6454dae..0795c3a36d8f0d140cf4952bd91b14962e7f81b4 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/sched.h>
+#include <linux/vmalloc.h>
 
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
@@ -44,6 +45,7 @@ static int change_memory_common(unsigned long addr, int numpages,
        unsigned long end = start + size;
        int ret;
        struct page_change_data data;
+       struct vm_struct *area;
 
        if (!PAGE_ALIGNED(addr)) {
                start &= PAGE_MASK;
@@ -51,11 +53,27 @@ static int change_memory_common(unsigned long addr, int numpages,
                WARN_ON_ONCE(1);
        }
 
-       if (start < MODULES_VADDR || start >= MODULES_END)
+       /*
+        * Kernel VA mappings are always live, and splitting live section
+        * mappings into page mappings may cause TLB conflicts. This means
+        * we have to ensure that changing the permission bits of the range
+        * we are operating on does not result in such splitting.
+        *
+        * Let's restrict ourselves to mappings created by vmalloc (or vmap).
+        * Those are guaranteed to consist entirely of page mappings, and
+        * splitting is never needed.
+        *
+        * So check whether the [addr, addr + size) interval is entirely
+        * covered by precisely one VM area that has the VM_ALLOC flag set.
+        */
+       area = find_vm_area((void *)addr);
+       if (!area ||
+           end > (unsigned long)area->addr + area->size ||
+           !(area->flags & VM_ALLOC))
                return -EINVAL;
 
-       if (end < MODULES_VADDR || end >= MODULES_END)
-               return -EINVAL;
+       if (!numpages)
+               return 0;
 
        data.set_mask = set_mask;
        data.clear_mask = clear_mask;
index 146bd99a7532bcc6f53ad509a839561640eeaf3e..e6a30e1268a8c7570c8298797d7cf27dcb0d649f 100644 (file)
        b.lo    9998b
        dsb     \domain
        .endm
+
+/*
+ * reset_pmuserenr_el0 - reset PMUSERENR_EL0 if PMUv3 present
+ */
+       .macro  reset_pmuserenr_el0, tmpreg
+       mrs     \tmpreg, id_aa64dfr0_el1        // Check ID_AA64DFR0_EL1 PMUVer
+       sbfx    \tmpreg, \tmpreg, #8, #4
+       cmp     \tmpreg, #1                     // Skip if no PMU present
+       b.lt    9000f
+       msr     pmuserenr_el0, xzr              // Disable PMU access from EL0
+9000:
+       .endm
index a3d867e723b4f0c5c23283c89264d4a2f7f51561..c164d2cb35c05f12e7266592ee52a3b425483670 100644 (file)
@@ -117,7 +117,7 @@ ENTRY(cpu_do_resume)
         */
        ubfx    x11, x11, #1, #1
        msr     oslar_el1, x11
-       msr     pmuserenr_el0, xzr              // Disable PMU access from EL0
+       reset_pmuserenr_el0 x0                  // Disable PMU access from EL0
        mov     x0, x12
        dsb     nsh             // Make sure local tlb invalidation completed
        isb
@@ -154,7 +154,7 @@ ENTRY(__cpu_setup)
        msr     cpacr_el1, x0                   // Enable FP/ASIMD
        mov     x0, #1 << 12                    // Reset mdscr_el1 and disable
        msr     mdscr_el1, x0                   // access to the DCC from EL0
-       msr     pmuserenr_el0, xzr              // Disable PMU access from EL0
+       reset_pmuserenr_el0 x0                  // Disable PMU access from EL0
        /*
         * Memory region attributes for LPAE:
         *
index 74c132d901bd290bcc803b7c0a0266428a2ec1cb..6a8685051b676196780cab79a495fc6d906ccce7 100644 (file)
@@ -11,7 +11,7 @@
 
 
 
-#define NR_syscalls                    323 /* length of syscall table */
+#define NR_syscalls                    324 /* length of syscall table */
 
 /*
  * The following defines stop scripts/checksyscalls.sh from complaining about
index 762edce7572eb33f03a3402e4357de69626764c9..41369a103838a6e3d8776f550885399d9006e60e 100644 (file)
 #define __NR_membarrier                        1344
 #define __NR_kcmp                      1345
 #define __NR_mlock2                    1346
+#define __NR_copy_file_range           1347
 
 #endif /* _UAPI_ASM_IA64_UNISTD_H */
index 534a74acb849d736726b5f066675f0a3a4c87ed4..477c55e244ecd45ce879dea27779967a16e0d13c 100644 (file)
@@ -1772,5 +1772,6 @@ sys_call_table:
        data8 sys_membarrier
        data8 sys_kcmp                          // 1345
        data8 sys_mlock2
+       data8 sys_copy_file_range
 
        .org sys_call_table + 8*NR_syscalls     // guard against failures to increase NR_syscalls
index 836ac5a963c83140d18dcd762459d19e1fd0c101..2841c0a3fd3bb201a4bed53d823858b32a0a009b 100644 (file)
@@ -276,6 +276,7 @@ source "kernel/Kconfig.preempt"
 
 config SMP
        bool "Symmetric multi-processing support"
+       depends on MMU
        ---help---
          This enables support for systems with more than one CPU. If you have
          a system with only one CPU, say N. If you have a system with more
index a96c81d1d22e6afce0973161639024e93346fd0b..c5cd63a4b6d53b399e91e8ebad7358a3f45fce8c 100644 (file)
@@ -21,6 +21,7 @@ platforms += mti-malta
 platforms += mti-sead3
 platforms += netlogic
 platforms += paravirt
+platforms += pic32
 platforms += pistachio
 platforms += pmcs-msp71xx
 platforms += pnx833x
index fbf3f6670b69a58f23bf3878eb0d5d2df3edeaec..57a945e832f43ff711fe04d1fa8a52fe1970f000 100644 (file)
@@ -169,6 +169,7 @@ config BMIPS_GENERIC
        select USB_EHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN
        select USB_OHCI_BIG_ENDIAN_DESC if CPU_BIG_ENDIAN
        select USB_OHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN
+       select ARCH_WANT_OPTIONAL_GPIOLIB
        help
          Build a generic DT-based kernel image that boots on select
          BCM33xx cable modem chips, BCM63xx DSL chips, and BCM7xxx set-top
@@ -480,6 +481,14 @@ config MIPS_MALTA
          This enables support for the MIPS Technologies Malta evaluation
          board.
 
+config MACH_PIC32
+       bool "Microchip PIC32 Family"
+       help
+         This enables support for the Microchip PIC32 family of platforms.
+
+         Microchip PIC32 is a family of general-purpose 32 bit MIPS core
+         microcontrollers.
+
 config MIPS_SEAD3
        bool "MIPS SEAD3 board"
        select BOOT_ELF32
@@ -979,6 +988,7 @@ source "arch/mips/jazz/Kconfig"
 source "arch/mips/jz4740/Kconfig"
 source "arch/mips/lantiq/Kconfig"
 source "arch/mips/lasat/Kconfig"
+source "arch/mips/pic32/Kconfig"
 source "arch/mips/pistachio/Kconfig"
 source "arch/mips/pmcs-msp71xx/Kconfig"
 source "arch/mips/ralink/Kconfig"
@@ -1755,6 +1765,10 @@ config SYS_SUPPORTS_ZBOOT_UART16550
        bool
        select SYS_SUPPORTS_ZBOOT
 
+config SYS_SUPPORTS_ZBOOT_UART_PROM
+       bool
+       select SYS_SUPPORTS_ZBOOT
+
 config CPU_LOONGSON2
        bool
        select CPU_SUPPORTS_32BIT_KERNEL
@@ -2017,7 +2031,8 @@ config KVM_GUEST
        bool "KVM Guest Kernel"
        depends on BROKEN_ON_SMP
        help
-         Select this option if building a guest kernel for KVM (Trap & Emulate) mode
+         Select this option if building a guest kernel for KVM (Trap & Emulate)
+         mode.
 
 config KVM_GUEST_TIMER_FREQ
        int "Count/Compare Timer Frequency (MHz)"
index 3f70ba54ae21e909973ad9913eb71151fcc50492..e78d60dbdffdad93994421e684a87e2f34f8b802 100644 (file)
@@ -166,16 +166,6 @@ cflags-$(CONFIG_CPU_CAVIUM_OCTEON) += -Wa,-march=octeon
 endif
 cflags-$(CONFIG_CAVIUM_CN63XXP1) += -Wa,-mfix-cn63xxp1
 cflags-$(CONFIG_CPU_BMIPS)     += -march=mips32 -Wa,-mips32 -Wa,--trap
-#
-# binutils from v2.25 on and gcc starting from v4.9.0 treat -march=loongson3a
-# as MIPS64 R1; older versions as just R1.  This leaves the possibility open
-# that GCC might generate R2 code for -march=loongson3a which then is rejected
-# by GAS.  The cc-option can't probe for this behaviour so -march=loongson3a
-# can't easily be used safely within the kbuild framework.
-#
-cflags-$(CONFIG_CPU_LOONGSON3)  +=                                     \
-       $(call cc-option,-march=mips64r2,-mips64r2 -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS64) \
-       -Wa,-mips64r2 -Wa,--trap
 
 cflags-$(CONFIG_CPU_R4000_WORKAROUNDS) += $(call cc-option,-mfix-r4000,)
 cflags-$(CONFIG_CPU_R4400_WORKAROUNDS) += $(call cc-option,-mfix-r4400,)
index f9bc4f520440142b2be4b033c8166f684bd1fc75..84548f704035391d9715d2aeee0a8845735f8dd1 100644 (file)
@@ -40,7 +40,7 @@
 
 static int gpio2_get(struct gpio_chip *chip, unsigned offset)
 {
-       return alchemy_gpio2_get_value(offset + ALCHEMY_GPIO2_BASE);
+       return !!alchemy_gpio2_get_value(offset + ALCHEMY_GPIO2_BASE);
 }
 
 static void gpio2_set(struct gpio_chip *chip, unsigned offset, int value)
@@ -68,7 +68,7 @@ static int gpio2_to_irq(struct gpio_chip *chip, unsigned offset)
 
 static int gpio1_get(struct gpio_chip *chip, unsigned offset)
 {
-       return alchemy_gpio1_get_value(offset + ALCHEMY_GPIO1_BASE);
+       return !!alchemy_gpio1_get_value(offset + ALCHEMY_GPIO1_BASE);
 }
 
 static void gpio1_set(struct gpio_chip *chip,
@@ -119,7 +119,7 @@ struct gpio_chip alchemy_gpio_chip[] = {
 
 static int alchemy_gpic_get(struct gpio_chip *chip, unsigned int off)
 {
-       return au1300_gpio_get_value(off + AU1300_GPIO_BASE);
+       return !!au1300_gpio_get_value(off + AU1300_GPIO_BASE);
 }
 
 static void alchemy_gpic_set(struct gpio_chip *chip, unsigned int off, int v)
index f4930456eb8e8181098c6035dccdd1203d72ea18..f969f583c68c66c0512d7c648dc16d22c18158f6 100644 (file)
@@ -37,7 +37,7 @@ static int ar7_gpio_get_value(struct gpio_chip *chip, unsigned gpio)
                                container_of(chip, struct ar7_gpio_chip, chip);
        void __iomem *gpio_in = gpch->regs + AR7_GPIO_INPUT;
 
-       return readl(gpio_in) & (1 << gpio);
+       return !!(readl(gpio_in) & (1 << gpio));
 }
 
 static int titan_gpio_get_value(struct gpio_chip *chip, unsigned gpio)
index ca7cc19adfea7f01154c55f44e47b5f863499843..870c6b2e97e835127dc88c6ca4bc4219faa44d92 100644 (file)
@@ -23,7 +23,6 @@ void ath79_clocks_init(void);
 unsigned long ath79_get_sys_clk_rate(const char *id);
 
 void ath79_ddr_ctrl_init(void);
-void ath79_ddr_wb_flush(unsigned int reg);
 
 void ath79_gpio_init(void);
 
index eeb3953ed8ac8d8051ccbe3df9af8270915329b3..511c06560dc1f5a95866f832ec9ae2180048b9d5 100644 (file)
 #include "common.h"
 #include "machtypes.h"
 
+static void __init ath79_misc_intc_domain_init(
+       struct device_node *node, int irq);
+
 static void ath79_misc_irq_handler(struct irq_desc *desc)
 {
-       void __iomem *base = ath79_reset_base;
+       struct irq_domain *domain = irq_desc_get_handler_data(desc);
+       void __iomem *base = domain->host_data;
        u32 pending;
 
        pending = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_STATUS) &
@@ -42,15 +46,15 @@ static void ath79_misc_irq_handler(struct irq_desc *desc)
        while (pending) {
                int bit = __ffs(pending);
 
-               generic_handle_irq(ATH79_MISC_IRQ(bit));
+               generic_handle_irq(irq_linear_revmap(domain, bit));
                pending &= ~BIT(bit);
        }
 }
 
 static void ar71xx_misc_irq_unmask(struct irq_data *d)
 {
-       unsigned int irq = d->irq - ATH79_MISC_IRQ_BASE;
-       void __iomem *base = ath79_reset_base;
+       void __iomem *base = irq_data_get_irq_chip_data(d);
+       unsigned int irq = d->hwirq;
        u32 t;
 
        t = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE);
@@ -62,8 +66,8 @@ static void ar71xx_misc_irq_unmask(struct irq_data *d)
 
 static void ar71xx_misc_irq_mask(struct irq_data *d)
 {
-       unsigned int irq = d->irq - ATH79_MISC_IRQ_BASE;
-       void __iomem *base = ath79_reset_base;
+       void __iomem *base = irq_data_get_irq_chip_data(d);
+       unsigned int irq = d->hwirq;
        u32 t;
 
        t = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE);
@@ -75,8 +79,8 @@ static void ar71xx_misc_irq_mask(struct irq_data *d)
 
 static void ar724x_misc_irq_ack(struct irq_data *d)
 {
-       unsigned int irq = d->irq - ATH79_MISC_IRQ_BASE;
-       void __iomem *base = ath79_reset_base;
+       void __iomem *base = irq_data_get_irq_chip_data(d);
+       unsigned int irq = d->hwirq;
        u32 t;
 
        t = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_STATUS);
@@ -94,12 +98,6 @@ static struct irq_chip ath79_misc_irq_chip = {
 
 static void __init ath79_misc_irq_init(void)
 {
-       void __iomem *base = ath79_reset_base;
-       int i;
-
-       __raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_ENABLE);
-       __raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_STATUS);
-
        if (soc_is_ar71xx() || soc_is_ar913x())
                ath79_misc_irq_chip.irq_mask_ack = ar71xx_misc_irq_mask;
        else if (soc_is_ar724x() ||
@@ -110,13 +108,7 @@ static void __init ath79_misc_irq_init(void)
        else
                BUG();
 
-       for (i = ATH79_MISC_IRQ_BASE;
-            i < ATH79_MISC_IRQ_BASE + ATH79_MISC_IRQ_COUNT; i++) {
-               irq_set_chip_and_handler(i, &ath79_misc_irq_chip,
-                                        handle_level_irq);
-       }
-
-       irq_set_chained_handler(ATH79_CPU_IRQ(6), ath79_misc_irq_handler);
+       ath79_misc_intc_domain_init(NULL, ATH79_CPU_IRQ(6));
 }
 
 static void ar934x_ip2_irq_dispatch(struct irq_desc *desc)
@@ -256,10 +248,10 @@ asmlinkage void plat_irq_dispatch(void)
        }
 }
 
-#ifdef CONFIG_IRQCHIP
 static int misc_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw)
 {
        irq_set_chip_and_handler(irq, &ath79_misc_irq_chip, handle_level_irq);
+       irq_set_chip_data(irq, d->host_data);
        return 0;
 }
 
@@ -268,19 +260,14 @@ static const struct irq_domain_ops misc_irq_domain_ops = {
        .map = misc_map,
 };
 
-static int __init ath79_misc_intc_of_init(
-       struct device_node *node, struct device_node *parent)
+static void __init ath79_misc_intc_domain_init(
+       struct device_node *node, int irq)
 {
        void __iomem *base = ath79_reset_base;
        struct irq_domain *domain;
-       int irq;
-
-       irq = irq_of_parse_and_map(node, 0);
-       if (!irq)
-               panic("Failed to get MISC IRQ");
 
        domain = irq_domain_add_legacy(node, ATH79_MISC_IRQ_COUNT,
-                       ATH79_MISC_IRQ_BASE, 0, &misc_irq_domain_ops, NULL);
+                       ATH79_MISC_IRQ_BASE, 0, &misc_irq_domain_ops, base);
        if (!domain)
                panic("Failed to add MISC irqdomain");
 
@@ -288,9 +275,19 @@ static int __init ath79_misc_intc_of_init(
        __raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_ENABLE);
        __raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_STATUS);
 
+       irq_set_chained_handler_and_data(irq, ath79_misc_irq_handler, domain);
+}
 
-       irq_set_chained_handler(irq, ath79_misc_irq_handler);
+static int __init ath79_misc_intc_of_init(
+       struct device_node *node, struct device_node *parent)
+{
+       int irq;
 
+       irq = irq_of_parse_and_map(node, 0);
+       if (!irq)
+               panic("Failed to get MISC IRQ");
+
+       ath79_misc_intc_domain_init(node, irq);
        return 0;
 }
 
@@ -349,8 +346,6 @@ static int __init ar79_cpu_intc_of_init(
 IRQCHIP_DECLARE(ar79_cpu_intc, "qca,ar7100-cpu-intc",
                ar79_cpu_intc_of_init);
 
-#endif
-
 void __init arch_init_irq(void)
 {
        if (mips_machtype == ATH79_MACH_GENERIC_OF) {
index 8755d618e116ec32fd6fff90d0081b0bb2e4bb9c..be451ee4a5eaf5b8ba6de0d11fc8c76f6d6db7fa 100644 (file)
 
 #define ATH79_SYS_TYPE_LEN     64
 
-#define AR71XX_BASE_FREQ       40000000
-#define AR724X_BASE_FREQ       5000000
-#define AR913X_BASE_FREQ       5000000
-
 static char ath79_sys_type[ATH79_SYS_TYPE_LEN];
 
 static void ath79_restart(char *command)
@@ -272,15 +268,10 @@ void __init device_tree_init(void)
        unflatten_and_copy_device_tree();
 }
 
-static void __init ath79_generic_init(void)
-{
-       /* Nothing to do */
-}
-
 MIPS_MACHINE(ATH79_MACH_GENERIC,
             "Generic",
             "Generic AR71XX/AR724X/AR913X based board",
-            ath79_generic_init);
+            NULL);
 
 MIPS_MACHINE(ATH79_MACH_GENERIC_OF,
             "DTB",
index a7e569c7968ea7e8bc1a1ce3e629fc54cc2710ee..959c145a0a2c1654d1ac3b58a98db92ef435a713 100644 (file)
@@ -666,9 +666,15 @@ static int bcm47xx_get_sprom_bcma(struct bcma_bus *bus, struct ssb_sprom *out)
        switch (bus->hosttype) {
        case BCMA_HOSTTYPE_PCI:
                memset(out, 0, sizeof(struct ssb_sprom));
-               snprintf(buf, sizeof(buf), "pci/%u/%u/",
-                        bus->host_pci->bus->number + 1,
-                        PCI_SLOT(bus->host_pci->devfn));
+               /* On BCM47XX all PCI buses share the same domain */
+               if (config_enabled(CONFIG_BCM47XX))
+                       snprintf(buf, sizeof(buf), "pci/%u/%u/",
+                                bus->host_pci->bus->number + 1,
+                                PCI_SLOT(bus->host_pci->devfn));
+               else
+                       snprintf(buf, sizeof(buf), "pci/%u/%u/",
+                                pci_domain_nr(bus->host_pci->bus) + 1,
+                                bus->host_pci->bus->number);
                bcm47xx_sprom_apply_prefix_alias(buf, sizeof(buf));
                prefix = buf;
                break;
index 4b50d40f7451ad86eba859b8a02ea792ff060b35..05757aed016cd6f72bedbd0ac31f379e8f0cd884 100644 (file)
@@ -10,6 +10,7 @@
 
 #define pr_fmt(fmt) "bcm63xx_nvram: " fmt
 
+#include <linux/bcm963xx_nvram.h>
 #include <linux/init.h>
 #include <linux/crc32.h>
 #include <linux/export.h>
 
 #include <bcm63xx_nvram.h>
 
-/*
- * nvram structure
- */
-struct bcm963xx_nvram {
-       u32     version;
-       u8      reserved1[256];
-       u8      name[16];
-       u32     main_tp_number;
-       u32     psi_size;
-       u32     mac_addr_count;
-       u8      mac_addr_base[ETH_ALEN];
-       u8      reserved2[2];
-       u32     checksum_old;
-       u8      reserved3[720];
-       u32     checksum_high;
-};
-
 #define BCM63XX_DEFAULT_PSI_SIZE       64
 
 static struct bcm963xx_nvram nvram;
@@ -42,27 +26,14 @@ static int mac_addr_used;
 
 void __init bcm63xx_nvram_init(void *addr)
 {
-       unsigned int check_len;
        u32 crc, expected_crc;
        u8 hcs_mac_addr[ETH_ALEN] = { 0x00, 0x10, 0x18, 0xff, 0xff, 0xff };
 
        /* extract nvram data */
-       memcpy(&nvram, addr, sizeof(nvram));
+       memcpy(&nvram, addr, BCM963XX_NVRAM_V5_SIZE);
 
        /* check checksum before using data */
-       if (nvram.version <= 4) {
-               check_len = offsetof(struct bcm963xx_nvram, reserved3);
-               expected_crc = nvram.checksum_old;
-               nvram.checksum_old = 0;
-       } else {
-               check_len = sizeof(nvram);
-               expected_crc = nvram.checksum_high;
-               nvram.checksum_high = 0;
-       }
-
-       crc = crc32_le(~0, (u8 *)&nvram, check_len);
-
-       if (crc != expected_crc)
+       if (bcm963xx_nvram_checksum(&nvram, &expected_crc, &crc))
                pr_warn("nvram checksum failed, contents may be invalid (expected %08x, got %08x)\n",
                        expected_crc, crc);
 
index 5b16d2955fbb864afae0103753bd8d0d115eba0e..35535284b39efb323cb63b4f8ec37dcdfbdb7f60 100644 (file)
@@ -105,6 +105,7 @@ static const struct bmips_quirk bmips_quirk_list[] = {
        { "brcm,bcm33843-viper",        &bcm3384_viper_quirks           },
        { "brcm,bcm6328",               &bcm6328_quirks                 },
        { "brcm,bcm6368",               &bcm6368_quirks                 },
+       { "brcm,bcm63168",              &bcm6368_quirks                 },
        { },
 };
 
index d5bdee115f22f73058d464e5cc88aee8abc7cf46..4eff1ef02eff9abec4ae03d4248f144ef8fa0058 100644 (file)
@@ -29,20 +29,23 @@ KBUILD_AFLAGS := $(LINUXINCLUDE) $(KBUILD_AFLAGS) -D__ASSEMBLY__ \
        -DBOOT_HEAP_SIZE=$(BOOT_HEAP_SIZE) \
        -DKERNEL_ENTRY=$(VMLINUX_ENTRY_ADDRESS)
 
-targets := head.o decompress.o string.o dbg.o uart-16550.o uart-alchemy.o
-
 # decompressor objects (linked with vmlinuz)
 vmlinuzobjs-y := $(obj)/head.o $(obj)/decompress.o $(obj)/string.o
 
 ifdef CONFIG_DEBUG_ZBOOT
 vmlinuzobjs-$(CONFIG_DEBUG_ZBOOT)                 += $(obj)/dbg.o
 vmlinuzobjs-$(CONFIG_SYS_SUPPORTS_ZBOOT_UART16550) += $(obj)/uart-16550.o
+vmlinuzobjs-$(CONFIG_SYS_SUPPORTS_ZBOOT_UART_PROM) += $(obj)/uart-prom.o
 vmlinuzobjs-$(CONFIG_MIPS_ALCHEMY)                += $(obj)/uart-alchemy.o
 endif
 
-ifdef CONFIG_KERNEL_XZ
-vmlinuzobjs-y += $(obj)/../../lib/ashldi3.o
-endif
+vmlinuzobjs-$(CONFIG_KERNEL_XZ) += $(obj)/ashldi3.o
+
+$(obj)/ashldi3.o: KBUILD_CFLAGS += -I$(srctree)/arch/mips/lib
+$(obj)/ashldi3.c: $(srctree)/arch/mips/lib/ashldi3.c
+       $(call cmd,shipped)
+
+targets := $(notdir $(vmlinuzobjs-y))
 
 targets += vmlinux.bin
 OBJCOPYFLAGS_vmlinux.bin := $(OBJCOPYFLAGS) -O binary -R .comment -S
@@ -60,7 +63,7 @@ targets += vmlinux.bin.z
 $(obj)/vmlinux.bin.z: $(obj)/vmlinux.bin FORCE
        $(call if_changed,$(tool_y))
 
-targets += piggy.o
+targets += piggy.o dummy.o
 OBJCOPYFLAGS_piggy.o := --add-section=.image=$(obj)/vmlinux.bin.z \
                        --set-section-flags=.image=contents,alloc,load,readonly,data
 $(obj)/piggy.o: $(obj)/dummy.o $(obj)/vmlinux.bin.z FORCE
diff --git a/arch/mips/boot/compressed/uart-prom.c b/arch/mips/boot/compressed/uart-prom.c
new file mode 100644 (file)
index 0000000..1c3d51b
--- /dev/null
@@ -0,0 +1,7 @@
+
+extern void prom_putchar(unsigned char ch);
+
+void putc(char c)
+{
+       prom_putchar(c);
+}
index a0bf516ec3948827acbc526c6ebac5a7827e1a22..fc7a0a98e9bfc159ef3ba75ba2c791aa76e5c9a2 100644 (file)
@@ -4,6 +4,7 @@ dts-dirs        += ingenic
 dts-dirs       += lantiq
 dts-dirs       += mti
 dts-dirs       += netlogic
+dts-dirs       += pic32
 dts-dirs       += qca
 dts-dirs       += ralink
 dts-dirs       += xilfpga
index d52ce3d07f16dee81c5982212fe69f8cddc015d0..d61b1616b604552be9a30f89eb42539de6838b14 100644 (file)
@@ -31,6 +31,7 @@
        };
 
        aliases {
+               leds0 = &leds0;
                uart0 = &uart0;
        };
 
@@ -73,6 +74,7 @@
                timer: timer@10000040 {
                        compatible = "syscon";
                        reg = <0x10000040 0x2c>;
+                       little-endian;
                };
 
                reboot {
                        offset = <0x28>;
                        mask = <0x1>;
                };
+
+               leds0: led-controller@10000800 {
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+                       compatible = "brcm,bcm6328-leds";
+                       reg = <0x10000800 0x24>;
+                       status = "disabled";
+               };
        };
 };
index 45152bc22117f081d15a8b6dd2740305b35efbf6..9c8d3fe28b3114e51aaf128a640bde630d31a65b 100644 (file)
@@ -32,6 +32,7 @@
        };
 
        aliases {
+               leds0 = &leds0;
                uart0 = &uart0;
        };
 
                compatible = "simple-bus";
                ranges;
 
+               periph_cntl: syscon@10000000 {
+                       compatible = "syscon";
+                       reg = <0x10000000 0x14>;
+                       little-endian;
+               };
+
+               reboot: syscon-reboot@10000008 {
+                       compatible = "syscon-reboot";
+                       regmap = <&periph_cntl>;
+                       offset = <0x8>;
+                       mask = <0x1>;
+               };
+
                periph_intc: periph_intc@10000020 {
                        compatible = "brcm,bcm3380-l2-intc";
                        reg = <0x10000024 0x4 0x1000002c 0x4>,
                        interrupts = <2>;
                };
 
+               leds0: led-controller@100000d0 {
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+                       compatible = "brcm,bcm6358-leds";
+                       reg = <0x100000d0 0x8>;
+                       status = "disabled";
+               };
+
                uart0: serial@10000100 {
                        compatible = "brcm,bcm6345-uart";
                        reg = <0x10000100 0x18>;
index 4fc7ecee273c105027ff20cea02f2bd5cf86c9fe..1a7efa883c5e3fd2e046b554485270036193b382 100644 (file)
@@ -98,6 +98,7 @@
                sun_top_ctrl: syscon@404000 {
                        compatible = "brcm,bcm7125-sun-top-ctrl", "syscon";
                        reg = <0x404000 0x60c>;
+                       little-endian;
                };
 
                reboot {
index a3039bb53477d40a426fec1d1318f3156ede8be3..d4bf52cfcf170ee8ac84daa874495e0a6420e542 100644 (file)
                sun_top_ctrl: syscon@404000 {
                        compatible = "brcm,bcm7346-sun-top-ctrl", "syscon";
                        reg = <0x404000 0x51c>;
+                       little-endian;
                };
 
                reboot {
index 4274ff41ec2122ac0bfd52d814aee8432c26553e..8e2501694d03fbd93827aeda79ef22f7cfd5d094 100644 (file)
                sun_top_ctrl: syscon@404000 {
                        compatible = "brcm,bcm7358-sun-top-ctrl", "syscon";
                        reg = <0x404000 0x51c>;
+                       little-endian;
                };
 
                reboot {
index 0dcc9163c27bdd0022e5b66f3ffd65da7fd6ce31..7e5f76040fb898b19a4bbc301c8a20f3b9368aa4 100644 (file)
                sun_top_ctrl: syscon@404000 {
                        compatible = "brcm,bcm7360-sun-top-ctrl", "syscon";
                        reg = <0x404000 0x51c>;
+                       little-endian;
                };
 
                reboot {
index 2f3f9fc2c478df36ef57c5990dd81f6757240e3a..c739ea77acb0dfe17363ec52cf390cace407e54c 100644 (file)
                sun_top_ctrl: syscon@404000 {
                        compatible = "brcm,bcm7362-sun-top-ctrl", "syscon";
                        reg = <0x404000 0x51c>;
+                       little-endian;
                };
 
                reboot {
index bee221b3b56857c8d84dac3e2fa9bfe8b3c54a85..5f55d0a50a28622614ec6142eb0ff19746dfaade 100644 (file)
@@ -99,6 +99,7 @@
                sun_top_ctrl: syscon@404000 {
                        compatible = "brcm,bcm7420-sun-top-ctrl", "syscon";
                        reg = <0x404000 0x60c>;
+                       little-endian;
                };
 
                reboot {
index 571f30f52e3ff5780ec4fd72e18e72737e51537d..e24d41ab4e30f9163605180d78605fc02a477db6 100644 (file)
                sun_top_ctrl: syscon@404000 {
                        compatible = "brcm,bcm7425-sun-top-ctrl", "syscon";
                        reg = <0x404000 0x51c>;
+                       little-endian;
                };
 
                reboot {
index 614ee211f71a89356dd1eb814a38ec3071885747..8b9432cc062bc7e89898f2f1f2213926193389f8 100644 (file)
                sun_top_ctrl: syscon@404000 {
                        compatible = "brcm,bcm7425-sun-top-ctrl", "syscon";
                        reg = <0x404000 0x51c>;
+                       little-endian;
                };
 
                reboot {
index 9fcb9e7d1f5752902d08ccdb174b9deeca5afad0..1652d8d60b1e4b8673acaa7d2ec182ce7eb7ff98 100644 (file)
 &uart4 {
        status = "okay";
 };
+
+&nemc {
+       status = "okay";
+
+       nandc: nand-controller@1 {
+               compatible = "ingenic,jz4780-nand";
+               reg = <1 0 0x1000000>;
+
+               #address-cells = <1>;
+               #size-cells = <0>;
+
+               ingenic,bch-controller = <&bch>;
+
+               ingenic,nemc-tAS = <10>;
+               ingenic,nemc-tAH = <5>;
+               ingenic,nemc-tBP = <10>;
+               ingenic,nemc-tAW = <15>;
+               ingenic,nemc-tSTRV = <100>;
+
+               nand@1 {
+                       reg = <1>;
+
+                       nand-ecc-step-size = <1024>;
+                       nand-ecc-strength = <24>;
+                       nand-ecc-mode = "hw";
+                       nand-on-flash-bbt;
+
+                       partitions {
+                               compatible = "fixed-partitions";
+                               #address-cells = <2>;
+                               #size-cells = <2>;
+
+                               partition@0 {
+                                       label = "u-boot-spl";
+                                       reg = <0x0 0x0 0x0 0x800000>;
+                               };
+
+                               partition@0x800000 {
+                                       label = "u-boot";
+                                       reg = <0x0 0x800000 0x0 0x200000>;
+                               };
+
+                               partition@0xa00000 {
+                                       label = "u-boot-env";
+                                       reg = <0x0 0xa00000 0x0 0x200000>;
+                               };
+
+                               partition@0xc00000 {
+                                       label = "boot";
+                                       reg = <0x0 0xc00000 0x0 0x4000000>;
+                               };
+
+                               partition@0x8c00000 {
+                                       label = "system";
+                                       reg = <0x0 0x4c00000 0x1 0xfb400000>;
+                               };
+                       };
+               };
+       };
+};
+
+&bch {
+       status = "okay";
+};
index 65389f6027333c8c4bed688fb38ed11a31d4bd2e..b868b429add23af4c14f42a366b96760e49eee66 100644 (file)
 
                status = "disabled";
        };
+
+       nemc: nemc@13410000 {
+               compatible = "ingenic,jz4780-nemc";
+               reg = <0x13410000 0x10000>;
+               #address-cells = <2>;
+               #size-cells = <1>;
+               ranges = <1 0 0x1b000000 0x1000000
+                         2 0 0x1a000000 0x1000000
+                         3 0 0x19000000 0x1000000
+                         4 0 0x18000000 0x1000000
+                         5 0 0x17000000 0x1000000
+                         6 0 0x16000000 0x1000000>;
+
+               clocks = <&cgu JZ4780_CLK_NEMC>;
+
+               status = "disabled";
+       };
+
+       bch: bch@134d0000 {
+               compatible = "ingenic,jz4780-bch";
+               reg = <0x134d0000 0x10000>;
+
+               clocks = <&cgu JZ4780_CLK_BCH>;
+
+               status = "disabled";
+       };
 };
diff --git a/arch/mips/boot/dts/pic32/Makefile b/arch/mips/boot/dts/pic32/Makefile
new file mode 100644 (file)
index 0000000..7ac7905
--- /dev/null
@@ -0,0 +1,12 @@
+dtb-$(CONFIG_DTB_PIC32_MZDA_SK)                += pic32mzda_sk.dtb
+
+dtb-$(CONFIG_DTB_PIC32_NONE)           += \
+                                       pic32mzda_sk.dtb
+
+obj-y                          += $(patsubst %.dtb, %.dtb.o, $(dtb-y))
+
+# Force kbuild to make empty built-in.o if necessary
+obj-                           += dummy.o
+
+always                         := $(dtb-y)
+clean-files                    := *.dtb *.dtb.S
diff --git a/arch/mips/boot/dts/pic32/pic32mzda-clk.dtsi b/arch/mips/boot/dts/pic32/pic32mzda-clk.dtsi
new file mode 100644 (file)
index 0000000..ef13350
--- /dev/null
@@ -0,0 +1,236 @@
+/*
+ * Device Tree Source for PIC32MZDA clock data
+ *
+ * Purna Chandra Mandal <purna.mandal@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ * Licensed under GPLv2 or later.
+ */
+
+/* all fixed rate clocks */
+
+/ {
+       POSC:posc_clk { /* On-chip primary oscillator */
+               #clock-cells = <0>;
+               compatible = "fixed-clock";
+               clock-frequency = <24000000>;
+       };
+
+       FRC:frc_clk { /* internal FRC oscillator */
+               #clock-cells = <0>;
+               compatible = "fixed-clock";
+               clock-frequency = <8000000>;
+       };
+
+       BFRC:bfrc_clk { /* internal backup FRC oscillator */
+               #clock-cells = <0>;
+               compatible = "fixed-clock";
+               clock-frequency = <8000000>;
+       };
+
+       LPRC:lprc_clk { /* internal low-power FRC oscillator */
+               #clock-cells = <0>;
+               compatible = "fixed-clock";
+               clock-frequency = <32000>;
+       };
+
+       /* UPLL provides clock to USBCORE */
+       UPLL:usb_phy_clk {
+               #clock-cells = <0>;
+               compatible = "fixed-clock";
+               clock-frequency = <24000000>;
+               clock-output-names = "usbphy_clk";
+       };
+
+       TxCKI:txcki_clk { /* external clock input on TxCLKI pin */
+               #clock-cells = <0>;
+               compatible = "fixed-clock";
+               clock-frequency = <4000000>;
+               status = "disabled";
+       };
+
+       /* external clock input on REFCLKIx pin */
+       REFIx:refix_clk {
+               #clock-cells = <0>;
+               compatible = "fixed-clock";
+               clock-frequency = <24000000>;
+               status = "disabled";
+       };
+
+       /* PIC32 specific clks */
+       pic32_clktree {
+               #address-cells = <1>;
+               #size-cells = <1>;
+               reg = <0x1f801200 0x200>;
+               compatible = "microchip,pic32mzda-clk";
+               ranges = <0 0x1f801200 0x200>;
+
+               /* secondary oscillator; external input on SOSCI pin */
+               SOSC:sosc_clk@0 {
+                       #clock-cells = <0>;
+                       compatible = "microchip,pic32mzda-sosc";
+                       clock-frequency = <32768>;
+                       reg = <0x000 0x10>,   /* enable reg */
+                             <0x1d0 0x10>; /* status reg */
+                       microchip,bit-mask = <0x02>; /* enable mask */
+                       microchip,status-bit-mask = <0x10>; /* status-mask*/
+               };
+
+               FRCDIV:frcdiv_clk {
+                       #clock-cells = <0>;
+                       compatible = "microchip,pic32mzda-frcdivclk";
+                       clocks = <&FRC>;
+                       clock-output-names = "frcdiv_clk";
+               };
+
+               /* System PLL clock */
+               SYSPLL:spll_clk@020 {
+                       #clock-cells = <0>;
+                       compatible = "microchip,pic32mzda-syspll";
+                       reg = <0x020 0x10>, /* SPLL register */
+                             <0x1d0 0x10>; /* CLKSTAT register */
+                       clocks = <&POSC>, <&FRC>;
+                       clock-output-names = "sys_pll";
+                       microchip,status-bit-mask = <0x80>; /* SPLLRDY */
+               };
+
+               /* system clock; mux with postdiv & slew */
+               SYSCLK:sys_clk@1c0 {
+                       #clock-cells = <0>;
+                       compatible = "microchip,pic32mzda-sysclk-v2";
+                       reg = <0x1c0 0x04>; /* SLEWCON */
+                       clocks = <&FRCDIV>, <&SYSPLL>, <&POSC>, <&SOSC>,
+                                <&LPRC>, <&FRCDIV>;
+                       microchip,clock-indices = <0>, <1>, <2>, <4>,
+                                                 <5>, <7>;
+                       clock-output-names = "sys_clk";
+               };
+
+               /* Peripheral bus1 clock */
+               PBCLK1:pb1_clk@140 {
+                       reg = <0x140 0x10>;
+                       #clock-cells = <0>;
+                       compatible = "microchip,pic32mzda-pbclk";
+                       clocks = <&SYSCLK>;
+                       clock-output-names = "pb1_clk";
+                       /* used by system modules, not gateable */
+                       microchip,ignore-unused;
+               };
+
+               /* Peripheral bus2 clock */
+               PBCLK2:pb2_clk@150 {
+                       reg = <0x150 0x10>;
+                       #clock-cells = <0>;
+                       compatible = "microchip,pic32mzda-pbclk";
+                       clocks = <&SYSCLK>;
+                       clock-output-names = "pb2_clk";
+                       /* avoid gating even if unused */
+                       microchip,ignore-unused;
+               };
+
+               /* Peripheral bus3 clock */
+               PBCLK3:pb3_clk@160 {
+                       reg = <0x160 0x10>;
+                       #clock-cells = <0>;
+                       compatible = "microchip,pic32mzda-pbclk";
+                       clocks = <&SYSCLK>;
+                       clock-output-names = "pb3_clk";
+               };
+
+               /* Peripheral bus4 clock(I/O ports, GPIO) */
+               PBCLK4:pb4_clk@170 {
+                       reg = <0x170 0x10>;
+                       #clock-cells = <0>;
+                       compatible = "microchip,pic32mzda-pbclk";
+                       clocks = <&SYSCLK>;
+                       clock-output-names = "pb4_clk";
+               };
+
+               /* Peripheral bus clock */
+               PBCLK5:pb5_clk@180 {
+                       reg = <0x180 0x10>;
+                       #clock-cells = <0>;
+                       compatible = "microchip,pic32mzda-pbclk";
+                       clocks = <&SYSCLK>;
+                       clock-output-names = "pb5_clk";
+               };
+
+               /* Peripheral Bus6 clock; */
+               PBCLK6:pb6_clk@190 {
+                       reg = <0x190 0x10>;
+                       compatible = "microchip,pic32mzda-pbclk";
+                       clocks = <&SYSCLK>;
+                       #clock-cells = <0>;
+               };
+
+               /* Peripheral bus7 clock */
+               PBCLK7:pb7_clk@1a0 {
+                       reg = <0x1a0 0x10>;
+                       #clock-cells = <0>;
+                       compatible = "microchip,pic32mzda-pbclk";
+                       /* CPU is driven by this clock; so named */
+                       clock-output-names = "cpu_clk";
+                       clocks = <&SYSCLK>;
+               };
+
+               /* Reference Oscillator clock for SPI/I2S */
+               REFCLKO1:refo1_clk@80 {
+                       reg = <0x080 0x20>;
+                       #clock-cells = <0>;
+                       compatible = "microchip,pic32mzda-refoclk";
+                       clocks = <&SYSCLK>, <&PBCLK1>, <&POSC>, <&FRC>, <&LPRC>,
+                                <&SOSC>, <&SYSPLL>, <&REFIx>, <&BFRC>;
+                       microchip,clock-indices = <0>, <1>, <2>, <3>, <4>,
+                                                 <5>, <7>, <8>, <9>;
+                       clock-output-names = "refo1_clk";
+               };
+
+               /* Reference Oscillator clock for SQI */
+               REFCLKO2:refo2_clk@a0 {
+                       reg = <0x0a0 0x20>;
+                       #clock-cells = <0>;
+                       compatible = "microchip,pic32mzda-refoclk";
+                       clocks = <&SYSCLK>, <&PBCLK1>, <&POSC>, <&FRC>, <&LPRC>,
+                                <&SOSC>, <&SYSPLL>, <&REFIx>, <&BFRC>;
+                       microchip,clock-indices = <0>, <1>, <2>, <3>, <4>,
+                                                 <5>, <7>, <8>, <9>;
+                       clock-output-names = "refo2_clk";
+               };
+
+               /* Reference Oscillator clock, ADC */
+               REFCLKO3:refo3_clk@c0 {
+                       reg = <0x0c0 0x20>;
+                       compatible = "microchip,pic32mzda-refoclk";
+                       clocks = <&SYSCLK>, <&PBCLK1>, <&POSC>, <&FRC>, <&LPRC>,
+                                <&SOSC>, <&SYSPLL>, <&REFIx>, <&BFRC>;
+                       microchip,clock-indices = <0>, <1>, <2>, <3>, <4>,
+                                                 <5>, <7>, <8>, <9>;
+                       #clock-cells = <0>;
+                       clock-output-names = "refo3_clk";
+               };
+
+               /* Reference Oscillator clock */
+               REFCLKO4:refo4_clk@e0 {
+                       reg = <0x0e0 0x20>;
+                       compatible = "microchip,pic32mzda-refoclk";
+                       clocks = <&SYSCLK>, <&PBCLK1>, <&POSC>, <&FRC>, <&LPRC>,
+                                <&SOSC>, <&SYSPLL>, <&REFIx>, <&BFRC>;
+                       microchip,clock-indices = <0>, <1>, <2>, <3>, <4>,
+                                                 <5>, <7>, <8>, <9>;
+                       #clock-cells = <0>;
+                       clock-output-names = "refo4_clk";
+               };
+
+               /* Reference Oscillator clock, LCD */
+               REFCLKO5:refo5_clk@100 {
+                       reg = <0x100 0x20>;
+                       compatible = "microchip,pic32mzda-refoclk";
+                       clocks = <&SYSCLK>,<&PBCLK1>,<&POSC>,<&FRC>,<&LPRC>,
+                                <&SOSC>,<&SYSPLL>,<&REFIx>,<&BFRC>;
+                       microchip,clock-indices = <0>, <1>, <2>, <3>, <4>,
+                                                 <5>, <7>, <8>, <9>;
+                       #clock-cells = <0>;
+                       clock-output-names = "refo5_clk";
+               };
+       };
+};
diff --git a/arch/mips/boot/dts/pic32/pic32mzda.dtsi b/arch/mips/boot/dts/pic32/pic32mzda.dtsi
new file mode 100644 (file)
index 0000000..ad9e331
--- /dev/null
@@ -0,0 +1,281 @@
+/*
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <dt-bindings/interrupt-controller/irq.h>
+
+#include "pic32mzda-clk.dtsi"
+
+/ {
+       #address-cells = <1>;
+       #size-cells = <1>;
+       interrupt-parent = <&evic>;
+
+       aliases {
+               gpio0 = &gpio0;
+               gpio1 = &gpio1;
+               gpio2 = &gpio2;
+               gpio3 = &gpio3;
+               gpio4 = &gpio4;
+               gpio5 = &gpio5;
+               gpio6 = &gpio6;
+               gpio7 = &gpio7;
+               gpio8 = &gpio8;
+               gpio9 = &gpio9;
+               serial0 = &uart1;
+               serial1 = &uart2;
+               serial2 = &uart3;
+               serial3 = &uart4;
+               serial4 = &uart5;
+               serial5 = &uart6;
+       };
+
+       cpus {
+               #address-cells = <1>;
+               #size-cells = <0>;
+
+               cpu@0 {
+                       compatible = "mti,mips14KEc";
+                       device_type = "cpu";
+               };
+       };
+
+       soc {
+               compatible = "microchip,pic32mzda-infra";
+               interrupts = <0 IRQ_TYPE_EDGE_RISING>;
+       };
+
+       evic: interrupt-controller@1f810000 {
+               compatible = "microchip,pic32mzda-evic";
+               interrupt-controller;
+               #interrupt-cells = <2>;
+               reg = <0x1f810000 0x1000>;
+               microchip,external-irqs = <3 8 13 18 23>;
+       };
+
+       pic32_pinctrl: pinctrl@1f801400{
+               #address-cells = <1>;
+               #size-cells = <1>;
+               compatible = "microchip,pic32mzda-pinctrl";
+               reg = <0x1f801400 0x400>;
+               clocks = <&PBCLK1>;
+       };
+
+       /* PORTA */
+       gpio0: gpio0@1f860000 {
+               compatible = "microchip,pic32mzda-gpio";
+               reg = <0x1f860000 0x100>;
+               interrupts = <118 IRQ_TYPE_LEVEL_HIGH>;
+               #gpio-cells = <2>;
+               gpio-controller;
+               interrupt-controller;
+               #interrupt-cells = <2>;
+               clocks = <&PBCLK4>;
+               microchip,gpio-bank = <0>;
+               gpio-ranges = <&pic32_pinctrl 0 0 16>;
+       };
+
+       /* PORTB */
+       gpio1: gpio1@1f860100 {
+               compatible = "microchip,pic32mzda-gpio";
+               reg = <0x1f860100 0x100>;
+               interrupts = <119 IRQ_TYPE_LEVEL_HIGH>;
+               #gpio-cells = <2>;
+               gpio-controller;
+               interrupt-controller;
+               #interrupt-cells = <2>;
+               clocks = <&PBCLK4>;
+               microchip,gpio-bank = <1>;
+               gpio-ranges = <&pic32_pinctrl 0 16 16>;
+       };
+
+       /* PORTC */
+       gpio2: gpio2@1f860200 {
+               compatible = "microchip,pic32mzda-gpio";
+               reg = <0x1f860200 0x100>;
+               interrupts = <120 IRQ_TYPE_LEVEL_HIGH>;
+               #gpio-cells = <2>;
+               gpio-controller;
+               interrupt-controller;
+               #interrupt-cells = <2>;
+               clocks = <&PBCLK4>;
+               microchip,gpio-bank = <2>;
+               gpio-ranges = <&pic32_pinctrl 0 32 16>;
+       };
+
+       /* PORTD */
+       gpio3: gpio3@1f860300 {
+               compatible = "microchip,pic32mzda-gpio";
+               reg = <0x1f860300 0x100>;
+               interrupts = <121 IRQ_TYPE_LEVEL_HIGH>;
+               #gpio-cells = <2>;
+               gpio-controller;
+               interrupt-controller;
+               #interrupt-cells = <2>;
+               clocks = <&PBCLK4>;
+               microchip,gpio-bank = <3>;
+               gpio-ranges = <&pic32_pinctrl 0 48 16>;
+       };
+
+       /* PORTE */
+       gpio4: gpio4@1f860400 {
+               compatible = "microchip,pic32mzda-gpio";
+               reg = <0x1f860400 0x100>;
+               interrupts = <122 IRQ_TYPE_LEVEL_HIGH>;
+               #gpio-cells = <2>;
+               gpio-controller;
+               interrupt-controller;
+               #interrupt-cells = <2>;
+               clocks = <&PBCLK4>;
+               microchip,gpio-bank = <4>;
+               gpio-ranges = <&pic32_pinctrl 0 64 16>;
+       };
+
+       /* PORTF */
+       gpio5: gpio5@1f860500 {
+               compatible = "microchip,pic32mzda-gpio";
+               reg = <0x1f860500 0x100>;
+               interrupts = <123 IRQ_TYPE_LEVEL_HIGH>;
+               #gpio-cells = <2>;
+               gpio-controller;
+               interrupt-controller;
+               #interrupt-cells = <2>;
+               clocks = <&PBCLK4>;
+               microchip,gpio-bank = <5>;
+               gpio-ranges = <&pic32_pinctrl 0 80 16>;
+       };
+
+       /* PORTG */
+       gpio6: gpio6@1f860600 {
+               compatible = "microchip,pic32mzda-gpio";
+               reg = <0x1f860600 0x100>;
+               interrupts = <124 IRQ_TYPE_LEVEL_HIGH>;
+               #gpio-cells = <2>;
+               gpio-controller;
+               interrupt-controller;
+               #interrupt-cells = <2>;
+               clocks = <&PBCLK4>;
+               microchip,gpio-bank = <6>;
+               gpio-ranges = <&pic32_pinctrl 0 96 16>;
+       };
+
+       /* PORTH */
+       gpio7: gpio7@1f860700 {
+               compatible = "microchip,pic32mzda-gpio";
+               reg = <0x1f860700 0x100>;
+               interrupts = <125 IRQ_TYPE_LEVEL_HIGH>;
+               #gpio-cells = <2>;
+               gpio-controller;
+               interrupt-controller;
+               #interrupt-cells = <2>;
+               clocks = <&PBCLK4>;
+               microchip,gpio-bank = <7>;
+               gpio-ranges = <&pic32_pinctrl 0 112 16>;
+       };
+
+       /* PORTI does not exist */
+
+       /* PORTJ */
+       gpio8: gpio8@1f860800 {
+               compatible = "microchip,pic32mzda-gpio";
+               reg = <0x1f860800 0x100>;
+               interrupts = <126 IRQ_TYPE_LEVEL_HIGH>;
+               #gpio-cells = <2>;
+               gpio-controller;
+               interrupt-controller;
+               #interrupt-cells = <2>;
+               clocks = <&PBCLK4>;
+               microchip,gpio-bank = <8>;
+               gpio-ranges = <&pic32_pinctrl 0 128 16>;
+       };
+
+       /* PORTK */
+       gpio9: gpio9@1f860900 {
+               compatible = "microchip,pic32mzda-gpio";
+               reg = <0x1f860900 0x100>;
+               interrupts = <127 IRQ_TYPE_LEVEL_HIGH>;
+               #gpio-cells = <2>;
+               gpio-controller;
+               interrupt-controller;
+               #interrupt-cells = <2>;
+               clocks = <&PBCLK4>;
+               microchip,gpio-bank = <9>;
+               gpio-ranges = <&pic32_pinctrl 0 144 16>;
+       };
+
+       sdhci: sdhci@1f8ec000 {
+               compatible = "microchip,pic32mzda-sdhci";
+               reg = <0x1f8ec000 0x100>;
+               interrupts = <191 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&REFCLKO4>, <&PBCLK5>;
+               clock-names = "base_clk", "sys_clk";
+               bus-width = <4>;
+               cap-sd-highspeed;
+               status = "disabled";
+       };
+
+       uart1: serial@1f822000 {
+               compatible = "microchip,pic32mzda-uart";
+               reg = <0x1f822000 0x50>;
+               interrupts = <112 IRQ_TYPE_LEVEL_HIGH>,
+                       <113 IRQ_TYPE_LEVEL_HIGH>,
+                       <114 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&PBCLK2>;
+               status = "disabled";
+       };
+
+       uart2: serial@1f822200 {
+               compatible = "microchip,pic32mzda-uart";
+               reg = <0x1f822200 0x50>;
+               interrupts = <145 IRQ_TYPE_LEVEL_HIGH>,
+                       <146 IRQ_TYPE_LEVEL_HIGH>,
+                       <147 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&PBCLK2>;
+               status = "disabled";
+       };
+
+       uart3: serial@1f822400 {
+               compatible = "microchip,pic32mzda-uart";
+               reg = <0x1f822400 0x50>;
+               interrupts = <157 IRQ_TYPE_LEVEL_HIGH>,
+                       <158 IRQ_TYPE_LEVEL_HIGH>,
+                       <159 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&PBCLK2>;
+               status = "disabled";
+       };
+
+       uart4: serial@1f822600 {
+               compatible = "microchip,pic32mzda-uart";
+               reg = <0x1f822600 0x50>;
+               interrupts = <170 IRQ_TYPE_LEVEL_HIGH>,
+                       <171 IRQ_TYPE_LEVEL_HIGH>,
+                       <172 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&PBCLK2>;
+               status = "disabled";
+       };
+
+       uart5: serial@1f822800 {
+               compatible = "microchip,pic32mzda-uart";
+               reg = <0x1f822800 0x50>;
+               interrupts = <179 IRQ_TYPE_LEVEL_HIGH>,
+                       <180 IRQ_TYPE_LEVEL_HIGH>,
+                       <181 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&PBCLK2>;
+               status = "disabled";
+       };
+
+       uart6: serial@1f822A00 {
+               compatible = "microchip,pic32mzda-uart";
+               reg = <0x1f822A00 0x50>;
+               interrupts = <188 IRQ_TYPE_LEVEL_HIGH>,
+                       <189 IRQ_TYPE_LEVEL_HIGH>,
+                       <190 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&PBCLK2>;
+               status = "disabled";
+       };
+};
diff --git a/arch/mips/boot/dts/pic32/pic32mzda_sk.dts b/arch/mips/boot/dts/pic32/pic32mzda_sk.dts
new file mode 100644 (file)
index 0000000..5d434a5
--- /dev/null
@@ -0,0 +1,151 @@
+/*
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+/dts-v1/;
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/interrupt-controller/irq.h>
+
+#include "pic32mzda.dtsi"
+
+/ {
+       compatible = "microchip,pic32mzda-sk", "microchip,pic32mzda";
+       model = "Microchip PIC32MZDA Starter Kit";
+
+       memory {
+               device_type = "memory";
+               reg = <0x08000000 0x08000000>;
+       };
+
+       chosen {
+               bootargs = "earlyprintk=ttyPIC1,115200n8r console=ttyPIC1,115200n8";
+       };
+
+       leds0 {
+               compatible = "gpio-leds";
+               pinctrl-names = "default";
+               pinctrl-0 = <&user_leds_s0>;
+
+               led@1 {
+                       label = "pic32mzda_sk:red:led1";
+                       gpios = <&gpio7 0 GPIO_ACTIVE_HIGH>;
+                       linux,default-trigger = "heartbeat";
+               };
+
+               led@2 {
+                       label = "pic32mzda_sk:yellow:led2";
+                       gpios = <&gpio7 1 GPIO_ACTIVE_HIGH>;
+                       linux,default-trigger = "mmc0";
+               };
+
+               led@3 {
+                       label = "pic32mzda_sk:green:led3";
+                       gpios = <&gpio7 2 GPIO_ACTIVE_HIGH>;
+                       default-state = "on";
+               };
+       };
+
+       keys0 {
+               compatible = "gpio-keys";
+               pinctrl-0 = <&user_buttons_s0>;
+               pinctrl-names = "default";
+
+               #address-cells = <1>;
+               #size-cells = <0>;
+
+               button@sw1 {
+                       label = "ESC";
+                       linux,code = <1>;
+                       gpios = <&gpio1 12 0>;
+               };
+
+               button@sw2 {
+                       label = "Home";
+                       linux,code = <102>;
+                       gpios = <&gpio1 13 0>;
+               };
+
+               button@sw3 {
+                       label = "Menu";
+                       linux,code = <139>;
+                       gpios = <&gpio1 14 0>;
+               };
+       };
+};
+
+&uart2 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_uart2>;
+       status = "okay";
+};
+
+&uart4 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_uart4>;
+       status = "okay";
+};
+
+&sdhci {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_sdhc1>;
+       status = "okay";
+       assigned-clocks = <&REFCLKO2>,<&REFCLKO4>,<&REFCLKO5>;
+       assigned-clock-rates = <50000000>,<25000000>,<40000000>;
+};
+
+&pic32_pinctrl {
+
+       pinctrl_sdhc1: sdhc1_pins0 {
+               pins = "A6", "D4", "G13", "G12", "G14", "A7", "A0";
+               microchip,digital;
+       };
+
+       user_leds_s0: user_leds_s0 {
+               pins = "H0", "H1", "H2";
+               output-low;
+               microchip,digital;
+       };
+
+       user_buttons_s0: user_buttons_s0 {
+               pins = "B12", "B13", "B14";
+               microchip,digital;
+               input-enable;
+               bias-pull-up;
+       };
+
+       pinctrl_uart2: pinctrl_uart2 {
+               uart2-tx {
+                       pins = "G9";
+                       function = "U2TX";
+                       microchip,digital;
+                       output-high;
+               };
+               uart2-rx {
+                       pins = "B0";
+                       function = "U2RX";
+                       microchip,digital;
+                       input-enable;
+               };
+       };
+
+       pinctrl_uart4: uart4-0 {
+               uart4-tx {
+                       pins = "C3";
+                       function = "U4TX";
+                       microchip,digital;
+                       output-high;
+               };
+               uart4-rx {
+                       pins = "E8";
+                       function = "U4RX";
+                       microchip,digital;
+                       input-enable;
+               };
+       };
+};
index 13d0439496a9d28d500b2d3e37b585e3f4541335..3ad4ba9b12fd6a649eb07b201414f18ba60b83a7 100644 (file)
                        };
                };
 
+               usb@1b000100 {
+                       compatible = "qca,ar7100-ehci", "generic-ehci";
+                       reg = <0x1b000100 0x100>;
+
+                       interrupts = <3>;
+                       resets = <&rst 5>;
+
+                       has-transaction-translator;
+
+                       phy-names = "usb";
+                       phys = <&usb_phy>;
+
+                       status = "disabled";
+               };
+
                spi@1f000000 {
                        compatible = "qca,ar9132-spi", "qca,ar7100-spi";
                        reg = <0x1f000000 0x10>;
                        #size-cells = <0>;
                };
        };
+
+       usb_phy: usb-phy {
+               compatible = "qca,ar7100-usb-phy";
+
+               reset-names = "usb-phy", "usb-suspend-override";
+               resets = <&rst 4>, <&rst 3>;
+
+               #phy-cells = <0>;
+
+               status = "disabled";
+       };
 };
index 003015ab34e7cffcde16231b6c9b871862a697a7..e535ee3c26a4e8402a5da1dcc8c0f748205dacca 100644 (file)
                        };
                };
 
+               usb@1b000100 {
+                       status = "okay";
+               };
+
                spi@1f000000 {
                        status = "okay";
                        num-cs = <1>;
                };
        };
 
+       usb-phy {
+               status = "okay";
+       };
+
        gpio-keys {
                compatible = "gpio-keys-polled";
                #address-cells = <1>;
diff --git a/arch/mips/configs/pic32mzda_defconfig b/arch/mips/configs/pic32mzda_defconfig
new file mode 100644 (file)
index 0000000..52192c6
--- /dev/null
@@ -0,0 +1,89 @@
+CONFIG_MACH_PIC32=y
+CONFIG_DTB_PIC32_MZDA_SK=y
+CONFIG_HZ_100=y
+CONFIG_PREEMPT_VOLUNTARY=y
+# CONFIG_SECCOMP is not set
+CONFIG_SYSVIPC=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_RELAY=y
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_EMBEDDED=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_SLAB=y
+CONFIG_JUMP_LABEL=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_BLK_DEV_BSGLIB=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_SGI_PARTITION=y
+CONFIG_BINFMT_MISC=m
+# CONFIG_SUSPEND is not set
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+# CONFIG_FIRMWARE_IN_KERNEL is not set
+# CONFIG_ALLOW_DEV_COREDUMP is not set
+CONFIG_BLK_DEV_LOOP=m
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_SCAN_ASYNC=y
+# CONFIG_SCSI_LOWLEVEL is not set
+CONFIG_INPUT_LEDS=m
+CONFIG_INPUT_POLLDEV=y
+CONFIG_INPUT_MOUSEDEV=m
+CONFIG_INPUT_EVDEV=y
+CONFIG_INPUT_EVBUG=m
+# CONFIG_KEYBOARD_ATKBD is not set
+CONFIG_KEYBOARD_GPIO=m
+CONFIG_KEYBOARD_GPIO_POLLED=m
+# CONFIG_MOUSE_PS2 is not set
+# CONFIG_SERIO is not set
+CONFIG_SERIAL_PIC32=y
+CONFIG_SERIAL_PIC32_CONSOLE=y
+CONFIG_HW_RANDOM=y
+CONFIG_RAW_DRIVER=m
+CONFIG_GPIO_SYSFS=y
+# CONFIG_HWMON is not set
+CONFIG_HIDRAW=y
+# CONFIG_USB_SUPPORT is not set
+CONFIG_MMC=y
+CONFIG_MMC_SDHCI=y
+CONFIG_MMC_SDHCI_PLTFM=y
+CONFIG_MMC_SDHCI_MICROCHIP_PIC32=y
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_CLASS=y
+CONFIG_LEDS_GPIO=y
+CONFIG_LEDS_TRIGGERS=y
+CONFIG_LEDS_TRIGGER_TIMER=m
+CONFIG_LEDS_TRIGGER_ONESHOT=m
+CONFIG_LEDS_TRIGGER_HEARTBEAT=y
+CONFIG_LEDS_TRIGGER_GPIO=m
+CONFIG_LEDS_TRIGGER_DEFAULT_ON=y
+# CONFIG_MIPS_PLATFORM_DEVICES is not set
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_AUTOFS4_FS=m
+CONFIG_FUSE_FS=m
+CONFIG_FSCACHE=m
+CONFIG_ISO9660_FS=m
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+CONFIG_UDF_FS=m
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_SQUASHFS=m
+CONFIG_SQUASHFS_XATTR=y
+CONFIG_SQUASHFS_LZ4=y
+CONFIG_SQUASHFS_LZO=y
+CONFIG_SQUASHFS_XZ=y
index 06b9bc7ea14b1da802dc29090174230cc2e1d613..c3212ff2672343fcb52315403c73d16759be77b1 100644 (file)
 #ifndef __ASM_CACHEOPS_H
 #define __ASM_CACHEOPS_H
 
+/*
+ * Most cache ops are split into a 2 bit field identifying the cache, and a 3
+ * bit field identifying the cache operation.
+ */
+#define CacheOp_Cache                  0x03
+#define CacheOp_Op                     0x1c
+
+#define Cache_I                                0x00
+#define Cache_D                                0x01
+#define Cache_T                                0x02
+#define Cache_S                                0x03
+
+#define Index_Writeback_Inv            0x00
+#define Index_Load_Tag                 0x04
+#define Index_Store_Tag                        0x08
+#define Hit_Invalidate                 0x10
+#define Hit_Writeback_Inv              0x14    /* not with Cache_I though */
+#define Hit_Writeback                  0x18
+
 /*
  * Cache Operations available on all MIPS processors with R4000-style caches
  */
-#define Index_Invalidate_I             0x00
-#define Index_Writeback_Inv_D          0x01
-#define Index_Load_Tag_I               0x04
-#define Index_Load_Tag_D               0x05
-#define Index_Store_Tag_I              0x08
-#define Index_Store_Tag_D              0x09
-#define Hit_Invalidate_I               0x10
-#define Hit_Invalidate_D               0x11
-#define Hit_Writeback_Inv_D            0x15
+#define Index_Invalidate_I             (Cache_I | Index_Writeback_Inv)
+#define Index_Writeback_Inv_D          (Cache_D | Index_Writeback_Inv)
+#define Index_Load_Tag_I               (Cache_I | Index_Load_Tag)
+#define Index_Load_Tag_D               (Cache_D | Index_Load_Tag)
+#define Index_Store_Tag_I              (Cache_I | Index_Store_Tag)
+#define Index_Store_Tag_D              (Cache_D | Index_Store_Tag)
+#define Hit_Invalidate_I               (Cache_I | Hit_Invalidate)
+#define Hit_Invalidate_D               (Cache_D | Hit_Invalidate)
+#define Hit_Writeback_Inv_D            (Cache_D | Hit_Writeback_Inv)
 
 /*
  * R4000-specific cacheops
  */
-#define Create_Dirty_Excl_D            0x0d
-#define Fill                           0x14
-#define Hit_Writeback_I                        0x18
-#define Hit_Writeback_D                        0x19
+#define Create_Dirty_Excl_D            (Cache_D | 0x0c)
+#define Fill                           (Cache_I | 0x14)
+#define Hit_Writeback_I                        (Cache_I | Hit_Writeback)
+#define Hit_Writeback_D                        (Cache_D | Hit_Writeback)
 
 /*
  * R4000SC and R4400SC-specific cacheops
  */
-#define Index_Invalidate_SI            0x02
-#define Index_Writeback_Inv_SD         0x03
-#define Index_Load_Tag_SI              0x06
-#define Index_Load_Tag_SD              0x07
-#define Index_Store_Tag_SI             0x0A
-#define Index_Store_Tag_SD             0x0B
-#define Create_Dirty_Excl_SD           0x0f
-#define Hit_Invalidate_SI              0x12
-#define Hit_Invalidate_SD              0x13
-#define Hit_Writeback_Inv_SD           0x17
-#define Hit_Writeback_SD               0x1b
-#define Hit_Set_Virtual_SI             0x1e
-#define Hit_Set_Virtual_SD             0x1f
+#define Cache_SI                       0x02
+#define Cache_SD                       0x03
+
+#define Index_Invalidate_SI            (Cache_SI | Index_Writeback_Inv)
+#define Index_Writeback_Inv_SD         (Cache_SD | Index_Writeback_Inv)
+#define Index_Load_Tag_SI              (Cache_SI | Index_Load_Tag)
+#define Index_Load_Tag_SD              (Cache_SD | Index_Load_Tag)
+#define Index_Store_Tag_SI             (Cache_SI | Index_Store_Tag)
+#define Index_Store_Tag_SD             (Cache_SD | Index_Store_Tag)
+#define Create_Dirty_Excl_SD           (Cache_SD | 0x0c)
+#define Hit_Invalidate_SI              (Cache_SI | Hit_Invalidate)
+#define Hit_Invalidate_SD              (Cache_SD | Hit_Invalidate)
+#define Hit_Writeback_Inv_SD           (Cache_SD | Hit_Writeback_Inv)
+#define Hit_Writeback_SD               (Cache_SD | Hit_Writeback)
+#define Hit_Set_Virtual_SI             (Cache_SI | 0x1c)
+#define Hit_Set_Virtual_SD             (Cache_SD | 0x1c)
 
 /*
  * R5000-specific cacheops
  */
-#define R5K_Page_Invalidate_S          0x17
+#define R5K_Page_Invalidate_S          (Cache_S | 0x14)
 
 /*
  * RM7000-specific cacheops
  */
-#define Page_Invalidate_T              0x16
-#define Index_Store_Tag_T              0x0a
-#define Index_Load_Tag_T               0x06
+#define Page_Invalidate_T              (Cache_T | 0x14)
+#define Index_Store_Tag_T              (Cache_T | Index_Store_Tag)
+#define Index_Load_Tag_T               (Cache_T | Index_Load_Tag)
 
 /*
  * R10000-specific cacheops
  * Cacheops 0x02, 0x06, 0x0a, 0x0c-0x0e, 0x16, 0x1a and 0x1e are unused.
  * Most of the _S cacheops are identical to the R4000SC _SD cacheops.
  */
-#define Index_Writeback_Inv_S          0x03
-#define Index_Load_Tag_S               0x07
-#define Index_Store_Tag_S              0x0B
-#define Hit_Invalidate_S               0x13
+#define Index_Writeback_Inv_S          (Cache_S | Index_Writeback_Inv)
+#define Index_Load_Tag_S               (Cache_S | Index_Load_Tag)
+#define Index_Store_Tag_S              (Cache_S | Index_Store_Tag)
+#define Hit_Invalidate_S               (Cache_S | Hit_Invalidate)
 #define Cache_Barrier                  0x14
-#define Hit_Writeback_Inv_S            0x17
-#define Index_Load_Data_I              0x18
-#define Index_Load_Data_D              0x19
-#define Index_Load_Data_S              0x1b
-#define Index_Store_Data_I             0x1c
-#define Index_Store_Data_D             0x1d
-#define Index_Store_Data_S             0x1f
+#define Hit_Writeback_Inv_S            (Cache_S | Hit_Writeback_Inv)
+#define Index_Load_Data_I              (Cache_I | 0x18)
+#define Index_Load_Data_D              (Cache_D | 0x18)
+#define Index_Load_Data_S              (Cache_S | 0x18)
+#define Index_Store_Data_I             (Cache_I | 0x1c)
+#define Index_Store_Data_D             (Cache_D | 0x1c)
+#define Index_Store_Data_S             (Cache_S | 0x1c)
 
 /*
  * Loongson2-specific cacheops
  */
-#define Hit_Invalidate_I_Loongson2     0x00
+#define Hit_Invalidate_I_Loongson2     (Cache_I | 0x00)
 
 #endif /* __ASM_CACHEOPS_H */
index d1e04c943f5f7c7d9ec232851e14d9b5c4215496..eeec8c8e2da2ed59e3d89d099721209203e9c243 100644 (file)
 # define cpu_has_small_pages   (cpu_data[0].options & MIPS_CPU_SP)
 #endif
 
+#ifndef cpu_has_nan_legacy
+#define cpu_has_nan_legacy     (cpu_data[0].options & MIPS_CPU_NAN_LEGACY)
+#endif
+#ifndef cpu_has_nan_2008
+#define cpu_has_nan_2008       (cpu_data[0].options & MIPS_CPU_NAN_2008)
+#endif
+
 #endif /* __ASM_CPU_FEATURES_H */
index 82ad15f11049284c2f347fc66b68ea5091fb810d..a97ca97285ecf1dd897c9c7a9610209e15ad70c7 100644 (file)
@@ -386,6 +386,8 @@ enum cpu_type_enum {
 #define MIPS_CPU_BP_GHIST      0x8000000000ull /* R12K+ Branch Prediction Global History */
 #define MIPS_CPU_SP            0x10000000000ull /* Small (1KB) page support */
 #define MIPS_CPU_FTLB          0x20000000000ull /* CPU has Fixed-page-size TLB */
+#define MIPS_CPU_NAN_LEGACY    0x40000000000ull /* Legacy NaN implemented */
+#define MIPS_CPU_NAN_2008      0x80000000000ull /* 2008 NaN implemented */
 
 /*
  * CPU ASE encodings
index b01a6ff468e00aab5d185a9dd53023101b34f2b7..cefb7a5968783094befa3ee5925b02ab1f972fac 100644 (file)
@@ -12,7 +12,6 @@
 #include <linux/fs.h>
 #include <uapi/linux/elf.h>
 
-#include <asm/cpu-info.h>
 #include <asm/current.h>
 
 /* ELF header e_flags defines. */
@@ -44,6 +43,7 @@
 #define EF_MIPS_OPTIONS_FIRST  0x00000080
 #define EF_MIPS_32BITMODE      0x00000100
 #define EF_MIPS_FP64           0x00000200
+#define EF_MIPS_NAN2008                0x00000400
 #define EF_MIPS_ABI            0x0000f000
 #define EF_MIPS_ARCH           0xf0000000
 
@@ -305,7 +305,7 @@ do {                                                                        \
                                                                        \
        current->thread.abi = &mips_abi;                                \
                                                                        \
-       current->thread.fpu.fcr31 = boot_cpu_data.fpu_csr31;            \
+       mips_set_personality_nan(state);                                \
 } while (0)
 
 #endif /* CONFIG_32BIT */
@@ -367,7 +367,7 @@ do {                                                                        \
        else                                                            \
                current->thread.abi = &mips_abi;                        \
                                                                        \
-       current->thread.fpu.fcr31 = boot_cpu_data.fpu_csr31;            \
+       mips_set_personality_nan(state);                                \
                                                                        \
        p = personality(current->personality);                          \
        if (p != PER_LINUX32 && p != PER_LINUX)                         \
@@ -432,6 +432,7 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm,
                                       int uses_interp);
 
 struct arch_elf_state {
+       int nan_2008;
        int fp_abi;
        int interp_fp_abi;
        int overall_fp_mode;
@@ -440,17 +441,23 @@ struct arch_elf_state {
 #define MIPS_ABI_FP_UNKNOWN    (-1)    /* Unknown FP ABI (kernel internal) */
 
 #define INIT_ARCH_ELF_STATE {                  \
+       .nan_2008 = -1,                         \
        .fp_abi = MIPS_ABI_FP_UNKNOWN,          \
        .interp_fp_abi = MIPS_ABI_FP_UNKNOWN,   \
        .overall_fp_mode = -1,                  \
 }
 
+/* Whether to accept legacy-NaN and 2008-NaN user binaries.  */
+extern bool mips_use_nan_legacy;
+extern bool mips_use_nan_2008;
+
 extern int arch_elf_pt_proc(void *ehdr, void *phdr, struct file *elf,
                            bool is_interp, struct arch_elf_state *state);
 
-extern int arch_check_elf(void *ehdr, bool has_interpreter,
+extern int arch_check_elf(void *ehdr, bool has_interpreter, void *interp_ehdr,
                          struct arch_elf_state *state);
 
+extern void mips_set_personality_nan(struct arch_elf_state *state);
 extern void mips_set_personality_fp(struct arch_elf_state *state);
 
 #endif /* _ASM_ELF_H */
index 2f021cdfba4f8fc60a5348f7822f0014d02a3eff..3225c3c0724b3642392bad7e9c395c9451afe555 100644 (file)
@@ -79,7 +79,7 @@ int mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
 /*
  * Break instruction with special math emu break code set
  */
-#define BREAK_MATH (0x0000000d | (BRK_MEMU << 16))
+#define BREAK_MATH(micromips) (((micromips) ? 0x7 : 0xd) | (BRK_MEMU << 16))
 
 #define SIGNALLING_NAN 0x7ff800007ff80000LL
 
index d10fd80dbb7e96b898d2230c2d0f5112d02c3bc1..2b4dc7ad53b8a32bc139dd33d15cb5bb533f8d85 100644 (file)
@@ -275,6 +275,7 @@ static inline void __iomem * __ioremap_mode(phys_addr_t offset, unsigned long si
  */
 #define ioremap_cachable(offset, size)                                 \
        __ioremap_mode((offset), (size), _page_cachable_default)
+#define ioremap_cache ioremap_cachable
 
 /*
  * These two are MIPS specific ioremap variant.         ioremap_cacheable_cow
index e7b138b4b3d37f9f038aaec933b5040f0e7c3b2a..65c351e328cc932af4aedca946ee19f9bd43f8c3 100644 (file)
@@ -84,41 +84,11 @@ static inline void arch_local_irq_restore(unsigned long flags)
        : "memory");
 }
 
-static inline void __arch_local_irq_restore(unsigned long flags)
-{
-       __asm__ __volatile__(
-       "       .set    push                                            \n"
-       "       .set    noreorder                                       \n"
-       "       .set    noat                                            \n"
-#if defined(CONFIG_IRQ_MIPS_CPU)
-       /*
-        * Slow, but doesn't suffer from a relatively unlikely race
-        * condition we're having since days 1.
-        */
-       "       beqz    %[flags], 1f                                    \n"
-       "       di                                                      \n"
-       "       ei                                                      \n"
-       "1:                                                             \n"
-#else
-       /*
-        * Fast, dangerous.  Life is fun, life is good.
-        */
-       "       mfc0    $1, $12                                         \n"
-       "       ins     $1, %[flags], 0, 1                              \n"
-       "       mtc0    $1, $12                                         \n"
-#endif
-       "       " __stringify(__irq_disable_hazard) "                   \n"
-       "       .set    pop                                             \n"
-       : [flags] "=r" (flags)
-       : "0" (flags)
-       : "memory");
-}
 #else
 /* Functions that require preempt_{dis,en}able() are in mips-atomic.c */
 void arch_local_irq_disable(void);
 unsigned long arch_local_irq_save(void);
 void arch_local_irq_restore(unsigned long flags);
-void __arch_local_irq_restore(unsigned long flags);
 #endif /* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */
 
 static inline void arch_local_irq_enable(void)
index 7c191443c7ea199f876a2c495b2b6d3f41bfc1d5..f6b12790716c071db8a3ce17971dd146c30025f5 100644 (file)
@@ -58,7 +58,7 @@
 #define KVM_MAX_VCPUS          1
 #define KVM_USER_MEM_SLOTS     8
 /* memory slots that does not exposed to userspace */
-#define KVM_PRIVATE_MEM_SLOTS  0
+#define KVM_PRIVATE_MEM_SLOTS  0
 
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 #define KVM_HALT_POLL_NS_DEFAULT 500000
 #define KVM_INVALID_INST               0xdeadbeef
 #define KVM_INVALID_ADDR               0xdeadbeef
 
-#define KVM_MALTA_GUEST_RTC_ADDR       0xb8000070UL
-
-#define GUEST_TICKS_PER_JIFFY          (40000000/HZ)
-#define MS_TO_NS(x)                    (x * 1E6L)
-
-#define CAUSEB_DC                      27
-#define CAUSEF_DC                      (_ULCAST_(1) << 27)
-
 extern atomic_t kvm_mips_instance;
 extern kvm_pfn_t (*kvm_mips_gfn_to_pfn)(struct kvm *kvm, gfn_t gfn);
 extern void (*kvm_mips_release_pfn_clean)(kvm_pfn_t pfn);
@@ -289,34 +281,6 @@ enum mips_mmu_types {
        MMU_TYPE_R8000
 };
 
-/*
- * Trap codes
- */
-#define T_INT                  0       /* Interrupt pending */
-#define T_TLB_MOD              1       /* TLB modified fault */
-#define T_TLB_LD_MISS          2       /* TLB miss on load or ifetch */
-#define T_TLB_ST_MISS          3       /* TLB miss on a store */
-#define T_ADDR_ERR_LD          4       /* Address error on a load or ifetch */
-#define T_ADDR_ERR_ST          5       /* Address error on a store */
-#define T_BUS_ERR_IFETCH       6       /* Bus error on an ifetch */
-#define T_BUS_ERR_LD_ST                7       /* Bus error on a load or store */
-#define T_SYSCALL              8       /* System call */
-#define T_BREAK                        9       /* Breakpoint */
-#define T_RES_INST             10      /* Reserved instruction exception */
-#define T_COP_UNUSABLE         11      /* Coprocessor unusable */
-#define T_OVFLOW               12      /* Arithmetic overflow */
-
-/*
- * Trap definitions added for r4000 port.
- */
-#define T_TRAP                 13      /* Trap instruction */
-#define T_VCEI                 14      /* Virtual coherency exception */
-#define T_MSAFPE               14      /* MSA floating point exception */
-#define T_FPE                  15      /* Floating point exception */
-#define T_MSADIS               21      /* MSA disabled exception */
-#define T_WATCH                        23      /* Watch address reference */
-#define T_VCED                 31      /* Virtual coherency data */
-
 /* Resume Flags */
 #define RESUME_FLAG_DR         (1<<0)  /* Reload guest nonvolatile state? */
 #define RESUME_FLAG_HOST       (1<<1)  /* Resume host? */
@@ -686,7 +650,6 @@ extern void kvm_mips_dump_host_tlbs(void);
 extern void kvm_mips_dump_guest_tlbs(struct kvm_vcpu *vcpu);
 extern void kvm_mips_flush_host_tlb(int skip_kseg0);
 extern int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long entryhi);
-extern int kvm_mips_host_tlb_inv_index(struct kvm_vcpu *vcpu, int index);
 
 extern int kvm_mips_guest_tlb_lookup(struct kvm_vcpu *vcpu,
                                     unsigned long entryhi);
index 4eee221b0cf0bce93fa7adb7b634c9feebcdf21d..2b3487213d1e932247077d8b3f67ba28612f2683 100644 (file)
@@ -115,6 +115,7 @@ static inline int soc_is_qca955x(void)
        return soc_is_qca9556() || soc_is_qca9558();
 }
 
+void ath79_ddr_wb_flush(unsigned int reg);
 void ath79_ddr_set_pci_windows(void);
 
 extern void __iomem *ath79_pll_base;
diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm963xx_tag.h b/arch/mips/include/asm/mach-bcm63xx/bcm963xx_tag.h
deleted file mode 100644 (file)
index 1e6b587..0000000
+++ /dev/null
@@ -1,96 +0,0 @@
-#ifndef __BCM963XX_TAG_H
-#define __BCM963XX_TAG_H
-
-#define TAGVER_LEN             4       /* Length of Tag Version */
-#define TAGLAYOUT_LEN          4       /* Length of FlashLayoutVer */
-#define SIG1_LEN               20      /* Company Signature 1 Length */
-#define SIG2_LEN               14      /* Company Signature 2 Length */
-#define BOARDID_LEN            16      /* Length of BoardId */
-#define ENDIANFLAG_LEN         2       /* Endian Flag Length */
-#define CHIPID_LEN             6       /* Chip Id Length */
-#define IMAGE_LEN              10      /* Length of Length Field */
-#define ADDRESS_LEN            12      /* Length of Address field */
-#define DUALFLAG_LEN           2       /* Dual Image flag Length */
-#define INACTIVEFLAG_LEN       2       /* Inactie Flag Length */
-#define RSASIG_LEN             20      /* Length of RSA Signature in tag */
-#define TAGINFO1_LEN           30      /* Length of vendor information field1 in tag */
-#define FLASHLAYOUTVER_LEN     4       /* Length of Flash Layout Version String tag */
-#define TAGINFO2_LEN           16      /* Length of vendor information field2 in tag */
-#define ALTTAGINFO_LEN         54      /* Alternate length for vendor information; Pirelli */
-
-#define NUM_PIRELLI            2
-#define IMAGETAG_CRC_START     0xFFFFFFFF
-
-#define PIRELLI_BOARDS { \
-       "AGPF-S0", \
-       "DWV-S0", \
-}
-
-/*
- * The broadcom firmware assumes the rootfs starts the image,
- * therefore uses the rootfs start (flash_image_address)
- * to determine where to flash the image.  Since we have the kernel first
- * we have to give it the kernel address, but the crc uses the length
- * associated with this address (root_length), which is added to the kernel
- * length (kernel_length) to determine the length of image to flash and thus
- * needs to be rootfs + deadcode (jffs2 EOF marker)
-*/
-
-struct bcm_tag {
-       /* 0-3: Version of the image tag */
-       char tag_version[TAGVER_LEN];
-       /* 4-23: Company Line 1 */
-       char sig_1[SIG1_LEN];
-       /*  24-37: Company Line 2 */
-       char sig_2[SIG2_LEN];
-       /* 38-43: Chip this image is for */
-       char chip_id[CHIPID_LEN];
-       /* 44-59: Board name */
-       char board_id[BOARDID_LEN];
-       /* 60-61: Map endianness -- 1 BE 0 LE */
-       char big_endian[ENDIANFLAG_LEN];
-       /* 62-71: Total length of image */
-       char total_length[IMAGE_LEN];
-       /* 72-83: Address in memory of CFE */
-       char cfe__address[ADDRESS_LEN];
-       /* 84-93: Size of CFE */
-       char cfe_length[IMAGE_LEN];
-       /* 94-105: Address in memory of image start
-        * (kernel for OpenWRT, rootfs for stock firmware)
-        */
-       char flash_image_start[ADDRESS_LEN];
-       /* 106-115: Size of rootfs */
-       char root_length[IMAGE_LEN];
-       /* 116-127: Address in memory of kernel */
-       char kernel_address[ADDRESS_LEN];
-       /* 128-137: Size of kernel */
-       char kernel_length[IMAGE_LEN];
-       /* 138-139: Unused at the moment */
-       char dual_image[DUALFLAG_LEN];
-       /* 140-141: Unused at the moment */
-       char inactive_flag[INACTIVEFLAG_LEN];
-       /* 142-161: RSA Signature (not used; some vendors may use this) */
-       char rsa_signature[RSASIG_LEN];
-       /* 162-191: Compilation and related information (not used in OpenWrt) */
-       char information1[TAGINFO1_LEN];
-       /* 192-195: Version flash layout */
-       char flash_layout_ver[FLASHLAYOUTVER_LEN];
-       /* 196-199: kernel+rootfs CRC32 */
-       __u32 fskernel_crc;
-       /* 200-215: Unused except on Alice Gate where is is information */
-       char information2[TAGINFO2_LEN];
-       /* 216-219: CRC32 of image less imagetag (kernel for Alice Gate) */
-       __u32 image_crc;
-       /* 220-223: CRC32 of rootfs partition */
-       __u32 rootfs_crc;
-       /* 224-227: CRC32 of kernel partition */
-       __u32 kernel_crc;
-       /* 228-235: Unused at present */
-       char reserved1[8];
-       /* 236-239: CRC32 of header excluding last 20 bytes */
-       __u32 header_crc;
-       /* 240-255: Unused at present */
-       char reserved2[16];
-};
-
-#endif /* __BCM63XX_TAG_H */
diff --git a/arch/mips/include/asm/mach-pic32/cpu-feature-overrides.h b/arch/mips/include/asm/mach-pic32/cpu-feature-overrides.h
new file mode 100644 (file)
index 0000000..4682308
--- /dev/null
@@ -0,0 +1,32 @@
+/*
+ * Joshua Henderson <joshua.henderson@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#ifndef __ASM_MACH_PIC32_CPU_FEATURE_OVERRIDES_H
+#define __ASM_MACH_PIC32_CPU_FEATURE_OVERRIDES_H
+
+/*
+ * CPU feature overrides for PIC32 boards
+ */
+#ifdef CONFIG_CPU_MIPS32
+#define cpu_has_vint           1
+#define cpu_has_veic           0
+#define cpu_has_tlb            1
+#define cpu_has_4kex           1
+#define cpu_has_4k_cache       1
+#define cpu_has_fpu            0
+#define cpu_has_counter                1
+#define cpu_has_llsc           1
+#define cpu_has_nofpuex                0
+#define cpu_icache_snoops_remote_store 1
+#endif
+
+#ifdef CONFIG_CPU_MIPS64
+#error This platform does not support 64bit.
+#endif
+
+#endif /* __ASM_MACH_PIC32_CPU_FEATURE_OVERRIDES_H */
diff --git a/arch/mips/include/asm/mach-pic32/irq.h b/arch/mips/include/asm/mach-pic32/irq.h
new file mode 100644 (file)
index 0000000..864330c
--- /dev/null
@@ -0,0 +1,22 @@
+/*
+ * Joshua Henderson <joshua.henderson@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ * This program is free software; you can distribute it and/or modify it
+ * under the terms of the GNU General Public License (Version 2) as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ */
+#ifndef __ASM_MACH_PIC32_IRQ_H
+#define __ASM_MACH_PIC32_IRQ_H
+
+#define NR_IRQS        256
+#define MIPS_CPU_IRQ_BASE 0
+
+#include_next <irq.h>
+
+#endif /* __ASM_MACH_PIC32_IRQ_H */
diff --git a/arch/mips/include/asm/mach-pic32/pic32.h b/arch/mips/include/asm/mach-pic32/pic32.h
new file mode 100644 (file)
index 0000000..ce52e91
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * Joshua Henderson <joshua.henderson@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ * This program is free software; you can distribute it and/or modify it
+ * under the terms of the GNU General Public License (Version 2) as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ */
+#ifndef _ASM_MACH_PIC32_H
+#define _ASM_MACH_PIC32_H
+
+#include <linux/io.h>
+
+/*
+ * PIC32 register offsets for SET/CLR/INV where supported.
+ */
+#define PIC32_CLR(_reg)                ((_reg) + 0x04)
+#define PIC32_SET(_reg)                ((_reg) + 0x08)
+#define PIC32_INV(_reg)                ((_reg) + 0x0C)
+
+/*
+ * PIC32 Base Register Offsets
+ */
+#define PIC32_BASE_CONFIG      0x1f800000
+#define PIC32_BASE_OSC         0x1f801200
+#define PIC32_BASE_RESET       0x1f801240
+#define PIC32_BASE_PPS         0x1f801400
+#define PIC32_BASE_UART                0x1f822000
+#define PIC32_BASE_PORT                0x1f860000
+#define PIC32_BASE_DEVCFG2     0x1fc4ff44
+
+/*
+ * Register unlock sequence required for some register access.
+ */
+void pic32_syskey_unlock_debug(const char *fn, const ulong ln);
+#define pic32_syskey_unlock()  \
+       pic32_syskey_unlock_debug(__func__, __LINE__)
+
+#endif /* _ASM_MACH_PIC32_H */
diff --git a/arch/mips/include/asm/mach-pic32/spaces.h b/arch/mips/include/asm/mach-pic32/spaces.h
new file mode 100644 (file)
index 0000000..046a0a9
--- /dev/null
@@ -0,0 +1,24 @@
+/*
+ * Joshua Henderson <joshua.henderson@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ * This program is free software; you can distribute it and/or modify it
+ * under the terms of the GNU General Public License (Version 2) as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ */
+#ifndef _ASM_MACH_PIC32_SPACES_H
+#define _ASM_MACH_PIC32_SPACES_H
+
+#ifdef CONFIG_PIC32MZDA
+#define PHYS_OFFSET    _AC(0x08000000, UL)
+#define UNCAC_BASE     _AC(0xa8000000, UL)
+#endif
+
+#include <asm/mach-generic/spaces.h>
+
+#endif /* __ASM_MACH_PIC32_SPACES_H */
diff --git a/arch/mips/include/asm/mach-ralink/irq.h b/arch/mips/include/asm/mach-ralink/irq.h
new file mode 100644 (file)
index 0000000..4321865
--- /dev/null
@@ -0,0 +1,9 @@
+#ifndef __ASM_MACH_RALINK_IRQ_H
+#define __ASM_MACH_RALINK_IRQ_H
+
+#define GIC_NUM_INTRS  64
+#define NR_IRQS 256
+
+#include_next <irq.h>
+
+#endif
diff --git a/arch/mips/include/asm/mach-ralink/mt7621.h b/arch/mips/include/asm/mach-ralink/mt7621.h
new file mode 100644 (file)
index 0000000..610b61e
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * Copyright (C) 2015 John Crispin <blogic@openwrt.org>
+ */
+
+#ifndef _MT7621_REGS_H_
+#define _MT7621_REGS_H_
+
+#define MT7621_PALMBUS_BASE            0x1C000000
+#define MT7621_PALMBUS_SIZE            0x03FFFFFF
+
+#define MT7621_SYSC_BASE               0x1E000000
+
+#define SYSC_REG_CHIP_NAME0            0x00
+#define SYSC_REG_CHIP_NAME1            0x04
+#define SYSC_REG_CHIP_REV              0x0c
+#define SYSC_REG_SYSTEM_CONFIG0                0x10
+#define SYSC_REG_SYSTEM_CONFIG1                0x14
+
+#define CHIP_REV_PKG_MASK              0x1
+#define CHIP_REV_PKG_SHIFT             16
+#define CHIP_REV_VER_MASK              0xf
+#define CHIP_REV_VER_SHIFT             8
+#define CHIP_REV_ECO_MASK              0xf
+
+#define MT7621_DRAM_BASE                0x0
+#define MT7621_DDR2_SIZE_MIN           32
+#define MT7621_DDR2_SIZE_MAX           256
+
+#define MT7621_CHIP_NAME0              0x3637544D
+#define MT7621_CHIP_NAME1              0x20203132
+
+#define MIPS_GIC_IRQ_BASE           (MIPS_CPU_IRQ_BASE + 8)
+
+#endif
diff --git a/arch/mips/include/asm/mach-ralink/mt7621/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ralink/mt7621/cpu-feature-overrides.h
new file mode 100644 (file)
index 0000000..15db1b3
--- /dev/null
@@ -0,0 +1,65 @@
+/*
+ * Ralink MT7621 specific CPU feature overrides
+ *
+ * Copyright (C) 2008-2009 Gabor Juhos <juhosg@openwrt.org>
+ * Copyright (C) 2008 Imre Kaloz <kaloz@openwrt.org>
+ * Copyright (C) 2015 Felix Fietkau <nbd@openwrt.org>
+ *
+ * This file was derived from: include/asm-mips/cpu-features.h
+ *     Copyright (C) 2003, 2004 Ralf Baechle
+ *     Copyright (C) 2004 Maciej W. Rozycki
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ */
+#ifndef _MT7621_CPU_FEATURE_OVERRIDES_H
+#define _MT7621_CPU_FEATURE_OVERRIDES_H
+
+#define cpu_has_tlb            1
+#define cpu_has_4kex           1
+#define cpu_has_3k_cache       0
+#define cpu_has_4k_cache       1
+#define cpu_has_tx39_cache     0
+#define cpu_has_sb1_cache      0
+#define cpu_has_fpu            0
+#define cpu_has_32fpr          0
+#define cpu_has_counter                1
+#define cpu_has_watch          1
+#define cpu_has_divec          1
+
+#define cpu_has_prefetch       1
+#define cpu_has_ejtag          1
+#define cpu_has_llsc           1
+
+#define cpu_has_mips16         1
+#define cpu_has_mdmx           0
+#define cpu_has_mips3d         0
+#define cpu_has_smartmips      0
+
+#define cpu_has_mips32r1       1
+#define cpu_has_mips32r2       1
+#define cpu_has_mips64r1       0
+#define cpu_has_mips64r2       0
+
+#define cpu_has_dsp            1
+#define cpu_has_dsp2           0
+#define cpu_has_mipsmt         1
+
+#define cpu_has_64bits         0
+#define cpu_has_64bit_zero_reg 0
+#define cpu_has_64bit_gp_regs  0
+#define cpu_has_64bit_addresses        0
+
+#define cpu_dcache_line_size() 32
+#define cpu_icache_line_size() 32
+
+#define cpu_has_dc_aliases     0
+#define cpu_has_vtag_icache    0
+
+#define cpu_has_rixi           0
+#define cpu_has_tlbinv         0
+#define cpu_has_userlocal      1
+
+#endif /* _MT7621_CPU_FEATURE_OVERRIDES_H */
index 6516e9da51334916b04b6f8689a0857faad5f0ca..b196825a1de9ca1f3b60df1059978569c93ad8c2 100644 (file)
@@ -243,6 +243,10 @@ BUILD_CM_Cx_R_(tcid_8_priority,    0x80)
 #define  CM_GCR_BASE_CMDEFTGT_IOCU0            2
 #define  CM_GCR_BASE_CMDEFTGT_IOCU1            3
 
+/* GCR_RESET_EXT_BASE register fields */
+#define CM_GCR_RESET_EXT_BASE_EVARESET         BIT(31)
+#define CM_GCR_RESET_EXT_BASE_UEB              BIT(30)
+
 /* GCR_ACCESS register fields */
 #define CM_GCR_ACCESS_ACCESSEN_SHF             0
 #define CM_GCR_ACCESS_ACCESSEN_MSK             (_ULCAST_(0xff) << 0)
index 4b89f28047f7118588acf2152ba43e44a5d353e2..1f6ea8352ca90408772f0a7f72f7e213bece28ba 100644 (file)
@@ -52,7 +52,7 @@ do {                                                                  \
        __this_cpu_inc(mipsr2emustats.M);                               \
        err = __get_user(nir, (u32 __user *)regs->cp0_epc);             \
        if (!err) {                                                     \
-               if (nir == BREAK_MATH)                                  \
+               if (nir == BREAK_MATH(0))                               \
                        __this_cpu_inc(mipsr2bdemustats.M);             \
        }                                                               \
        preempt_enable();                                               \
index e43aca183c9918d4138f000c4a385659f8523eae..3ad19ad04d8a282179423e9fcc5ecbb5557d26c7 100644 (file)
 #define CAUSEF_IV              (_ULCAST_(1)   << 23)
 #define CAUSEB_PCI             26
 #define CAUSEF_PCI             (_ULCAST_(1)   << 26)
+#define CAUSEB_DC              27
+#define CAUSEF_DC              (_ULCAST_(1)   << 27)
 #define CAUSEB_CE              28
 #define CAUSEF_CE              (_ULCAST_(3)   << 28)
 #define CAUSEB_TI              30
 #define CAUSEB_BD              31
 #define CAUSEF_BD              (_ULCAST_(1)   << 31)
 
+/*
+ * Cause.ExcCode trap codes.
+ */
+#define EXCCODE_INT            0       /* Interrupt pending */
+#define EXCCODE_MOD            1       /* TLB modified fault */
+#define EXCCODE_TLBL           2       /* TLB miss on load or ifetch */
+#define EXCCODE_TLBS           3       /* TLB miss on a store */
+#define EXCCODE_ADEL           4       /* Address error on a load or ifetch */
+#define EXCCODE_ADES           5       /* Address error on a store */
+#define EXCCODE_IBE            6       /* Bus error on an ifetch */
+#define EXCCODE_DBE            7       /* Bus error on a load or store */
+#define EXCCODE_SYS            8       /* System call */
+#define EXCCODE_BP             9       /* Breakpoint */
+#define EXCCODE_RI             10      /* Reserved instruction exception */
+#define EXCCODE_CPU            11      /* Coprocessor unusable */
+#define EXCCODE_OV             12      /* Arithmetic overflow */
+#define EXCCODE_TR             13      /* Trap instruction */
+#define EXCCODE_MSAFPE         14      /* MSA floating point exception */
+#define EXCCODE_FPE            15      /* Floating point exception */
+#define EXCCODE_TLBRI          19      /* TLB Read-Inhibit exception */
+#define EXCCODE_TLBXI          20      /* TLB Execution-Inhibit exception */
+#define EXCCODE_MSADIS         21      /* MSA disabled exception */
+#define EXCCODE_MDMX           22      /* MDMX unusable exception */
+#define EXCCODE_WATCH          23      /* Watch address reference */
+#define EXCCODE_MCHECK         24      /* Machine check */
+#define EXCCODE_THREAD         25      /* Thread exceptions (MT) */
+#define EXCCODE_DSPDIS         26      /* DSP disabled exception */
+#define EXCCODE_GE             27      /* Virtualized guest exception (VZ) */
+
+/* Implementation specific trap codes used by MIPS cores */
+#define MIPS_EXCCODE_TLBPAR    16      /* TLB parity error exception */
+
 /*
  * Bits in the coprocessor 0 config register.
  */
index 2046c023022406d8a336197db13df1a75b1e8021..21ed7150fec3f4847aef98ac45cd56241b4f50b3 100644 (file)
@@ -33,7 +33,7 @@
 #define PAGE_SHIFT     16
 #endif
 #define PAGE_SIZE      (_AC(1,UL) << PAGE_SHIFT)
-#define PAGE_MASK      (~(PAGE_SIZE - 1))
+#define PAGE_MASK      (~((1 << PAGE_SHIFT) - 1))
 
 /*
  * This is used for calculating the real page sizes
index 6995b4a02e2359bf6e2e1b8493bcae55443753f1..9a4fe0133ff1c7d82685d426dc1a91b371a972a6 100644 (file)
@@ -353,7 +353,7 @@ static inline pte_t pte_mkdirty(pte_t pte)
 static inline pte_t pte_mkyoung(pte_t pte)
 {
        pte_val(pte) |= _PAGE_ACCESSED;
-#ifdef CONFIG_CPU_MIPSR2
+#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
        if (!(pte_val(pte) & _PAGE_NO_READ))
                pte_val(pte) |= _PAGE_SILENT_READ;
        else
@@ -542,7 +542,7 @@ static inline pmd_t pmd_mkyoung(pmd_t pmd)
 {
        pmd_val(pmd) |= _PAGE_ACCESSED;
 
-#ifdef CONFIG_CPU_MIPSR2
+#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
        if (!(pmd_val(pmd) & _PAGE_NO_READ))
                pmd_val(pmd) |= _PAGE_SILENT_READ;
        else
index 9b44d5a816fa3ee64faf18a80c7f415dfb681a4d..ddea53e3a9bb7bbd56463a0727ff2b3bc378f614 100644 (file)
@@ -116,7 +116,8 @@ enum cop_op {
        dmtc_op       = 0x05, ctc_op        = 0x06,
        mthc0_op      = 0x06, mthc_op       = 0x07,
        bc_op         = 0x08, bc1eqz_op     = 0x09,
-       bc1nez_op     = 0x0d, cop_op        = 0x10,
+       mfmc0_op      = 0x0b, bc1nez_op     = 0x0d,
+       wrpgpr_op     = 0x0e, cop_op        = 0x10,
        copm_op       = 0x18
 };
 
@@ -529,7 +530,7 @@ enum MIPS6e_i8_func {
 };
 
 /*
- * (microMIPS & MIPS16e) NOP instruction.
+ * (microMIPS) NOP instruction.
  */
 #define MM_NOP16       0x0c00
 
@@ -679,7 +680,7 @@ struct fp0_format {         /* FPU multiply and add format (MIPS32) */
        ;))))))
 };
 
-struct mm_fp0_format {         /* FPU multipy and add format (microMIPS) */
+struct mm_fp0_format {         /* FPU multiply and add format (microMIPS) */
        __BITFIELD_FIELD(unsigned int opcode : 6,
        __BITFIELD_FIELD(unsigned int ft : 5,
        __BITFIELD_FIELD(unsigned int fs : 5,
@@ -799,6 +800,13 @@ struct mm_x_format {               /* Scaled indexed load format (microMIPS) */
        ;)))))
 };
 
+struct mm_a_format {           /* ADDIUPC format (microMIPS) */
+       __BITFIELD_FIELD(unsigned int opcode : 6,
+       __BITFIELD_FIELD(unsigned int rs : 3,
+       __BITFIELD_FIELD(signed int simmediate : 23,
+       ;)))
+};
+
 /*
  * microMIPS instruction formats (16-bit length)
  */
@@ -940,6 +948,7 @@ union mips_instruction {
        struct mm_i_format mm_i_format;
        struct mm_m_format mm_m_format;
        struct mm_x_format mm_x_format;
+       struct mm_a_format mm_a_format;
        struct mm_b0_format mm_b0_format;
        struct mm_b1_format mm_b1_format;
        struct mm16_m_format mm16_m_format ;
index 09f4034f239f511867a4d56792de909d0ad0c773..6392dbe504fb3b4050210f7a5ca993362efa352b 100644 (file)
@@ -190,7 +190,7 @@ static inline void check_daddi(void)
        printk("Checking for the daddi bug... ");
 
        local_irq_save(flags);
-       handler = set_except_vector(12, handle_daddi_ov);
+       handler = set_except_vector(EXCCODE_OV, handle_daddi_ov);
        /*
         * The following code fails to trigger an overflow exception
         * when executed on R4000 rev. 2.2 or 3.0 (PRId 00000422 or
@@ -214,7 +214,7 @@ static inline void check_daddi(void)
                ".set   pop"
                : "=r" (v), "=&r" (tmp)
                : "I" (0xffffffffffffdb9aUL), "I" (0x1234));
-       set_except_vector(12, handler);
+       set_except_vector(EXCCODE_OV, handler);
        local_irq_restore(flags);
 
        if (daddi_ov) {
@@ -225,14 +225,14 @@ static inline void check_daddi(void)
        printk("yes, workaround... ");
 
        local_irq_save(flags);
-       handler = set_except_vector(12, handle_daddi_ov);
+       handler = set_except_vector(EXCCODE_OV, handle_daddi_ov);
        asm volatile(
                "addiu  %1, $0, %2\n\t"
                "dsrl   %1, %1, 1\n\t"
                "daddi  %0, %1, %3"
                : "=r" (v), "=&r" (tmp)
                : "I" (0xffffffffffffdb9aUL), "I" (0x1234));
-       set_except_vector(12, handler);
+       set_except_vector(EXCCODE_OV, handler);
        local_irq_restore(flags);
 
        if (daddi_ov) {
index 6b9064499bd3dfb49ef0c09ad3da48034dea0ff2..b725b713b9f8b56e3763784c18b0550dffd77b6d 100644 (file)
@@ -98,6 +98,161 @@ static inline void cpu_set_fpu_fcsr_mask(struct cpuinfo_mips *c)
        c->fpu_msk31 = ~(fcsr0 ^ fcsr1) & ~mask;
 }
 
+/*
+ * Determine the IEEE 754 NaN encodings and ABS.fmt/NEG.fmt execution modes
+ * supported by FPU hardware.
+ */
+static void cpu_set_fpu_2008(struct cpuinfo_mips *c)
+{
+       if (c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M64R1 |
+                           MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2 |
+                           MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6)) {
+               unsigned long sr, fir, fcsr, fcsr0, fcsr1;
+
+               sr = read_c0_status();
+               __enable_fpu(FPU_AS_IS);
+
+               fir = read_32bit_cp1_register(CP1_REVISION);
+               if (fir & MIPS_FPIR_HAS2008) {
+                       fcsr = read_32bit_cp1_register(CP1_STATUS);
+
+                       fcsr0 = fcsr & ~(FPU_CSR_ABS2008 | FPU_CSR_NAN2008);
+                       write_32bit_cp1_register(CP1_STATUS, fcsr0);
+                       fcsr0 = read_32bit_cp1_register(CP1_STATUS);
+
+                       fcsr1 = fcsr | FPU_CSR_ABS2008 | FPU_CSR_NAN2008;
+                       write_32bit_cp1_register(CP1_STATUS, fcsr1);
+                       fcsr1 = read_32bit_cp1_register(CP1_STATUS);
+
+                       write_32bit_cp1_register(CP1_STATUS, fcsr);
+
+                       if (!(fcsr0 & FPU_CSR_NAN2008))
+                               c->options |= MIPS_CPU_NAN_LEGACY;
+                       if (fcsr1 & FPU_CSR_NAN2008)
+                               c->options |= MIPS_CPU_NAN_2008;
+
+                       if ((fcsr0 ^ fcsr1) & FPU_CSR_ABS2008)
+                               c->fpu_msk31 &= ~FPU_CSR_ABS2008;
+                       else
+                               c->fpu_csr31 |= fcsr & FPU_CSR_ABS2008;
+
+                       if ((fcsr0 ^ fcsr1) & FPU_CSR_NAN2008)
+                               c->fpu_msk31 &= ~FPU_CSR_NAN2008;
+                       else
+                               c->fpu_csr31 |= fcsr & FPU_CSR_NAN2008;
+               } else {
+                       c->options |= MIPS_CPU_NAN_LEGACY;
+               }
+
+               write_c0_status(sr);
+       } else {
+               c->options |= MIPS_CPU_NAN_LEGACY;
+       }
+}
+
+/*
+ * IEEE 754 conformance mode to use.  Affects the NaN encoding and the
+ * ABS.fmt/NEG.fmt execution mode.
+ */
+static enum { STRICT, LEGACY, STD2008, RELAXED } ieee754 = STRICT;
+
+/*
+ * Set the IEEE 754 NaN encodings and the ABS.fmt/NEG.fmt execution modes
+ * to support by the FPU emulator according to the IEEE 754 conformance
+ * mode selected.  Note that "relaxed" straps the emulator so that it
+ * allows 2008-NaN binaries even for legacy processors.
+ */
+static void cpu_set_nofpu_2008(struct cpuinfo_mips *c)
+{
+       c->options &= ~(MIPS_CPU_NAN_2008 | MIPS_CPU_NAN_LEGACY);
+       c->fpu_csr31 &= ~(FPU_CSR_ABS2008 | FPU_CSR_NAN2008);
+       c->fpu_msk31 &= ~(FPU_CSR_ABS2008 | FPU_CSR_NAN2008);
+
+       switch (ieee754) {
+       case STRICT:
+               if (c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M64R1 |
+                                   MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2 |
+                                   MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6)) {
+                       c->options |= MIPS_CPU_NAN_2008 | MIPS_CPU_NAN_LEGACY;
+               } else {
+                       c->options |= MIPS_CPU_NAN_LEGACY;
+                       c->fpu_msk31 |= FPU_CSR_ABS2008 | FPU_CSR_NAN2008;
+               }
+               break;
+       case LEGACY:
+               c->options |= MIPS_CPU_NAN_LEGACY;
+               c->fpu_msk31 |= FPU_CSR_ABS2008 | FPU_CSR_NAN2008;
+               break;
+       case STD2008:
+               c->options |= MIPS_CPU_NAN_2008;
+               c->fpu_csr31 |= FPU_CSR_ABS2008 | FPU_CSR_NAN2008;
+               c->fpu_msk31 |= FPU_CSR_ABS2008 | FPU_CSR_NAN2008;
+               break;
+       case RELAXED:
+               c->options |= MIPS_CPU_NAN_2008 | MIPS_CPU_NAN_LEGACY;
+               break;
+       }
+}
+
+/*
+ * Override the IEEE 754 NaN encoding and ABS.fmt/NEG.fmt execution mode
+ * according to the "ieee754=" parameter.
+ */
+static void cpu_set_nan_2008(struct cpuinfo_mips *c)
+{
+       switch (ieee754) {
+       case STRICT:
+               mips_use_nan_legacy = !!cpu_has_nan_legacy;
+               mips_use_nan_2008 = !!cpu_has_nan_2008;
+               break;
+       case LEGACY:
+               mips_use_nan_legacy = !!cpu_has_nan_legacy;
+               mips_use_nan_2008 = !cpu_has_nan_legacy;
+               break;
+       case STD2008:
+               mips_use_nan_legacy = !cpu_has_nan_2008;
+               mips_use_nan_2008 = !!cpu_has_nan_2008;
+               break;
+       case RELAXED:
+               mips_use_nan_legacy = true;
+               mips_use_nan_2008 = true;
+               break;
+       }
+}
+
+/*
+ * IEEE 754 NaN encoding and ABS.fmt/NEG.fmt execution mode override
+ * settings:
+ *
+ * strict:  accept binaries that request a NaN encoding supported by the FPU
+ * legacy:  only accept legacy-NaN binaries
+ * 2008:    only accept 2008-NaN binaries
+ * relaxed: accept any binaries regardless of whether supported by the FPU
+ */
+static int __init ieee754_setup(char *s)
+{
+       if (!s)
+               return -1;
+       else if (!strcmp(s, "strict"))
+               ieee754 = STRICT;
+       else if (!strcmp(s, "legacy"))
+               ieee754 = LEGACY;
+       else if (!strcmp(s, "2008"))
+               ieee754 = STD2008;
+       else if (!strcmp(s, "relaxed"))
+               ieee754 = RELAXED;
+       else
+               return -1;
+
+       if (!(boot_cpu_data.options & MIPS_CPU_FPU))
+               cpu_set_nofpu_2008(&boot_cpu_data);
+       cpu_set_nan_2008(&boot_cpu_data);
+
+       return 0;
+}
+
+early_param("ieee754", ieee754_setup);
+
 /*
  * Set the FIR feature flags for the FPU emulator.
  */
@@ -113,6 +268,8 @@ static void cpu_set_nofpu_id(struct cpuinfo_mips *c)
        if (c->isa_level & (MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2 |
                            MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6))
                value |= MIPS_FPIR_F64 | MIPS_FPIR_L | MIPS_FPIR_W;
+       if (c->options & MIPS_CPU_NAN_2008)
+               value |= MIPS_FPIR_HAS2008;
        c->fpu_id = value;
 }
 
@@ -137,6 +294,8 @@ static void cpu_set_fpu_opts(struct cpuinfo_mips *c)
        }
 
        cpu_set_fpu_fcsr_mask(c);
+       cpu_set_fpu_2008(c);
+       cpu_set_nan_2008(c);
 }
 
 /*
@@ -147,6 +306,8 @@ static void cpu_set_nofpu_opts(struct cpuinfo_mips *c)
        c->options &= ~MIPS_CPU_FPU;
        c->fpu_msk31 = mips_nofpu_msk31;
 
+       cpu_set_nofpu_2008(c);
+       cpu_set_nan_2008(c);
        cpu_set_nofpu_id(c);
 }
 
index 4a4d9e067c89427fc34e990586f0e8237a9418ba..c3c234dc0c07748ad7696f7b4d3c395df3954293 100644 (file)
 #include <linux/elf.h>
 #include <linux/sched.h>
 
+#include <asm/cpu-info.h>
+
+/* Whether to accept legacy-NaN and 2008-NaN user binaries.  */
+bool mips_use_nan_legacy;
+bool mips_use_nan_2008;
+
 /* FPU modes */
 enum {
        FP_FRE,
@@ -68,15 +74,23 @@ static struct mode_req none_req = { true, true, false, true, true };
 int arch_elf_pt_proc(void *_ehdr, void *_phdr, struct file *elf,
                     bool is_interp, struct arch_elf_state *state)
 {
-       struct elf32_hdr *ehdr32 = _ehdr;
+       union {
+               struct elf32_hdr e32;
+               struct elf64_hdr e64;
+       } *ehdr = _ehdr;
        struct elf32_phdr *phdr32 = _phdr;
        struct elf64_phdr *phdr64 = _phdr;
        struct mips_elf_abiflags_v0 abiflags;
+       bool elf32;
+       u32 flags;
        int ret;
 
+       elf32 = ehdr->e32.e_ident[EI_CLASS] == ELFCLASS32;
+       flags = elf32 ? ehdr->e32.e_flags : ehdr->e64.e_flags;
+
        /* Lets see if this is an O32 ELF */
-       if (ehdr32->e_ident[EI_CLASS] == ELFCLASS32) {
-               if (ehdr32->e_flags & EF_MIPS_FP64) {
+       if (elf32) {
+               if (flags & EF_MIPS_FP64) {
                        /*
                         * Set MIPS_ABI_FP_OLD_64 for EF_MIPS_FP64. We will override it
                         * later if needed
@@ -120,13 +134,50 @@ int arch_elf_pt_proc(void *_ehdr, void *_phdr, struct file *elf,
        return 0;
 }
 
-int arch_check_elf(void *_ehdr, bool has_interpreter,
+int arch_check_elf(void *_ehdr, bool has_interpreter, void *_interp_ehdr,
                   struct arch_elf_state *state)
 {
-       struct elf32_hdr *ehdr = _ehdr;
+       union {
+               struct elf32_hdr e32;
+               struct elf64_hdr e64;
+       } *ehdr = _ehdr;
+       union {
+               struct elf32_hdr e32;
+               struct elf64_hdr e64;
+       } *iehdr = _interp_ehdr;
        struct mode_req prog_req, interp_req;
        int fp_abi, interp_fp_abi, abi0, abi1, max_abi;
-       bool is_mips64;
+       bool elf32;
+       u32 flags;
+
+       elf32 = ehdr->e32.e_ident[EI_CLASS] == ELFCLASS32;
+       flags = elf32 ? ehdr->e32.e_flags : ehdr->e64.e_flags;
+
+       /*
+        * Determine the NaN personality, reject the binary if not allowed.
+        * Also ensure that any interpreter matches the executable.
+        */
+       if (flags & EF_MIPS_NAN2008) {
+               if (mips_use_nan_2008)
+                       state->nan_2008 = 1;
+               else
+                       return -ENOEXEC;
+       } else {
+               if (mips_use_nan_legacy)
+                       state->nan_2008 = 0;
+               else
+                       return -ENOEXEC;
+       }
+       if (has_interpreter) {
+               bool ielf32;
+               u32 iflags;
+
+               ielf32 = iehdr->e32.e_ident[EI_CLASS] == ELFCLASS32;
+               iflags = ielf32 ? iehdr->e32.e_flags : iehdr->e64.e_flags;
+
+               if ((flags ^ iflags) & EF_MIPS_NAN2008)
+                       return -ELIBBAD;
+       }
 
        if (!config_enabled(CONFIG_MIPS_O32_FP64_SUPPORT))
                return 0;
@@ -142,21 +193,18 @@ int arch_check_elf(void *_ehdr, bool has_interpreter,
                abi0 = abi1 = fp_abi;
        }
 
-       is_mips64 = (ehdr->e_ident[EI_CLASS] == ELFCLASS64) ||
-                   (ehdr->e_flags & EF_MIPS_ABI2);
+       if (elf32 && !(flags & EF_MIPS_ABI2)) {
+               /* Default to a mode capable of running code expecting FR=0 */
+               state->overall_fp_mode = cpu_has_mips_r6 ? FP_FRE : FP_FR0;
 
-       if (is_mips64) {
+               /* Allow all ABIs we know about */
+               max_abi = MIPS_ABI_FP_64A;
+       } else {
                /* MIPS64 code always uses FR=1, thus the default is easy */
                state->overall_fp_mode = FP_FR1;
 
                /* Disallow access to the various FPXX & FP64 ABIs */
                max_abi = MIPS_ABI_FP_SOFT;
-       } else {
-               /* Default to a mode capable of running code expecting FR=0 */
-               state->overall_fp_mode = cpu_has_mips_r6 ? FP_FRE : FP_FR0;
-
-               /* Allow all ABIs we know about */
-               max_abi = MIPS_ABI_FP_64A;
        }
 
        if ((abi0 > max_abi && abi0 != MIPS_ABI_FP_UNKNOWN) ||
@@ -254,3 +302,27 @@ void mips_set_personality_fp(struct arch_elf_state *state)
                BUG();
        }
 }
+
+/*
+ * Select the IEEE 754 NaN encoding and ABS.fmt/NEG.fmt execution mode
+ * in FCSR according to the ELF NaN personality.
+ */
+void mips_set_personality_nan(struct arch_elf_state *state)
+{
+       struct cpuinfo_mips *c = &boot_cpu_data;
+       struct task_struct *t = current;
+
+       t->thread.fpu.fcr31 = c->fpu_csr31;
+       switch (state->nan_2008) {
+       case 0:
+               break;
+       case 1:
+               if (!(c->fpu_msk31 & FPU_CSR_NAN2008))
+                       t->thread.fpu.fcr31 |= FPU_CSR_NAN2008;
+               if (!(c->fpu_msk31 & FPU_CSR_ABS2008))
+                       t->thread.fpu.fcr31 |= FPU_CSR_ABS2008;
+               break;
+       default:
+               BUG();
+       }
+}
index c6854d9df926d5c44e3654bcc7fd1d57136741b8..705be43c35333f648f04a96f26a4104ba172c46c 100644 (file)
@@ -21,7 +21,7 @@ static struct txx9_pio_reg __iomem *txx9_pioptr;
 
 static int txx9_gpio_get(struct gpio_chip *chip, unsigned int offset)
 {
-       return __raw_readl(&txx9_pioptr->din) & (1 << offset);
+       return !!(__raw_readl(&txx9_pioptr->din) & (1 << offset));
 }
 
 static void txx9_gpio_set_raw(unsigned int offset, int value)
index 4f0ac78d17f196e7c34de33af8bdcd4f708392e3..a5279b2f31989f8c8dcd915fccb233a4ada33332 100644 (file)
@@ -548,9 +548,6 @@ static const struct pt_regs_offset regoffset_table[] = {
        REG_OFFSET_NAME(c0_badvaddr, cp0_badvaddr),
        REG_OFFSET_NAME(c0_cause, cp0_cause),
        REG_OFFSET_NAME(c0_epc, cp0_epc),
-#ifdef CONFIG_MIPS_MT_SMTC
-       REG_OFFSET_NAME(c0_tcstatus, cp0_tcstatus),
-#endif
 #ifdef CONFIG_CPU_CAVIUM_OCTEON
        REG_OFFSET_NAME(mpl0, mpl[0]),
        REG_OFFSET_NAME(mpl1, mpl[1]),
index 66aac55df3497f79d39f1b203601eccf3a25a48f..569a7d5242ddda902e7ab1a03430ab14c712e5cd 100644 (file)
@@ -623,7 +623,7 @@ static void __init request_crashkernel(struct resource *res)
 
 #define USE_PROM_CMDLINE       IS_ENABLED(CONFIG_MIPS_CMDLINE_FROM_BOOTLOADER)
 #define USE_DTB_CMDLINE                IS_ENABLED(CONFIG_MIPS_CMDLINE_FROM_DTB)
-#define EXTEND_WITH_PROM       IS_ENABLED(CONFIG_MIPS_CMDLINE_EXTEND)
+#define EXTEND_WITH_PROM       IS_ENABLED(CONFIG_MIPS_CMDLINE_DTB_EXTEND)
 
 static void __init arch_mem_init(char **cmdline_p)
 {
index e04c8057b88238956efe64c519dfa3aac41e1ba9..2ad4e4c96d61cb02f8703d46efde1c596649a5df 100644 (file)
@@ -202,6 +202,9 @@ static void boot_core(unsigned core)
        /* Ensure its coherency is disabled */
        write_gcr_co_coherence(0);
 
+       /* Start it with the legacy memory map and exception base */
+       write_gcr_co_reset_ext_base(CM_GCR_RESET_EXT_BASE_UEB);
+
        /* Ensure the core can access the GCRs */
        access = read_gcr_access();
        access |= 1 << (CM_GCR_ACCESS_ACCESSEN_SHF + core);
index 2242bdd4370eb19851ff5a31540a02c65fa26498..4472a7f985776ad78a6e032a333cacd5bb2c7415 100644 (file)
 #include <asm/barrier.h>
 #include <asm/mipsregs.h>
 
-static atomic_t count_start_flag = ATOMIC_INIT(0);
+static unsigned int initcount = 0;
 static atomic_t count_count_start = ATOMIC_INIT(0);
 static atomic_t count_count_stop = ATOMIC_INIT(0);
-static atomic_t count_reference = ATOMIC_INIT(0);
 
 #define COUNTON 100
-#define NR_LOOPS 5
+#define NR_LOOPS 3
 
 void synchronise_count_master(int cpu)
 {
        int i;
        unsigned long flags;
-       unsigned int initcount;
 
        printk(KERN_INFO "Synchronize counters for CPU %u: ", cpu);
 
        local_irq_save(flags);
 
-       /*
-        * Notify the slaves that it's time to start
-        */
-       atomic_set(&count_reference, read_c0_count());
-       atomic_set(&count_start_flag, cpu);
-       smp_wmb();
-
-       /* Count will be initialised to current timer for all CPU's */
-       initcount = read_c0_count();
-
        /*
         * We loop a few times to get a primed instruction cache,
         * then the last pass is more or less synchronised and
@@ -63,9 +51,13 @@ void synchronise_count_master(int cpu)
                atomic_set(&count_count_stop, 0);
                smp_wmb();
 
-               /* this lets the slaves write their count register */
+               /* Let the slave writes its count register */
                atomic_inc(&count_count_start);
 
+               /* Count will be initialised to current timer */
+               if (i == 1)
+                       initcount = read_c0_count();
+
                /*
                 * Everyone initialises count in the last loop:
                 */
@@ -73,7 +65,7 @@ void synchronise_count_master(int cpu)
                        write_c0_count(initcount);
 
                /*
-                * Wait for all slaves to leave the synchronization point:
+                * Wait for slave to leave the synchronization point:
                 */
                while (atomic_read(&count_count_stop) != 1)
                        mb();
@@ -83,7 +75,6 @@ void synchronise_count_master(int cpu)
        }
        /* Arrange for an interrupt in a short while */
        write_c0_compare(read_c0_count() + COUNTON);
-       atomic_set(&count_start_flag, 0);
 
        local_irq_restore(flags);
 
@@ -98,19 +89,12 @@ void synchronise_count_master(int cpu)
 void synchronise_count_slave(int cpu)
 {
        int i;
-       unsigned int initcount;
 
        /*
         * Not every cpu is online at the time this gets called,
         * so we first wait for the master to say everyone is ready
         */
 
-       while (atomic_read(&count_start_flag) != cpu)
-               mb();
-
-       /* Count will be initialised to next expire for all CPU's */
-       initcount = atomic_read(&count_reference);
-
        for (i = 0; i < NR_LOOPS; i++) {
                atomic_inc(&count_count_start);
                while (atomic_read(&count_count_start) != 2)
index 886cb1976e90f682dafac7a958043d68bc4b6473..bafcb7ad5c854d5109010485dd190f12486c363e 100644 (file)
@@ -2250,7 +2250,7 @@ void __init trap_init(void)
         * Only some CPUs have the watch exceptions.
         */
        if (cpu_has_watch)
-               set_except_vector(23, handle_watch);
+               set_except_vector(EXCCODE_WATCH, handle_watch);
 
        /*
         * Initialise interrupt handlers
@@ -2277,27 +2277,27 @@ void __init trap_init(void)
        if (board_be_init)
                board_be_init();
 
-       set_except_vector(0, using_rollback_handler() ? rollback_handle_int
-                                                     : handle_int);
-       set_except_vector(1, handle_tlbm);
-       set_except_vector(2, handle_tlbl);
-       set_except_vector(3, handle_tlbs);
+       set_except_vector(EXCCODE_INT, using_rollback_handler() ?
+                                       rollback_handle_int : handle_int);
+       set_except_vector(EXCCODE_MOD, handle_tlbm);
+       set_except_vector(EXCCODE_TLBL, handle_tlbl);
+       set_except_vector(EXCCODE_TLBS, handle_tlbs);
 
-       set_except_vector(4, handle_adel);
-       set_except_vector(5, handle_ades);
+       set_except_vector(EXCCODE_ADEL, handle_adel);
+       set_except_vector(EXCCODE_ADES, handle_ades);
 
-       set_except_vector(6, handle_ibe);
-       set_except_vector(7, handle_dbe);
+       set_except_vector(EXCCODE_IBE, handle_ibe);
+       set_except_vector(EXCCODE_DBE, handle_dbe);
 
-       set_except_vector(8, handle_sys);
-       set_except_vector(9, handle_bp);
-       set_except_vector(10, rdhwr_noopt ? handle_ri :
+       set_except_vector(EXCCODE_SYS, handle_sys);
+       set_except_vector(EXCCODE_BP, handle_bp);
+       set_except_vector(EXCCODE_RI, rdhwr_noopt ? handle_ri :
                          (cpu_has_vtag_icache ?
                           handle_ri_rdhwr_vivt : handle_ri_rdhwr));
-       set_except_vector(11, handle_cpu);
-       set_except_vector(12, handle_ov);
-       set_except_vector(13, handle_tr);
-       set_except_vector(14, handle_msa_fpe);
+       set_except_vector(EXCCODE_CPU, handle_cpu);
+       set_except_vector(EXCCODE_OV, handle_ov);
+       set_except_vector(EXCCODE_TR, handle_tr);
+       set_except_vector(EXCCODE_MSAFPE, handle_msa_fpe);
 
        if (current_cpu_type() == CPU_R6000 ||
            current_cpu_type() == CPU_R6000A) {
@@ -2318,25 +2318,25 @@ void __init trap_init(void)
                board_nmi_handler_setup();
 
        if (cpu_has_fpu && !cpu_has_nofpuex)
-               set_except_vector(15, handle_fpe);
+               set_except_vector(EXCCODE_FPE, handle_fpe);
 
-       set_except_vector(16, handle_ftlb);
+       set_except_vector(MIPS_EXCCODE_TLBPAR, handle_ftlb);
 
        if (cpu_has_rixiex) {
-               set_except_vector(19, tlb_do_page_fault_0);
-               set_except_vector(20, tlb_do_page_fault_0);
+               set_except_vector(EXCCODE_TLBRI, tlb_do_page_fault_0);
+               set_except_vector(EXCCODE_TLBXI, tlb_do_page_fault_0);
        }
 
-       set_except_vector(21, handle_msa);
-       set_except_vector(22, handle_mdmx);
+       set_except_vector(EXCCODE_MSADIS, handle_msa);
+       set_except_vector(EXCCODE_MDMX, handle_mdmx);
 
        if (cpu_has_mcheck)
-               set_except_vector(24, handle_mcheck);
+               set_except_vector(EXCCODE_MCHECK, handle_mcheck);
 
        if (cpu_has_mipsmt)
-               set_except_vector(25, handle_mt);
+               set_except_vector(EXCCODE_THREAD, handle_mt);
 
-       set_except_vector(26, handle_dsp);
+       set_except_vector(EXCCODE_DSPDIS, handle_dsp);
 
        if (board_cache_error_setup)
                board_cache_error_setup();
index 313c2e37b978ba730372baafdc51dd707ed8e86b..d88aa2173fb0b16337e3c0ae06d86c87fb959ad4 100644 (file)
@@ -11,4 +11,4 @@
 #include <linux/kvm_host.h>
 
 struct kvm_mips_callbacks *kvm_mips_callbacks;
-EXPORT_SYMBOL(kvm_mips_callbacks);
+EXPORT_SYMBOL_GPL(kvm_mips_callbacks);
index 521121bdebff94a622b280544fcacddab1434b5d..f1527a465c1b1b071356b18ea066a468b30edabb 100644 (file)
@@ -86,10 +86,8 @@ int kvm_mips_trans_mfc0(uint32_t inst, uint32_t *opc, struct kvm_vcpu *vcpu)
        } else {
                mfc0_inst = LW_TEMPLATE;
                mfc0_inst |= ((rt & 0x1f) << 16);
-               mfc0_inst |=
-                   offsetof(struct mips_coproc,
-                            reg[rd][sel]) + offsetof(struct kvm_mips_commpage,
-                                                     cop0);
+               mfc0_inst |= offsetof(struct kvm_mips_commpage,
+                                     cop0.reg[rd][sel]);
        }
 
        if (KVM_GUEST_KSEGX(opc) == KVM_GUEST_KSEG0) {
@@ -123,9 +121,7 @@ int kvm_mips_trans_mtc0(uint32_t inst, uint32_t *opc, struct kvm_vcpu *vcpu)
        sel = inst & 0x7;
 
        mtc0_inst |= ((rt & 0x1f) << 16);
-       mtc0_inst |=
-           offsetof(struct mips_coproc,
-                    reg[rd][sel]) + offsetof(struct kvm_mips_commpage, cop0);
+       mtc0_inst |= offsetof(struct kvm_mips_commpage, cop0.reg[rd][sel]);
 
        if (KVM_GUEST_KSEGX(opc) == KVM_GUEST_KSEG0) {
                kseg0_opc =
index 1b675c7ce89f89d25f3457659405419505a8158d..b37954cc880d6ce06d9ca408962a4fabf7e7de06 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/random.h>
 #include <asm/page.h>
 #include <asm/cacheflush.h>
+#include <asm/cacheops.h>
 #include <asm/cpu-info.h>
 #include <asm/mmu_context.h>
 #include <asm/tlbflush.h>
@@ -29,7 +30,6 @@
 #include <asm/r4kcache.h>
 #define CONFIG_MIPS_MT
 
-#include "opcode.h"
 #include "interrupt.h"
 #include "commpage.h"
 
@@ -1239,21 +1239,20 @@ enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc,
                        er = EMULATE_FAIL;
                        break;
 
-               case mfmcz_op:
+               case mfmc0_op:
 #ifdef KVM_MIPS_DEBUG_COP0_COUNTERS
                        cop0->stat[MIPS_CP0_STATUS][0]++;
 #endif
-                       if (rt != 0) {
+                       if (rt != 0)
                                vcpu->arch.gprs[rt] =
                                    kvm_read_c0_guest_status(cop0);
-                       }
                        /* EI */
                        if (inst & 0x20) {
-                               kvm_debug("[%#lx] mfmcz_op: EI\n",
+                               kvm_debug("[%#lx] mfmc0_op: EI\n",
                                          vcpu->arch.pc);
                                kvm_set_c0_guest_status(cop0, ST0_IE);
                        } else {
-                               kvm_debug("[%#lx] mfmcz_op: DI\n",
+                               kvm_debug("[%#lx] mfmc0_op: DI\n",
                                          vcpu->arch.pc);
                                kvm_clear_c0_guest_status(cop0, ST0_IE);
                        }
@@ -1545,19 +1544,6 @@ int kvm_mips_sync_icache(unsigned long va, struct kvm_vcpu *vcpu)
        return 0;
 }
 
-#define MIPS_CACHE_OP_INDEX_INV         0x0
-#define MIPS_CACHE_OP_INDEX_LD_TAG      0x1
-#define MIPS_CACHE_OP_INDEX_ST_TAG      0x2
-#define MIPS_CACHE_OP_IMP               0x3
-#define MIPS_CACHE_OP_HIT_INV           0x4
-#define MIPS_CACHE_OP_FILL_WB_INV       0x5
-#define MIPS_CACHE_OP_HIT_HB            0x6
-#define MIPS_CACHE_OP_FETCH_LOCK        0x7
-
-#define MIPS_CACHE_ICACHE               0x0
-#define MIPS_CACHE_DCACHE               0x1
-#define MIPS_CACHE_SEC                  0x3
-
 enum emulation_result kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc,
                                             uint32_t cause,
                                             struct kvm_run *run,
@@ -1582,8 +1568,8 @@ enum emulation_result kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc,
        base = (inst >> 21) & 0x1f;
        op_inst = (inst >> 16) & 0x1f;
        offset = (int16_t)inst;
-       cache = (inst >> 16) & 0x3;
-       op = (inst >> 18) & 0x7;
+       cache = op_inst & CacheOp_Cache;
+       op = op_inst & CacheOp_Op;
 
        va = arch->gprs[base] + offset;
 
@@ -1595,14 +1581,14 @@ enum emulation_result kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc,
         * invalidate the caches entirely by stepping through all the
         * ways/indexes
         */
-       if (op == MIPS_CACHE_OP_INDEX_INV) {
+       if (op == Index_Writeback_Inv) {
                kvm_debug("@ %#lx/%#lx CACHE (cache: %#x, op: %#x, base[%d]: %#lx, offset: %#x\n",
                          vcpu->arch.pc, vcpu->arch.gprs[31], cache, op, base,
                          arch->gprs[base], offset);
 
-               if (cache == MIPS_CACHE_DCACHE)
+               if (cache == Cache_D)
                        r4k_blast_dcache();
-               else if (cache == MIPS_CACHE_ICACHE)
+               else if (cache == Cache_I)
                        r4k_blast_icache();
                else {
                        kvm_err("%s: unsupported CACHE INDEX operation\n",
@@ -1675,9 +1661,7 @@ enum emulation_result kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc,
 
 skip_fault:
        /* XXXKYMA: Only a subset of cache ops are supported, used by Linux */
-       if (cache == MIPS_CACHE_DCACHE
-           && (op == MIPS_CACHE_OP_FILL_WB_INV
-               || op == MIPS_CACHE_OP_HIT_INV)) {
+       if (op_inst == Hit_Writeback_Inv_D || op_inst == Hit_Invalidate_D) {
                flush_dcache_line(va);
 
 #ifdef CONFIG_KVM_MIPS_DYN_TRANS
@@ -1687,7 +1671,7 @@ skip_fault:
                 */
                kvm_mips_trans_cache_va(inst, opc, vcpu);
 #endif
-       } else if (op == MIPS_CACHE_OP_HIT_INV && cache == MIPS_CACHE_ICACHE) {
+       } else if (op_inst == Hit_Invalidate_I) {
                flush_dcache_line(va);
                flush_icache_line(va);
 
@@ -1781,7 +1765,7 @@ enum emulation_result kvm_mips_emulate_syscall(unsigned long cause,
                kvm_debug("Delivering SYSCALL @ pc %#lx\n", arch->pc);
 
                kvm_change_c0_guest_cause(cop0, (0xff),
-                                         (T_SYSCALL << CAUSEB_EXCCODE));
+                                         (EXCCODE_SYS << CAUSEB_EXCCODE));
 
                /* Set PC to the exception entry point */
                arch->pc = KVM_GUEST_KSEG0 + 0x180;
@@ -1828,7 +1812,7 @@ enum emulation_result kvm_mips_emulate_tlbmiss_ld(unsigned long cause,
        }
 
        kvm_change_c0_guest_cause(cop0, (0xff),
-                                 (T_TLB_LD_MISS << CAUSEB_EXCCODE));
+                                 (EXCCODE_TLBL << CAUSEB_EXCCODE));
 
        /* setup badvaddr, context and entryhi registers for the guest */
        kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr);
@@ -1874,7 +1858,7 @@ enum emulation_result kvm_mips_emulate_tlbinv_ld(unsigned long cause,
        }
 
        kvm_change_c0_guest_cause(cop0, (0xff),
-                                 (T_TLB_LD_MISS << CAUSEB_EXCCODE));
+                                 (EXCCODE_TLBL << CAUSEB_EXCCODE));
 
        /* setup badvaddr, context and entryhi registers for the guest */
        kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr);
@@ -1918,7 +1902,7 @@ enum emulation_result kvm_mips_emulate_tlbmiss_st(unsigned long cause,
        }
 
        kvm_change_c0_guest_cause(cop0, (0xff),
-                                 (T_TLB_ST_MISS << CAUSEB_EXCCODE));
+                                 (EXCCODE_TLBS << CAUSEB_EXCCODE));
 
        /* setup badvaddr, context and entryhi registers for the guest */
        kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr);
@@ -1962,7 +1946,7 @@ enum emulation_result kvm_mips_emulate_tlbinv_st(unsigned long cause,
        }
 
        kvm_change_c0_guest_cause(cop0, (0xff),
-                                 (T_TLB_ST_MISS << CAUSEB_EXCCODE));
+                                 (EXCCODE_TLBS << CAUSEB_EXCCODE));
 
        /* setup badvaddr, context and entryhi registers for the guest */
        kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr);
@@ -2033,7 +2017,8 @@ enum emulation_result kvm_mips_emulate_tlbmod(unsigned long cause,
                arch->pc = KVM_GUEST_KSEG0 + 0x180;
        }
 
-       kvm_change_c0_guest_cause(cop0, (0xff), (T_TLB_MOD << CAUSEB_EXCCODE));
+       kvm_change_c0_guest_cause(cop0, (0xff),
+                                 (EXCCODE_MOD << CAUSEB_EXCCODE));
 
        /* setup badvaddr, context and entryhi registers for the guest */
        kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr);
@@ -2068,7 +2053,7 @@ enum emulation_result kvm_mips_emulate_fpu_exc(unsigned long cause,
        arch->pc = KVM_GUEST_KSEG0 + 0x180;
 
        kvm_change_c0_guest_cause(cop0, (0xff),
-                                 (T_COP_UNUSABLE << CAUSEB_EXCCODE));
+                                 (EXCCODE_CPU << CAUSEB_EXCCODE));
        kvm_change_c0_guest_cause(cop0, (CAUSEF_CE), (0x1 << CAUSEB_CE));
 
        return EMULATE_DONE;
@@ -2096,7 +2081,7 @@ enum emulation_result kvm_mips_emulate_ri_exc(unsigned long cause,
                kvm_debug("Delivering RI @ pc %#lx\n", arch->pc);
 
                kvm_change_c0_guest_cause(cop0, (0xff),
-                                         (T_RES_INST << CAUSEB_EXCCODE));
+                                         (EXCCODE_RI << CAUSEB_EXCCODE));
 
                /* Set PC to the exception entry point */
                arch->pc = KVM_GUEST_KSEG0 + 0x180;
@@ -2131,7 +2116,7 @@ enum emulation_result kvm_mips_emulate_bp_exc(unsigned long cause,
                kvm_debug("Delivering BP @ pc %#lx\n", arch->pc);
 
                kvm_change_c0_guest_cause(cop0, (0xff),
-                                         (T_BREAK << CAUSEB_EXCCODE));
+                                         (EXCCODE_BP << CAUSEB_EXCCODE));
 
                /* Set PC to the exception entry point */
                arch->pc = KVM_GUEST_KSEG0 + 0x180;
@@ -2166,7 +2151,7 @@ enum emulation_result kvm_mips_emulate_trap_exc(unsigned long cause,
                kvm_debug("Delivering TRAP @ pc %#lx\n", arch->pc);
 
                kvm_change_c0_guest_cause(cop0, (0xff),
-                                         (T_TRAP << CAUSEB_EXCCODE));
+                                         (EXCCODE_TR << CAUSEB_EXCCODE));
 
                /* Set PC to the exception entry point */
                arch->pc = KVM_GUEST_KSEG0 + 0x180;
@@ -2201,7 +2186,7 @@ enum emulation_result kvm_mips_emulate_msafpe_exc(unsigned long cause,
                kvm_debug("Delivering MSAFPE @ pc %#lx\n", arch->pc);
 
                kvm_change_c0_guest_cause(cop0, (0xff),
-                                         (T_MSAFPE << CAUSEB_EXCCODE));
+                                         (EXCCODE_MSAFPE << CAUSEB_EXCCODE));
 
                /* Set PC to the exception entry point */
                arch->pc = KVM_GUEST_KSEG0 + 0x180;
@@ -2236,7 +2221,7 @@ enum emulation_result kvm_mips_emulate_fpe_exc(unsigned long cause,
                kvm_debug("Delivering FPE @ pc %#lx\n", arch->pc);
 
                kvm_change_c0_guest_cause(cop0, (0xff),
-                                         (T_FPE << CAUSEB_EXCCODE));
+                                         (EXCCODE_FPE << CAUSEB_EXCCODE));
 
                /* Set PC to the exception entry point */
                arch->pc = KVM_GUEST_KSEG0 + 0x180;
@@ -2271,7 +2256,7 @@ enum emulation_result kvm_mips_emulate_msadis_exc(unsigned long cause,
                kvm_debug("Delivering MSADIS @ pc %#lx\n", arch->pc);
 
                kvm_change_c0_guest_cause(cop0, (0xff),
-                                         (T_MSADIS << CAUSEB_EXCCODE));
+                                         (EXCCODE_MSADIS << CAUSEB_EXCCODE));
 
                /* Set PC to the exception entry point */
                arch->pc = KVM_GUEST_KSEG0 + 0x180;
@@ -2480,25 +2465,25 @@ enum emulation_result kvm_mips_check_privilege(unsigned long cause,
 
        if (usermode) {
                switch (exccode) {
-               case T_INT:
-               case T_SYSCALL:
-               case T_BREAK:
-               case T_RES_INST:
-               case T_TRAP:
-               case T_MSAFPE:
-               case T_FPE:
-               case T_MSADIS:
+               case EXCCODE_INT:
+               case EXCCODE_SYS:
+               case EXCCODE_BP:
+               case EXCCODE_RI:
+               case EXCCODE_TR:
+               case EXCCODE_MSAFPE:
+               case EXCCODE_FPE:
+               case EXCCODE_MSADIS:
                        break;
 
-               case T_COP_UNUSABLE:
+               case EXCCODE_CPU:
                        if (((cause & CAUSEF_CE) >> CAUSEB_CE) == 0)
                                er = EMULATE_PRIV_FAIL;
                        break;
 
-               case T_TLB_MOD:
+               case EXCCODE_MOD:
                        break;
 
-               case T_TLB_LD_MISS:
+               case EXCCODE_TLBL:
                        /*
                         * We we are accessing Guest kernel space, then send an
                         * address error exception to the guest
@@ -2507,12 +2492,12 @@ enum emulation_result kvm_mips_check_privilege(unsigned long cause,
                                kvm_debug("%s: LD MISS @ %#lx\n", __func__,
                                          badvaddr);
                                cause &= ~0xff;
-                               cause |= (T_ADDR_ERR_LD << CAUSEB_EXCCODE);
+                               cause |= (EXCCODE_ADEL << CAUSEB_EXCCODE);
                                er = EMULATE_PRIV_FAIL;
                        }
                        break;
 
-               case T_TLB_ST_MISS:
+               case EXCCODE_TLBS:
                        /*
                         * We we are accessing Guest kernel space, then send an
                         * address error exception to the guest
@@ -2521,26 +2506,26 @@ enum emulation_result kvm_mips_check_privilege(unsigned long cause,
                                kvm_debug("%s: ST MISS @ %#lx\n", __func__,
                                          badvaddr);
                                cause &= ~0xff;
-                               cause |= (T_ADDR_ERR_ST << CAUSEB_EXCCODE);
+                               cause |= (EXCCODE_ADES << CAUSEB_EXCCODE);
                                er = EMULATE_PRIV_FAIL;
                        }
                        break;
 
-               case T_ADDR_ERR_ST:
+               case EXCCODE_ADES:
                        kvm_debug("%s: address error ST @ %#lx\n", __func__,
                                  badvaddr);
                        if ((badvaddr & PAGE_MASK) == KVM_GUEST_COMMPAGE_ADDR) {
                                cause &= ~0xff;
-                               cause |= (T_TLB_ST_MISS << CAUSEB_EXCCODE);
+                               cause |= (EXCCODE_TLBS << CAUSEB_EXCCODE);
                        }
                        er = EMULATE_PRIV_FAIL;
                        break;
-               case T_ADDR_ERR_LD:
+               case EXCCODE_ADEL:
                        kvm_debug("%s: address error LD @ %#lx\n", __func__,
                                  badvaddr);
                        if ((badvaddr & PAGE_MASK) == KVM_GUEST_COMMPAGE_ADDR) {
                                cause &= ~0xff;
-                               cause |= (T_TLB_LD_MISS << CAUSEB_EXCCODE);
+                               cause |= (EXCCODE_TLBL << CAUSEB_EXCCODE);
                        }
                        er = EMULATE_PRIV_FAIL;
                        break;
@@ -2583,13 +2568,12 @@ enum emulation_result kvm_mips_handle_tlbmiss(unsigned long cause,
         * an entry into the guest TLB.
         */
        index = kvm_mips_guest_tlb_lookup(vcpu,
-                                         (va & VPN2_MASK) |
-                                         (kvm_read_c0_guest_entryhi
-                                          (vcpu->arch.cop0) & ASID_MASK));
+                     (va & VPN2_MASK) |
+                     (kvm_read_c0_guest_entryhi(vcpu->arch.cop0) & ASID_MASK));
        if (index < 0) {
-               if (exccode == T_TLB_LD_MISS) {
+               if (exccode == EXCCODE_TLBL) {
                        er = kvm_mips_emulate_tlbmiss_ld(cause, opc, run, vcpu);
-               } else if (exccode == T_TLB_ST_MISS) {
+               } else if (exccode == EXCCODE_TLBS) {
                        er = kvm_mips_emulate_tlbmiss_st(cause, opc, run, vcpu);
                } else {
                        kvm_err("%s: invalid exc code: %d\n", __func__,
@@ -2604,10 +2588,10 @@ enum emulation_result kvm_mips_handle_tlbmiss(unsigned long cause,
                 * exception to the guest
                 */
                if (!TLB_IS_VALID(*tlb, va)) {
-                       if (exccode == T_TLB_LD_MISS) {
+                       if (exccode == EXCCODE_TLBL) {
                                er = kvm_mips_emulate_tlbinv_ld(cause, opc, run,
                                                                vcpu);
-                       } else if (exccode == T_TLB_ST_MISS) {
+                       } else if (exccode == EXCCODE_TLBS) {
                                er = kvm_mips_emulate_tlbinv_st(cause, opc, run,
                                                                vcpu);
                        } else {
index 9b4445940c2bcc941b217aa3b983a4369f62f382..95f790663b0c2af09a3e0dbf2fe836b58ecfd078 100644 (file)
@@ -128,7 +128,7 @@ int kvm_mips_irq_deliver_cb(struct kvm_vcpu *vcpu, unsigned int priority,
                    && (!(kvm_read_c0_guest_status(cop0) & (ST0_EXL | ST0_ERL)))
                    && (kvm_read_c0_guest_status(cop0) & IE_IRQ5)) {
                        allowed = 1;
-                       exccode = T_INT;
+                       exccode = EXCCODE_INT;
                }
                break;
 
@@ -137,7 +137,7 @@ int kvm_mips_irq_deliver_cb(struct kvm_vcpu *vcpu, unsigned int priority,
                    && (!(kvm_read_c0_guest_status(cop0) & (ST0_EXL | ST0_ERL)))
                    && (kvm_read_c0_guest_status(cop0) & IE_IRQ0)) {
                        allowed = 1;
-                       exccode = T_INT;
+                       exccode = EXCCODE_INT;
                }
                break;
 
@@ -146,7 +146,7 @@ int kvm_mips_irq_deliver_cb(struct kvm_vcpu *vcpu, unsigned int priority,
                    && (!(kvm_read_c0_guest_status(cop0) & (ST0_EXL | ST0_ERL)))
                    && (kvm_read_c0_guest_status(cop0) & IE_IRQ1)) {
                        allowed = 1;
-                       exccode = T_INT;
+                       exccode = EXCCODE_INT;
                }
                break;
 
@@ -155,7 +155,7 @@ int kvm_mips_irq_deliver_cb(struct kvm_vcpu *vcpu, unsigned int priority,
                    && (!(kvm_read_c0_guest_status(cop0) & (ST0_EXL | ST0_ERL)))
                    && (kvm_read_c0_guest_status(cop0) & IE_IRQ2)) {
                        allowed = 1;
-                       exccode = T_INT;
+                       exccode = EXCCODE_INT;
                }
                break;
 
index 7e2210846b8b9d1519f679e6a0950fcaf1e6dfd5..81687ab1b523eaebce4f2edbaeffc3b515a1b9e9 100644 (file)
@@ -335,7 +335,7 @@ NESTED (MIPSX(GuestException), CALLFRAME_SIZ, ra)
 
        /* Now restore the host state just enough to run the handlers */
 
-       /* Swtich EBASE to the one used by Linux */
+       /* Switch EBASE to the one used by Linux */
        /* load up the host EBASE */
        mfc0    v0, CP0_STATUS
 
@@ -490,11 +490,11 @@ __kvm_mips_return_to_guest:
        REG_ADDU t3, t1, t2
        LONG_L  k0, (t3)
        andi    k0, k0, 0xff
-       mtc0    k0,CP0_ENTRYHI
+       mtc0    k0, CP0_ENTRYHI
        ehb
 
        /* Disable RDHWR access */
-       mtc0    zero,  CP0_HWRENA
+       mtc0    zero, CP0_HWRENA
 
        /* load the guest context from VCPU and return */
        LONG_L  $0, VCPU_R0(k1)
@@ -606,11 +606,11 @@ __kvm_mips_return_to_host:
 
        /* Restore RDHWR access */
        PTR_LI  k0, 0x2000000F
-       mtc0    k0,  CP0_HWRENA
+       mtc0    k0, CP0_HWRENA
 
        /* Restore RA, which is the address we will return to */
-       LONG_L  ra, PT_R31(k1)
-       j       ra
+       LONG_L  ra, PT_R31(k1)
+       j       ra
         nop
 
 VECTOR_END(MIPSX(GuestExceptionEnd))
index b9b803facdbf7594dc700ca828a2ced604e617aa..8bc3977576e6af5f7b5c1a17f306da0f21b26937 100644 (file)
@@ -229,7 +229,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
                            kzalloc(npages * sizeof(unsigned long), GFP_KERNEL);
 
                        if (!kvm->arch.guest_pmap) {
-                               kvm_err("Failed to allocate guest PMAP");
+                               kvm_err("Failed to allocate guest PMAP\n");
                                return;
                        }
 
@@ -1264,8 +1264,8 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
        }
 
        switch (exccode) {
-       case T_INT:
-               kvm_debug("[%d]T_INT @ %p\n", vcpu->vcpu_id, opc);
+       case EXCCODE_INT:
+               kvm_debug("[%d]EXCCODE_INT @ %p\n", vcpu->vcpu_id, opc);
 
                ++vcpu->stat.int_exits;
                trace_kvm_exit(vcpu, INT_EXITS);
@@ -1276,8 +1276,8 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
                ret = RESUME_GUEST;
                break;
 
-       case T_COP_UNUSABLE:
-               kvm_debug("T_COP_UNUSABLE: @ PC: %p\n", opc);
+       case EXCCODE_CPU:
+               kvm_debug("EXCCODE_CPU: @ PC: %p\n", opc);
 
                ++vcpu->stat.cop_unusable_exits;
                trace_kvm_exit(vcpu, COP_UNUSABLE_EXITS);
@@ -1287,13 +1287,13 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
                        ret = RESUME_HOST;
                break;
 
-       case T_TLB_MOD:
+       case EXCCODE_MOD:
                ++vcpu->stat.tlbmod_exits;
                trace_kvm_exit(vcpu, TLBMOD_EXITS);
                ret = kvm_mips_callbacks->handle_tlb_mod(vcpu);
                break;
 
-       case T_TLB_ST_MISS:
+       case EXCCODE_TLBS:
                kvm_debug("TLB ST fault:  cause %#x, status %#lx, PC: %p, BadVaddr: %#lx\n",
                          cause, kvm_read_c0_guest_status(vcpu->arch.cop0), opc,
                          badvaddr);
@@ -1303,7 +1303,7 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
                ret = kvm_mips_callbacks->handle_tlb_st_miss(vcpu);
                break;
 
-       case T_TLB_LD_MISS:
+       case EXCCODE_TLBL:
                kvm_debug("TLB LD fault: cause %#x, PC: %p, BadVaddr: %#lx\n",
                          cause, opc, badvaddr);
 
@@ -1312,55 +1312,55 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
                ret = kvm_mips_callbacks->handle_tlb_ld_miss(vcpu);
                break;
 
-       case T_ADDR_ERR_ST:
+       case EXCCODE_ADES:
                ++vcpu->stat.addrerr_st_exits;
                trace_kvm_exit(vcpu, ADDRERR_ST_EXITS);
                ret = kvm_mips_callbacks->handle_addr_err_st(vcpu);
                break;
 
-       case T_ADDR_ERR_LD:
+       case EXCCODE_ADEL:
                ++vcpu->stat.addrerr_ld_exits;
                trace_kvm_exit(vcpu, ADDRERR_LD_EXITS);
                ret = kvm_mips_callbacks->handle_addr_err_ld(vcpu);
                break;
 
-       case T_SYSCALL:
+       case EXCCODE_SYS:
                ++vcpu->stat.syscall_exits;
                trace_kvm_exit(vcpu, SYSCALL_EXITS);
                ret = kvm_mips_callbacks->handle_syscall(vcpu);
                break;
 
-       case T_RES_INST:
+       case EXCCODE_RI:
                ++vcpu->stat.resvd_inst_exits;
                trace_kvm_exit(vcpu, RESVD_INST_EXITS);
                ret = kvm_mips_callbacks->handle_res_inst(vcpu);
                break;
 
-       case T_BREAK:
+       case EXCCODE_BP:
                ++vcpu->stat.break_inst_exits;
                trace_kvm_exit(vcpu, BREAK_INST_EXITS);
                ret = kvm_mips_callbacks->handle_break(vcpu);
                break;
 
-       case T_TRAP:
+       case EXCCODE_TR:
                ++vcpu->stat.trap_inst_exits;
                trace_kvm_exit(vcpu, TRAP_INST_EXITS);
                ret = kvm_mips_callbacks->handle_trap(vcpu);
                break;
 
-       case T_MSAFPE:
+       case EXCCODE_MSAFPE:
                ++vcpu->stat.msa_fpe_exits;
                trace_kvm_exit(vcpu, MSA_FPE_EXITS);
                ret = kvm_mips_callbacks->handle_msa_fpe(vcpu);
                break;
 
-       case T_FPE:
+       case EXCCODE_FPE:
                ++vcpu->stat.fpe_exits;
                trace_kvm_exit(vcpu, FPE_EXITS);
                ret = kvm_mips_callbacks->handle_fpe(vcpu);
                break;
 
-       case T_MSADIS:
+       case EXCCODE_MSADIS:
                ++vcpu->stat.msa_disabled_exits;
                trace_kvm_exit(vcpu, MSA_DISABLED_EXITS);
                ret = kvm_mips_callbacks->handle_msa_disabled(vcpu);
@@ -1620,7 +1620,7 @@ static struct notifier_block kvm_mips_csr_die_notifier = {
        .notifier_call = kvm_mips_csr_die_notify,
 };
 
-int __init kvm_mips_init(void)
+static int __init kvm_mips_init(void)
 {
        int ret;
 
@@ -1646,7 +1646,7 @@ int __init kvm_mips_init(void)
        return 0;
 }
 
-void __exit kvm_mips_exit(void)
+static void __exit kvm_mips_exit(void)
 {
        kvm_exit();
 
diff --git a/arch/mips/kvm/opcode.h b/arch/mips/kvm/opcode.h
deleted file mode 100644 (file)
index 03a6ae8..0000000
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2012  MIPS Technologies, Inc.  All rights reserved.
- * Authors: Sanjay Lal <sanjayl@kymasys.com>
- */
-
-/* Define opcode values not defined in <asm/isnt.h> */
-
-#ifndef __KVM_MIPS_OPCODE_H__
-#define __KVM_MIPS_OPCODE_H__
-
-/* COP0 Ops */
-#define mfmcz_op       0x0b    /* 01011 */
-#define wrpgpr_op      0x0e    /* 01110 */
-
-/* COP0 opcodes (only if COP0 and CO=1): */
-#define wait_op                0x20    /* 100000 */
-
-#endif /* __KVM_MIPS_OPCODE_H__ */
index 570479c03bdc35009f48985e70d17f00686b5012..a08c439462472e3a2e441c1238d62cdee2988ed7 100644 (file)
 #define PRIx64 "llx"
 
 atomic_t kvm_mips_instance;
-EXPORT_SYMBOL(kvm_mips_instance);
+EXPORT_SYMBOL_GPL(kvm_mips_instance);
 
 /* These function pointers are initialized once the KVM module is loaded */
 kvm_pfn_t (*kvm_mips_gfn_to_pfn)(struct kvm *kvm, gfn_t gfn);
-EXPORT_SYMBOL(kvm_mips_gfn_to_pfn);
+EXPORT_SYMBOL_GPL(kvm_mips_gfn_to_pfn);
 
 void (*kvm_mips_release_pfn_clean)(kvm_pfn_t pfn);
-EXPORT_SYMBOL(kvm_mips_release_pfn_clean);
+EXPORT_SYMBOL_GPL(kvm_mips_release_pfn_clean);
 
 bool (*kvm_mips_is_error_pfn)(kvm_pfn_t pfn);
-EXPORT_SYMBOL(kvm_mips_is_error_pfn);
+EXPORT_SYMBOL_GPL(kvm_mips_is_error_pfn);
 
 uint32_t kvm_mips_get_kernel_asid(struct kvm_vcpu *vcpu)
 {
@@ -111,7 +111,7 @@ void kvm_mips_dump_host_tlbs(void)
        mtc0_tlbw_hazard();
        local_irq_restore(flags);
 }
-EXPORT_SYMBOL(kvm_mips_dump_host_tlbs);
+EXPORT_SYMBOL_GPL(kvm_mips_dump_host_tlbs);
 
 void kvm_mips_dump_guest_tlbs(struct kvm_vcpu *vcpu)
 {
@@ -139,7 +139,7 @@ void kvm_mips_dump_guest_tlbs(struct kvm_vcpu *vcpu)
                         (tlb.tlb_lo1 >> 3) & 7, tlb.tlb_mask);
        }
 }
-EXPORT_SYMBOL(kvm_mips_dump_guest_tlbs);
+EXPORT_SYMBOL_GPL(kvm_mips_dump_guest_tlbs);
 
 static int kvm_mips_map_page(struct kvm *kvm, gfn_t gfn)
 {
@@ -191,7 +191,7 @@ unsigned long kvm_mips_translate_guest_kseg0_to_hpa(struct kvm_vcpu *vcpu,
 
        return (kvm->arch.guest_pmap[gfn] << PAGE_SHIFT) + offset;
 }
-EXPORT_SYMBOL(kvm_mips_translate_guest_kseg0_to_hpa);
+EXPORT_SYMBOL_GPL(kvm_mips_translate_guest_kseg0_to_hpa);
 
 /* XXXKYMA: Must be called with interrupts disabled */
 /* set flush_dcache_mask == 0 if no dcache flush required */
@@ -308,7 +308,7 @@ int kvm_mips_handle_kseg0_tlb_fault(unsigned long badvaddr,
        return kvm_mips_host_tlb_write(vcpu, entryhi, entrylo0, entrylo1,
                                       flush_dcache_mask);
 }
-EXPORT_SYMBOL(kvm_mips_handle_kseg0_tlb_fault);
+EXPORT_SYMBOL_GPL(kvm_mips_handle_kseg0_tlb_fault);
 
 int kvm_mips_handle_commpage_tlb_fault(unsigned long badvaddr,
        struct kvm_vcpu *vcpu)
@@ -351,7 +351,7 @@ int kvm_mips_handle_commpage_tlb_fault(unsigned long badvaddr,
 
        return 0;
 }
-EXPORT_SYMBOL(kvm_mips_handle_commpage_tlb_fault);
+EXPORT_SYMBOL_GPL(kvm_mips_handle_commpage_tlb_fault);
 
 int kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu,
                                         struct kvm_mips_tlb *tlb,
@@ -401,7 +401,7 @@ int kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu,
        return kvm_mips_host_tlb_write(vcpu, entryhi, entrylo0, entrylo1,
                                       tlb->tlb_mask);
 }
-EXPORT_SYMBOL(kvm_mips_handle_mapped_seg_tlb_fault);
+EXPORT_SYMBOL_GPL(kvm_mips_handle_mapped_seg_tlb_fault);
 
 int kvm_mips_guest_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long entryhi)
 {
@@ -422,7 +422,7 @@ int kvm_mips_guest_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long entryhi)
 
        return index;
 }
-EXPORT_SYMBOL(kvm_mips_guest_tlb_lookup);
+EXPORT_SYMBOL_GPL(kvm_mips_guest_tlb_lookup);
 
 int kvm_mips_host_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long vaddr)
 {
@@ -458,7 +458,7 @@ int kvm_mips_host_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long vaddr)
 
        return idx;
 }
-EXPORT_SYMBOL(kvm_mips_host_tlb_lookup);
+EXPORT_SYMBOL_GPL(kvm_mips_host_tlb_lookup);
 
 int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va)
 {
@@ -505,44 +505,7 @@ int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va)
 
        return 0;
 }
-EXPORT_SYMBOL(kvm_mips_host_tlb_inv);
-
-/* XXXKYMA: Fix Guest USER/KERNEL no longer share the same ASID */
-int kvm_mips_host_tlb_inv_index(struct kvm_vcpu *vcpu, int index)
-{
-       unsigned long flags, old_entryhi;
-
-       if (index >= current_cpu_data.tlbsize)
-               BUG();
-
-       local_irq_save(flags);
-
-       old_entryhi = read_c0_entryhi();
-
-       write_c0_entryhi(UNIQUE_ENTRYHI(index));
-       mtc0_tlbw_hazard();
-
-       write_c0_index(index);
-       mtc0_tlbw_hazard();
-
-       write_c0_entrylo0(0);
-       mtc0_tlbw_hazard();
-
-       write_c0_entrylo1(0);
-       mtc0_tlbw_hazard();
-
-       tlb_write_indexed();
-       mtc0_tlbw_hazard();
-       tlbw_use_hazard();
-
-       write_c0_entryhi(old_entryhi);
-       mtc0_tlbw_hazard();
-       tlbw_use_hazard();
-
-       local_irq_restore(flags);
-
-       return 0;
-}
+EXPORT_SYMBOL_GPL(kvm_mips_host_tlb_inv);
 
 void kvm_mips_flush_host_tlb(int skip_kseg0)
 {
@@ -594,7 +557,7 @@ void kvm_mips_flush_host_tlb(int skip_kseg0)
 
        local_irq_restore(flags);
 }
-EXPORT_SYMBOL(kvm_mips_flush_host_tlb);
+EXPORT_SYMBOL_GPL(kvm_mips_flush_host_tlb);
 
 void kvm_get_new_mmu_context(struct mm_struct *mm, unsigned long cpu,
                             struct kvm_vcpu *vcpu)
@@ -642,7 +605,7 @@ void kvm_local_flush_tlb_all(void)
 
        local_irq_restore(flags);
 }
-EXPORT_SYMBOL(kvm_local_flush_tlb_all);
+EXPORT_SYMBOL_GPL(kvm_local_flush_tlb_all);
 
 /**
  * kvm_mips_migrate_count() - Migrate timer.
@@ -673,8 +636,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
        local_irq_save(flags);
 
-       if (((vcpu->arch.
-             guest_kernel_asid[cpu] ^ asid_cache(cpu)) & ASID_VERSION_MASK)) {
+       if ((vcpu->arch.guest_kernel_asid[cpu] ^ asid_cache(cpu)) &
+                                                       ASID_VERSION_MASK) {
                kvm_get_new_mmu_context(&vcpu->arch.guest_kernel_mm, cpu, vcpu);
                vcpu->arch.guest_kernel_asid[cpu] =
                    vcpu->arch.guest_kernel_mm.context.asid[cpu];
@@ -739,7 +702,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
        local_irq_restore(flags);
 
 }
-EXPORT_SYMBOL(kvm_arch_vcpu_load);
+EXPORT_SYMBOL_GPL(kvm_arch_vcpu_load);
 
 /* ASID can change if another task is scheduled during preemption */
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -768,7 +731,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 
        local_irq_restore(flags);
 }
-EXPORT_SYMBOL(kvm_arch_vcpu_put);
+EXPORT_SYMBOL_GPL(kvm_arch_vcpu_put);
 
 uint32_t kvm_get_inst(uint32_t *opc, struct kvm_vcpu *vcpu)
 {
@@ -813,4 +776,4 @@ uint32_t kvm_get_inst(uint32_t *opc, struct kvm_vcpu *vcpu)
 
        return inst;
 }
-EXPORT_SYMBOL(kvm_get_inst);
+EXPORT_SYMBOL_GPL(kvm_get_inst);
index d836ed5b0bc7ea38e36350304a6238a520e0d74d..ad988000563f264d443b92bd0785e40ba78fe95d 100644 (file)
@@ -16,7 +16,6 @@
 
 #include <linux/kvm_host.h>
 
-#include "opcode.h"
 #include "interrupt.h"
 
 static gpa_t kvm_trap_emul_gva_to_gpa_cb(gva_t gva)
index 272af8ac2425290c892849186b170f4b95c259b6..5530070e0d05dd51248afb9a8780eb3a86b173a7 100644 (file)
@@ -57,7 +57,6 @@ notrace void arch_local_irq_disable(void)
 }
 EXPORT_SYMBOL(arch_local_irq_disable);
 
-
 notrace unsigned long arch_local_irq_save(void)
 {
        unsigned long flags;
@@ -111,31 +110,4 @@ notrace void arch_local_irq_restore(unsigned long flags)
 }
 EXPORT_SYMBOL(arch_local_irq_restore);
 
-
-notrace void __arch_local_irq_restore(unsigned long flags)
-{
-       unsigned long __tmp1;
-
-       preempt_disable();
-
-       __asm__ __volatile__(
-       "       .set    push                                            \n"
-       "       .set    noreorder                                       \n"
-       "       .set    noat                                            \n"
-       "       mfc0    $1, $12                                         \n"
-       "       andi    %[flags], 1                                     \n"
-       "       ori     $1, 0x1f                                        \n"
-       "       xori    $1, 0x1f                                        \n"
-       "       or      %[flags], $1                                    \n"
-       "       mtc0    %[flags], $12                                   \n"
-       "       " __stringify(__irq_disable_hazard) "                   \n"
-       "       .set    pop                                             \n"
-       : [flags] "=r" (__tmp1)
-       : "0" (flags)
-       : "memory");
-
-       preempt_enable();
-}
-EXPORT_SYMBOL(__arch_local_irq_restore);
-
-#endif /* !CONFIG_CPU_MIPSR2 */
+#endif /* !CONFIG_CPU_MIPSR2 && !CONFIG_CPU_MIPSR6 */
index 2e48e83d5524ac13a25a98b0c1d9bd60ae2c7ffa..85d808924c94b52b25f24654def7529b76039266 100644 (file)
@@ -22,6 +22,27 @@ ifdef CONFIG_CPU_LOONGSON2F_WORKAROUNDS
   endif
 endif
 
+cflags-$(CONFIG_CPU_LOONGSON3) += -Wa,--trap
+#
+# binutils from v2.25 on and gcc starting from v4.9.0 treat -march=loongson3a
+# as MIPS64 R2; older versions as just R1.  This leaves the possibility open
+# that GCC might generate R2 code for -march=loongson3a which then is rejected
+# by GAS.  The cc-option can't probe for this behaviour so -march=loongson3a
+# can't easily be used safely within the kbuild framework.
+#
+ifeq ($(call cc-ifversion, -ge, 0409, y), y)
+  ifeq ($(call ld-ifversion, -ge, 22500000, y), y)
+    cflags-$(CONFIG_CPU_LOONGSON3)  += \
+      $(call cc-option,-march=loongson3a -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS64)
+  else
+    cflags-$(CONFIG_CPU_LOONGSON3)  += \
+      $(call cc-option,-march=mips64r2,-mips64r2 -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS64)
+  endif
+else
+    cflags-$(CONFIG_CPU_LOONGSON3)  += \
+      $(call cc-option,-march=mips64r2,-mips64r2 -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS64)
+endif
+
 #
 # Loongson Machines' Support
 #
index bf9f1a77f0e59825fcdb6ced3ce73ca78d8c7c19..a2631a52ca998c0029b27cec6fa85bac9d0782d0 100644 (file)
@@ -13,6 +13,9 @@
 #define SMBUS_PCI_REG64                0x64
 #define SMBUS_PCI_REGB4                0xb4
 
+#define HPET_MIN_CYCLES                64
+#define HPET_MIN_PROG_DELTA    (HPET_MIN_CYCLES + (HPET_MIN_CYCLES >> 1))
+
 static DEFINE_SPINLOCK(hpet_lock);
 DEFINE_PER_CPU(struct clock_event_device, hpet_clockevent_device);
 
@@ -161,8 +164,9 @@ static int hpet_next_event(unsigned long delta,
        cnt += delta;
        hpet_write(HPET_T0_CMP, cnt);
 
-       res = ((int)(hpet_read(HPET_COUNTER) - cnt) > 0) ? -ETIME : 0;
-       return res;
+       res = (int)(cnt - hpet_read(HPET_COUNTER));
+
+       return res < HPET_MIN_CYCLES ? -ETIME : 0;
 }
 
 static irqreturn_t hpet_irq_handler(int irq, void *data)
@@ -237,7 +241,7 @@ void __init setup_hpet_timer(void)
        cd->cpumask = cpumask_of(cpu);
        clockevent_set_clock(cd, HPET_FREQ);
        cd->max_delta_ns = clockevent_delta2ns(0x7fffffff, cd);
-       cd->min_delta_ns = 5000;
+       cd->min_delta_ns = clockevent_delta2ns(HPET_MIN_PROG_DELTA, cd);
 
        clockevents_register_device(cd);
        setup_irq(HPET_T0_IRQ, &hpet_irq);
index 1a4738a8f2d3906ccffb58bdf8d9b35ee4b04ef3..509832a9836c9ae70f52aa422ab70f85d7f8971c 100644 (file)
 #include "smp.h"
 
 DEFINE_PER_CPU(int, cpu_state);
-DEFINE_PER_CPU(uint32_t, core0_c0count);
 
 static void *ipi_set0_regs[16];
 static void *ipi_clear0_regs[16];
 static void *ipi_status0_regs[16];
 static void *ipi_en0_regs[16];
 static void *ipi_mailbox_buf[16];
+static uint32_t core0_c0count[NR_CPUS];
 
 /* read a 32bit value from ipi register */
 #define loongson3_ipi_read32(addr) readl(addr)
@@ -275,12 +275,14 @@ void loongson3_ipi_interrupt(struct pt_regs *regs)
        if (action & SMP_ASK_C0COUNT) {
                BUG_ON(cpu != 0);
                c0count = read_c0_count();
-               for (i = 1; i < num_possible_cpus(); i++)
-                       per_cpu(core0_c0count, i) = c0count;
+               c0count = c0count ? c0count : 1;
+               for (i = 1; i < nr_cpu_ids; i++)
+                       core0_c0count[i] = c0count;
+               __wbflush(); /* Let others see the result ASAP */
        }
 }
 
-#define MAX_LOOPS 1111
+#define MAX_LOOPS 800
 /*
  * SMP init and finish on secondary CPUs
  */
@@ -305,16 +307,20 @@ static void loongson3_init_secondary(void)
                cpu_logical_map(cpu) / loongson_sysconf.cores_per_package;
 
        i = 0;
-       __this_cpu_write(core0_c0count, 0);
+       core0_c0count[cpu] = 0;
        loongson3_send_ipi_single(0, SMP_ASK_C0COUNT);
-       while (!__this_cpu_read(core0_c0count)) {
+       while (!core0_c0count[cpu]) {
                i++;
                cpu_relax();
        }
 
        if (i > MAX_LOOPS)
                i = MAX_LOOPS;
-       initcount = __this_cpu_read(core0_c0count) + i;
+       if (cpu_data[cpu].package)
+               initcount = core0_c0count[cpu] + i;
+       else /* Local access is faster for loops */
+               initcount = core0_c0count[cpu] + i/2;
+
        write_c0_count(initcount);
 }
 
index 32f0e19a0d7f71fef2bb693e0da7b7bbc6bc9af9..cdfd44ffa51c88133b7b59417adb3fc070dfeee7 100644 (file)
@@ -1266,6 +1266,8 @@ branch_common:
                                                 */
                                                sig = mips_dsemul(xcp, ir,
                                                                  contpc);
+                                               if (sig < 0)
+                                                       break;
                                                if (sig)
                                                        xcp->cp0_epc = bcpc;
                                                /*
@@ -1319,6 +1321,8 @@ branch_common:
                                 * instruction in the dslot
                                 */
                                sig = mips_dsemul(xcp, ir, contpc);
+                               if (sig < 0)
+                                       break;
                                if (sig)
                                        xcp->cp0_epc = bcpc;
                                /* SIGILL forces out of the emulation loop.  */
index 926d56bf37f24ca1dbfcfcdbf71cc8f3165dfa08..eb96485ed939ccc09c378e184c3a2a83fec80717 100644 (file)
 
 union ieee754dp ieee754dp_neg(union ieee754dp x)
 {
-       unsigned int oldrm;
        union ieee754dp y;
 
-       oldrm = ieee754_csr.rm;
-       ieee754_csr.rm = FPU_CSR_RD;
-       y = ieee754dp_sub(ieee754dp_zero(0), x);
-       ieee754_csr.rm = oldrm;
+       if (ieee754_csr.abs2008) {
+               y = x;
+               DPSIGN(y) = !DPSIGN(x);
+       } else {
+               unsigned int oldrm;
+
+               oldrm = ieee754_csr.rm;
+               ieee754_csr.rm = FPU_CSR_RD;
+               y = ieee754dp_sub(ieee754dp_zero(0), x);
+               ieee754_csr.rm = oldrm;
+       }
        return y;
 }
 
 union ieee754dp ieee754dp_abs(union ieee754dp x)
 {
-       unsigned int oldrm;
        union ieee754dp y;
 
-       oldrm = ieee754_csr.rm;
-       ieee754_csr.rm = FPU_CSR_RD;
-       if (DPSIGN(x))
-               y = ieee754dp_sub(ieee754dp_zero(0), x);
-       else
-               y = ieee754dp_add(ieee754dp_zero(0), x);
-       ieee754_csr.rm = oldrm;
+       if (ieee754_csr.abs2008) {
+               y = x;
+               DPSIGN(y) = 0;
+       } else {
+               unsigned int oldrm;
+
+               oldrm = ieee754_csr.rm;
+               ieee754_csr.rm = FPU_CSR_RD;
+               if (DPSIGN(x))
+                       y = ieee754dp_sub(ieee754dp_zero(0), x);
+               else
+                       y = ieee754dp_add(ieee754dp_zero(0), x);
+               ieee754_csr.rm = oldrm;
+       }
        return y;
 }
index 6ffc336c530e3e11c48c1dcb8ab9eb50224b7ff4..f3985617ce31526c7de1d1fb19a0af3a207bd925 100644 (file)
@@ -38,10 +38,13 @@ int ieee754dp_tint(union ieee754dp x)
        switch (xc) {
        case IEEE754_CLASS_SNAN:
        case IEEE754_CLASS_QNAN:
-       case IEEE754_CLASS_INF:
                ieee754_setcx(IEEE754_INVALID_OPERATION);
                return ieee754si_indef();
 
+       case IEEE754_CLASS_INF:
+               ieee754_setcx(IEEE754_INVALID_OPERATION);
+               return ieee754si_overflow(xs);
+
        case IEEE754_CLASS_ZERO:
                return 0;
 
@@ -53,7 +56,7 @@ int ieee754dp_tint(union ieee754dp x)
                /* Set invalid. We will only use overflow for floating
                   point overflow */
                ieee754_setcx(IEEE754_INVALID_OPERATION);
-               return ieee754si_indef();
+               return ieee754si_overflow(xs);
        }
        /* oh gawd */
        if (xe > DP_FBITS) {
@@ -93,7 +96,7 @@ int ieee754dp_tint(union ieee754dp x)
                if ((xm >> 31) != 0 && (xs == 0 || xm != 0x80000000)) {
                        /* This can happen after rounding */
                        ieee754_setcx(IEEE754_INVALID_OPERATION);
-                       return ieee754si_indef();
+                       return ieee754si_overflow(xs);
                }
                if (round || sticky)
                        ieee754_setcx(IEEE754_INEXACT);
index 9cdc145b75e012d75a22f146c1e3c3b5c65a4e7f..748fa10ed4cf63d3e0995a3d6ae6860b6c432d7b 100644 (file)
@@ -38,10 +38,13 @@ s64 ieee754dp_tlong(union ieee754dp x)
        switch (xc) {
        case IEEE754_CLASS_SNAN:
        case IEEE754_CLASS_QNAN:
-       case IEEE754_CLASS_INF:
                ieee754_setcx(IEEE754_INVALID_OPERATION);
                return ieee754di_indef();
 
+       case IEEE754_CLASS_INF:
+               ieee754_setcx(IEEE754_INVALID_OPERATION);
+               return ieee754di_overflow(xs);
+
        case IEEE754_CLASS_ZERO:
                return 0;
 
@@ -56,7 +59,7 @@ s64 ieee754dp_tlong(union ieee754dp x)
                /* Set invalid. We will only use overflow for floating
                   point overflow */
                ieee754_setcx(IEEE754_INVALID_OPERATION);
-               return ieee754di_indef();
+               return ieee754di_overflow(xs);
        }
        /* oh gawd */
        if (xe > DP_FBITS) {
@@ -97,7 +100,7 @@ s64 ieee754dp_tlong(union ieee754dp x)
                if ((xm >> 63) != 0) {
                        /* This can happen after rounding */
                        ieee754_setcx(IEEE754_INVALID_OPERATION);
-                       return ieee754di_indef();
+                       return ieee754di_overflow(xs);
                }
                if (round || sticky)
                        ieee754_setcx(IEEE754_INEXACT);
index cbb36c14b155ad07dae2250550bc4090622180cd..46b964d2b79c0a0f24aa7cf6e77e1846a88e842f 100644 (file)
@@ -31,17 +31,41 @@ struct emuframe {
        unsigned long           epc;
 };
 
+/*
+ * Set up an emulation frame for instruction IR, from a delay slot of
+ * a branch jumping to CPC.  Return 0 if successful, -1 if no emulation
+ * required, otherwise a signal number causing a frame setup failure.
+ */
 int mips_dsemul(struct pt_regs *regs, mips_instruction ir, unsigned long cpc)
 {
+       int isa16 = get_isa16_mode(regs->cp0_epc);
+       mips_instruction break_math;
        struct emuframe __user *fr;
        int err;
 
-       if ((get_isa16_mode(regs->cp0_epc) && ((ir >> 16) == MM_NOP16)) ||
-               (ir == 0)) {
-               /* NOP is easy */
-               regs->cp0_epc = cpc;
-               clear_delay_slot(regs);
-               return 0;
+       /* NOP is easy */
+       if (ir == 0)
+               return -1;
+
+       /* microMIPS instructions */
+       if (isa16) {
+               union mips_instruction insn = { .word = ir };
+
+               /* NOP16 aka MOVE16 $0, $0 */
+               if ((ir >> 16) == MM_NOP16)
+                       return -1;
+
+               /* ADDIUPC */
+               if (insn.mm_a_format.opcode == mm_addiupc_op) {
+                       unsigned int rs;
+                       s32 v;
+
+                       rs = (((insn.mm_a_format.rs + 0x1e) & 0xf) + 2);
+                       v = regs->cp0_epc & ~3;
+                       v += insn.mm_a_format.simmediate << 2;
+                       regs->regs[rs] = (long)v;
+                       return -1;
+               }
        }
 
        pr_debug("dsemul %lx %lx\n", regs->cp0_epc, cpc);
@@ -55,14 +79,10 @@ int mips_dsemul(struct pt_regs *regs, mips_instruction ir, unsigned long cpc)
         * Algorithmics used a system call instruction, and
         * borrowed that vector.  MIPS/Linux version is a bit
         * more heavyweight in the interests of portability and
-        * multiprocessor support.  For Linux we generate a
-        * an unaligned access and force an address error exception.
-        *
-        * For embedded systems (stand-alone) we prefer to use a
-        * non-existing CP1 instruction. This prevents us from emulating
-        * branches, but gives us a cleaner interface to the exception
-        * handler (single entry point).
+        * multiprocessor support.  For Linux we use a BREAK 514
+        * instruction causing a breakpoint exception.
         */
+       break_math = BREAK_MATH(isa16);
 
        /* Ensure that the two instructions are in the same cache line */
        fr = (struct emuframe __user *)
@@ -72,14 +92,18 @@ int mips_dsemul(struct pt_regs *regs, mips_instruction ir, unsigned long cpc)
        if (unlikely(!access_ok(VERIFY_WRITE, fr, sizeof(struct emuframe))))
                return SIGBUS;
 
-       if (get_isa16_mode(regs->cp0_epc)) {
-               err = __put_user(ir >> 16, (u16 __user *)(&fr->emul));
-               err |= __put_user(ir & 0xffff, (u16 __user *)((long)(&fr->emul) + 2));
-               err |= __put_user(BREAK_MATH >> 16, (u16 __user *)(&fr->badinst));
-               err |= __put_user(BREAK_MATH & 0xffff, (u16 __user *)((long)(&fr->badinst) + 2));
+       if (isa16) {
+               err = __put_user(ir >> 16,
+                                (u16 __user *)(&fr->emul));
+               err |= __put_user(ir & 0xffff,
+                                 (u16 __user *)((long)(&fr->emul) + 2));
+               err |= __put_user(break_math >> 16,
+                                 (u16 __user *)(&fr->badinst));
+               err |= __put_user(break_math & 0xffff,
+                                 (u16 __user *)((long)(&fr->badinst) + 2));
        } else {
                err = __put_user(ir, &fr->emul);
-               err |= __put_user((mips_instruction)BREAK_MATH, &fr->badinst);
+               err |= __put_user(break_math, &fr->badinst);
        }
 
        err |= __put_user((mips_instruction)BD_COOKIE, &fr->cookie);
@@ -90,8 +114,7 @@ int mips_dsemul(struct pt_regs *regs, mips_instruction ir, unsigned long cpc)
                return SIGBUS;
        }
 
-       regs->cp0_epc = ((unsigned long) &fr->emul) |
-               get_isa16_mode(regs->cp0_epc);
+       regs->cp0_epc = (unsigned long)&fr->emul | isa16;
 
        flush_cache_sigtramp((unsigned long)&fr->emul);
 
@@ -100,6 +123,7 @@ int mips_dsemul(struct pt_regs *regs, mips_instruction ir, unsigned long cpc)
 
 int do_dsemulret(struct pt_regs *xcp)
 {
+       int isa16 = get_isa16_mode(xcp->cp0_epc);
        struct emuframe __user *fr;
        unsigned long epc;
        u32 insn, cookie;
@@ -122,16 +146,19 @@ int do_dsemulret(struct pt_regs *xcp)
         *  - Is the instruction pointed to by the EPC an BREAK_MATH?
         *  - Is the following memory word the BD_COOKIE?
         */
-       if (get_isa16_mode(xcp->cp0_epc)) {
-               err = __get_user(instr[0], (u16 __user *)(&fr->badinst));
-               err |= __get_user(instr[1], (u16 __user *)((long)(&fr->badinst) + 2));
+       if (isa16) {
+               err = __get_user(instr[0],
+                                (u16 __user *)(&fr->badinst));
+               err |= __get_user(instr[1],
+                                 (u16 __user *)((long)(&fr->badinst) + 2));
                insn = (instr[0] << 16) | instr[1];
        } else {
                err = __get_user(insn, &fr->badinst);
        }
        err |= __get_user(cookie, &fr->cookie);
 
-       if (unlikely(err || (insn != BREAK_MATH) || (cookie != BD_COOKIE))) {
+       if (unlikely(err ||
+                    insn != BREAK_MATH(isa16) || cookie != BD_COOKIE)) {
                MIPS_FPU_EMU_INC_STATS(errors);
                return 0;
        }
index 8e97acbbe22ceb96789d36c44b98500f9885154a..e16ae7b75dbb775c9af078994e785c257a454f39 100644 (file)
@@ -59,7 +59,8 @@ const union ieee754dp __ieee754dp_spcvals[] = {
        DPCNST(1, 3,           0x4000000000000ULL),     /* - 10.0   */
        DPCNST(0, DP_EMAX + 1, 0x0000000000000ULL),     /* + infinity */
        DPCNST(1, DP_EMAX + 1, 0x0000000000000ULL),     /* - infinity */
-       DPCNST(0, DP_EMAX + 1, 0x7FFFFFFFFFFFFULL),     /* + indef quiet Nan */
+       DPCNST(0, DP_EMAX + 1, 0x7FFFFFFFFFFFFULL),     /* + ind legacy qNaN */
+       DPCNST(0, DP_EMAX + 1, 0x8000000000000ULL),     /* + indef 2008 qNaN */
        DPCNST(0, DP_EMAX,     0xFFFFFFFFFFFFFULL),     /* + max */
        DPCNST(1, DP_EMAX,     0xFFFFFFFFFFFFFULL),     /* - max */
        DPCNST(0, DP_EMIN,     0x0000000000000ULL),     /* + min normal */
@@ -82,7 +83,8 @@ const union ieee754sp __ieee754sp_spcvals[] = {
        SPCNST(1, 3,           0x200000),       /* - 10.0   */
        SPCNST(0, SP_EMAX + 1, 0x000000),       /* + infinity */
        SPCNST(1, SP_EMAX + 1, 0x000000),       /* - infinity */
-       SPCNST(0, SP_EMAX + 1, 0x3FFFFF),       /* + indef quiet Nan  */
+       SPCNST(0, SP_EMAX + 1, 0x3FFFFF),       /* + indef legacy quiet NaN */
+       SPCNST(0, SP_EMAX + 1, 0x400000),       /* + indef 2008 quiet NaN */
        SPCNST(0, SP_EMAX,     0x7FFFFF),       /* + max normal */
        SPCNST(1, SP_EMAX,     0x7FFFFF),       /* - max normal */
        SPCNST(0, SP_EMIN,     0x000000),       /* + min normal */
index df94720714c73cc95736544f7b05804d607d27ca..d3be351aed151305396723ddfff661837a114cde 100644 (file)
@@ -221,15 +221,16 @@ union ieee754dp ieee754dp_dump(char *s, union ieee754dp x);
 #define IEEE754_SPCVAL_NTEN            5       /* -10.0 */
 #define IEEE754_SPCVAL_PINFINITY       6       /* +inf */
 #define IEEE754_SPCVAL_NINFINITY       7       /* -inf */
-#define IEEE754_SPCVAL_INDEF           8       /* quiet NaN */
-#define IEEE754_SPCVAL_PMAX            9       /* +max norm */
-#define IEEE754_SPCVAL_NMAX            10      /* -max norm */
-#define IEEE754_SPCVAL_PMIN            11      /* +min norm */
-#define IEEE754_SPCVAL_NMIN            12      /* -min norm */
-#define IEEE754_SPCVAL_PMIND           13      /* +min denorm */
-#define IEEE754_SPCVAL_NMIND           14      /* -min denorm */
-#define IEEE754_SPCVAL_P1E31           15      /* + 1.0e31 */
-#define IEEE754_SPCVAL_P1E63           16      /* + 1.0e63 */
+#define IEEE754_SPCVAL_INDEF_LEG       8       /* legacy quiet NaN */
+#define IEEE754_SPCVAL_INDEF_2008      9       /* IEEE 754-2008 quiet NaN */
+#define IEEE754_SPCVAL_PMAX            10      /* +max norm */
+#define IEEE754_SPCVAL_NMAX            11      /* -max norm */
+#define IEEE754_SPCVAL_PMIN            12      /* +min norm */
+#define IEEE754_SPCVAL_NMIN            13      /* -min norm */
+#define IEEE754_SPCVAL_PMIND           14      /* +min denorm */
+#define IEEE754_SPCVAL_NMIND           15      /* -min denorm */
+#define IEEE754_SPCVAL_P1E31           16      /* + 1.0e31 */
+#define IEEE754_SPCVAL_P1E63           17      /* + 1.0e63 */
 
 extern const union ieee754dp __ieee754dp_spcvals[];
 extern const union ieee754sp __ieee754sp_spcvals[];
@@ -243,7 +244,8 @@ extern const union ieee754sp __ieee754sp_spcvals[];
 #define ieee754dp_zero(sn)     (ieee754dp_spcvals[IEEE754_SPCVAL_PZERO+(sn)])
 #define ieee754dp_one(sn)      (ieee754dp_spcvals[IEEE754_SPCVAL_PONE+(sn)])
 #define ieee754dp_ten(sn)      (ieee754dp_spcvals[IEEE754_SPCVAL_PTEN+(sn)])
-#define ieee754dp_indef()      (ieee754dp_spcvals[IEEE754_SPCVAL_INDEF])
+#define ieee754dp_indef()      (ieee754dp_spcvals[IEEE754_SPCVAL_INDEF_LEG + \
+                                                  ieee754_csr.nan2008])
 #define ieee754dp_max(sn)      (ieee754dp_spcvals[IEEE754_SPCVAL_PMAX+(sn)])
 #define ieee754dp_min(sn)      (ieee754dp_spcvals[IEEE754_SPCVAL_PMIN+(sn)])
 #define ieee754dp_mind(sn)     (ieee754dp_spcvals[IEEE754_SPCVAL_PMIND+(sn)])
@@ -254,7 +256,8 @@ extern const union ieee754sp __ieee754sp_spcvals[];
 #define ieee754sp_zero(sn)     (ieee754sp_spcvals[IEEE754_SPCVAL_PZERO+(sn)])
 #define ieee754sp_one(sn)      (ieee754sp_spcvals[IEEE754_SPCVAL_PONE+(sn)])
 #define ieee754sp_ten(sn)      (ieee754sp_spcvals[IEEE754_SPCVAL_PTEN+(sn)])
-#define ieee754sp_indef()      (ieee754sp_spcvals[IEEE754_SPCVAL_INDEF])
+#define ieee754sp_indef()      (ieee754sp_spcvals[IEEE754_SPCVAL_INDEF_LEG + \
+                                                  ieee754_csr.nan2008])
 #define ieee754sp_max(sn)      (ieee754sp_spcvals[IEEE754_SPCVAL_PMAX+(sn)])
 #define ieee754sp_min(sn)      (ieee754sp_spcvals[IEEE754_SPCVAL_PMIN+(sn)])
 #define ieee754sp_mind(sn)     (ieee754sp_spcvals[IEEE754_SPCVAL_PMIND+(sn)])
@@ -266,12 +269,25 @@ extern const union ieee754sp __ieee754sp_spcvals[];
  */
 static inline int ieee754si_indef(void)
 {
-       return INT_MAX;
+       return ieee754_csr.nan2008 ? 0 : INT_MAX;
 }
 
 static inline s64 ieee754di_indef(void)
 {
-       return S64_MAX;
+       return ieee754_csr.nan2008 ? 0 : S64_MAX;
+}
+
+/*
+ * Overflow integer value
+ */
+static inline int ieee754si_overflow(int xs)
+{
+       return ieee754_csr.nan2008 && xs ? INT_MIN : INT_MAX;
+}
+
+static inline s64 ieee754di_overflow(int xs)
+{
+       return ieee754_csr.nan2008 && xs ? S64_MIN : S64_MAX;
 }
 
 /* result types for xctx.rt */
index 522d843f2ffd04a47873328724a9d45965dce0ce..ad3c73436777f0c3103c887827c4547d888f5941 100644 (file)
@@ -37,8 +37,11 @@ static inline int ieee754dp_isnan(union ieee754dp x)
 
 static inline int ieee754dp_issnan(union ieee754dp x)
 {
+       int qbit;
+
        assert(ieee754dp_isnan(x));
-       return (DPMANT(x) & DP_MBIT(DP_FBITS - 1)) == DP_MBIT(DP_FBITS - 1);
+       qbit = (DPMANT(x) & DP_MBIT(DP_FBITS - 1)) == DP_MBIT(DP_FBITS - 1);
+       return ieee754_csr.nan2008 ^ qbit;
 }
 
 
@@ -51,7 +54,12 @@ union ieee754dp __cold ieee754dp_nanxcpt(union ieee754dp r)
        assert(ieee754dp_issnan(r));
 
        ieee754_setcx(IEEE754_INVALID_OPERATION);
-       return ieee754dp_indef();
+       if (ieee754_csr.nan2008)
+               DPMANT(r) |= DP_MBIT(DP_FBITS - 1);
+       else
+               r = ieee754dp_indef();
+
+       return r;
 }
 
 static u64 ieee754dp_get_rounding(int sn, u64 xm)
index 6383e2c5c1adb5d527fe1023eb173f06e3776dc7..ed7bb277b3e05e224315508f32396fd37c239641 100644 (file)
@@ -63,10 +63,10 @@ static inline int ieee754_class_nan(int xc)
        if (ve == SP_EMAX+1+SP_EBIAS) {                                 \
                if (vm == 0)                                            \
                        vc = IEEE754_CLASS_INF;                         \
-               else if (vm & SP_MBIT(SP_FBITS-1))                      \
-                       vc = IEEE754_CLASS_SNAN;                        \
-               else                                                    \
+               else if (ieee754_csr.nan2008 ^ !(vm & SP_MBIT(SP_FBITS - 1))) \
                        vc = IEEE754_CLASS_QNAN;                        \
+               else                                                    \
+                       vc = IEEE754_CLASS_SNAN;                        \
        } else if (ve == SP_EMIN-1+SP_EBIAS) {                          \
                if (vm) {                                               \
                        ve = SP_EMIN;                                   \
@@ -97,10 +97,10 @@ static inline int ieee754_class_nan(int xc)
        if (ve == DP_EMAX+1+DP_EBIAS) {                                 \
                if (vm == 0)                                            \
                        vc = IEEE754_CLASS_INF;                         \
-               else if (vm & DP_MBIT(DP_FBITS-1))                      \
-                       vc = IEEE754_CLASS_SNAN;                        \
-               else                                                    \
+               else if (ieee754_csr.nan2008 ^ !(vm & DP_MBIT(DP_FBITS - 1))) \
                        vc = IEEE754_CLASS_QNAN;                        \
+               else                                                    \
+                       vc = IEEE754_CLASS_SNAN;                        \
        } else if (ve == DP_EMIN-1+DP_EBIAS) {                          \
                if (vm) {                                               \
                        ve = DP_EMIN;                                   \
index ca8e35e33bf790f594156268e320ce866b2ed5c2..def00ffc50fcc7bc4740933215cdf5872940a450 100644 (file)
@@ -37,8 +37,11 @@ static inline int ieee754sp_isnan(union ieee754sp x)
 
 static inline int ieee754sp_issnan(union ieee754sp x)
 {
+       int qbit;
+
        assert(ieee754sp_isnan(x));
-       return SPMANT(x) & SP_MBIT(SP_FBITS - 1);
+       qbit = (SPMANT(x) & SP_MBIT(SP_FBITS - 1)) == SP_MBIT(SP_FBITS - 1);
+       return ieee754_csr.nan2008 ^ qbit;
 }
 
 
@@ -51,7 +54,12 @@ union ieee754sp __cold ieee754sp_nanxcpt(union ieee754sp r)
        assert(ieee754sp_issnan(r));
 
        ieee754_setcx(IEEE754_INVALID_OPERATION);
-       return ieee754sp_indef();
+       if (ieee754_csr.nan2008)
+               SPMANT(r) |= SP_MBIT(SP_FBITS - 1);
+       else
+               r = ieee754sp_indef();
+
+       return r;
 }
 
 static unsigned ieee754sp_get_rounding(int sn, unsigned xm)
index 3797148893adb62c10a989dc78e5e9dce56e1e9e..5060e8fdcb0b8817c5f6836e0a20273f08a79b52 100644 (file)
@@ -44,13 +44,16 @@ union ieee754sp ieee754sp_fdp(union ieee754dp x)
 
        switch (xc) {
        case IEEE754_CLASS_SNAN:
-               return ieee754sp_nanxcpt(ieee754sp_nan_fdp(xs, xm));
-
+               x = ieee754dp_nanxcpt(x);
+               EXPLODEXDP;
+               /* Fall through.  */
        case IEEE754_CLASS_QNAN:
                y = ieee754sp_nan_fdp(xs, xm);
-               EXPLODEYSP;
-               if (!ieee754_class_nan(yc))
-                       y = ieee754sp_indef();
+               if (!ieee754_csr.nan2008) {
+                       EXPLODEYSP;
+                       if (!ieee754_class_nan(yc))
+                               y = ieee754sp_indef();
+               }
                return y;
 
        case IEEE754_CLASS_INF:
index c50e9451f2d21da196d0afbe00979548ca8b217e..756c9cf2dfd2514f7461190a9a03fc065c697195 100644 (file)
 
 union ieee754sp ieee754sp_neg(union ieee754sp x)
 {
-       unsigned int oldrm;
        union ieee754sp y;
 
-       oldrm = ieee754_csr.rm;
-       ieee754_csr.rm = FPU_CSR_RD;
-       y = ieee754sp_sub(ieee754sp_zero(0), x);
-       ieee754_csr.rm = oldrm;
+       if (ieee754_csr.abs2008) {
+               y = x;
+               SPSIGN(y) = !SPSIGN(x);
+       } else {
+               unsigned int oldrm;
+
+               oldrm = ieee754_csr.rm;
+               ieee754_csr.rm = FPU_CSR_RD;
+               y = ieee754sp_sub(ieee754sp_zero(0), x);
+               ieee754_csr.rm = oldrm;
+       }
        return y;
 }
 
 union ieee754sp ieee754sp_abs(union ieee754sp x)
 {
-       unsigned int oldrm;
        union ieee754sp y;
 
-       oldrm = ieee754_csr.rm;
-       ieee754_csr.rm = FPU_CSR_RD;
-       if (SPSIGN(x))
-               y = ieee754sp_sub(ieee754sp_zero(0), x);
-       else
-               y = ieee754sp_add(ieee754sp_zero(0), x);
-       ieee754_csr.rm = oldrm;
+       if (ieee754_csr.abs2008) {
+               y = x;
+               SPSIGN(y) = 0;
+       } else {
+               unsigned int oldrm;
+
+               oldrm = ieee754_csr.rm;
+               ieee754_csr.rm = FPU_CSR_RD;
+               if (SPSIGN(x))
+                       y = ieee754sp_sub(ieee754sp_zero(0), x);
+               else
+                       y = ieee754sp_add(ieee754sp_zero(0), x);
+               ieee754_csr.rm = oldrm;
+       }
        return y;
 }
index 091299a317980b6bf0cfd84f0958b1f271d2bf9d..f4b4cabfe2e11740e17b4c77c913e6762ac26b71 100644 (file)
@@ -38,10 +38,13 @@ int ieee754sp_tint(union ieee754sp x)
        switch (xc) {
        case IEEE754_CLASS_SNAN:
        case IEEE754_CLASS_QNAN:
-       case IEEE754_CLASS_INF:
                ieee754_setcx(IEEE754_INVALID_OPERATION);
                return ieee754si_indef();
 
+       case IEEE754_CLASS_INF:
+               ieee754_setcx(IEEE754_INVALID_OPERATION);
+               return ieee754si_overflow(xs);
+
        case IEEE754_CLASS_ZERO:
                return 0;
 
@@ -56,7 +59,7 @@ int ieee754sp_tint(union ieee754sp x)
                /* Set invalid. We will only use overflow for floating
                   point overflow */
                ieee754_setcx(IEEE754_INVALID_OPERATION);
-               return ieee754si_indef();
+               return ieee754si_overflow(xs);
        }
        /* oh gawd */
        if (xe > SP_FBITS) {
@@ -97,7 +100,7 @@ int ieee754sp_tint(union ieee754sp x)
                if ((xm >> 31) != 0) {
                        /* This can happen after rounding */
                        ieee754_setcx(IEEE754_INVALID_OPERATION);
-                       return ieee754si_indef();
+                       return ieee754si_overflow(xs);
                }
                if (round || sticky)
                        ieee754_setcx(IEEE754_INEXACT);
index 9f3c742c1cea6ac264d1d4d66ef56b6916f675cb..a2450c7e452a672fdeba96bba5a7c262c1b6e876 100644 (file)
@@ -39,10 +39,13 @@ s64 ieee754sp_tlong(union ieee754sp x)
        switch (xc) {
        case IEEE754_CLASS_SNAN:
        case IEEE754_CLASS_QNAN:
-       case IEEE754_CLASS_INF:
                ieee754_setcx(IEEE754_INVALID_OPERATION);
                return ieee754di_indef();
 
+       case IEEE754_CLASS_INF:
+               ieee754_setcx(IEEE754_INVALID_OPERATION);
+               return ieee754di_overflow(xs);
+
        case IEEE754_CLASS_ZERO:
                return 0;
 
@@ -57,7 +60,7 @@ s64 ieee754sp_tlong(union ieee754sp x)
                /* Set invalid. We will only use overflow for floating
                   point overflow */
                ieee754_setcx(IEEE754_INVALID_OPERATION);
-               return ieee754di_indef();
+               return ieee754di_overflow(xs);
        }
        /* oh gawd */
        if (xe > SP_FBITS) {
@@ -94,7 +97,7 @@ s64 ieee754sp_tlong(union ieee754sp x)
                if ((xm >> 63) != 0) {
                        /* This can happen after rounding */
                        ieee754_setcx(IEEE754_INVALID_OPERATION);
-                       return ieee754di_indef();
+                       return ieee754di_overflow(xs);
                }
                if (round || sticky)
                        ieee754_setcx(IEEE754_INEXACT);
index 482192cc8f2b88ae89f4cf1495c0dd6055a5bda6..5a04b6f5c6fb8a2cc52d986f59042ad8cd585850 100644 (file)
@@ -241,7 +241,7 @@ static void output_pgtable_bits_defines(void)
 #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
        pr_define("_PAGE_HUGE_SHIFT %d\n", _PAGE_HUGE_SHIFT);
 #endif
-#ifdef CONFIG_CPU_MIPSR2
+#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
        if (cpu_has_rixi) {
 #ifdef _PAGE_NO_EXEC_SHIFT
                pr_define("_PAGE_NO_EXEC_SHIFT %d\n", _PAGE_NO_EXEC_SHIFT);
index 2eda01e6e08fd38d5e572f5e66283536d9e9cd34..139ad1d7ab5e3ce9dfa8aba6b507fe04e8b8d8b9 100644 (file)
@@ -43,6 +43,7 @@ obj-$(CONFIG_SIBYTE_BCM1x80)  += pci-bcm1480.o pci-bcm1480ht.o
 obj-$(CONFIG_SNI_RM)           += fixup-sni.o ops-sni.o
 obj-$(CONFIG_LANTIQ)           += fixup-lantiq.o
 obj-$(CONFIG_PCI_LANTIQ)       += pci-lantiq.o ops-lantiq.o
+obj-$(CONFIG_SOC_MT7620)       += pci-mt7620.o
 obj-$(CONFIG_SOC_RT288X)       += pci-rt2880.o
 obj-$(CONFIG_SOC_RT3883)       += pci-rt3883.o
 obj-$(CONFIG_TANBAC_TB0219)    += fixup-tb0219.o
diff --git a/arch/mips/pci/pci-mt7620.c b/arch/mips/pci/pci-mt7620.c
new file mode 100644 (file)
index 0000000..a009ee4
--- /dev/null
@@ -0,0 +1,426 @@
+/*
+ *  Ralink MT7620A SoC PCI support
+ *
+ *  Copyright (C) 2007-2013 Bruce Chang (Mediatek)
+ *  Copyright (C) 2013-2016 John Crispin <blogic@openwrt.org>
+ *
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/io.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/of_pci.h>
+#include <linux/reset.h>
+#include <linux/platform_device.h>
+
+#include <asm/mach-ralink/ralink_regs.h>
+#include <asm/mach-ralink/mt7620.h>
+
+#define RALINK_PCI_IO_MAP_BASE         0x10160000
+#define RALINK_PCI_MEMORY_BASE         0x0
+
+#define RALINK_INT_PCIE0               4
+
+#define RALINK_CLKCFG1                 0x30
+#define RALINK_GPIOMODE                        0x60
+
+#define PPLL_CFG1                      0x9c
+#define PDRV_SW_SET                    BIT(23)
+
+#define PPLL_DRV                       0xa0
+#define PDRV_SW_SET                    (1<<31)
+#define LC_CKDRVPD                     (1<<19)
+#define LC_CKDRVOHZ                    (1<<18)
+#define LC_CKDRVHZ                     (1<<17)
+#define LC_CKTEST                      (1<<16)
+
+/* PCI Bridge registers */
+#define RALINK_PCI_PCICFG_ADDR         0x00
+#define PCIRST                         BIT(1)
+
+#define RALINK_PCI_PCIENA              0x0C
+#define PCIINT2                                BIT(20)
+
+#define RALINK_PCI_CONFIG_ADDR         0x20
+#define RALINK_PCI_CONFIG_DATA_VIRT_REG        0x24
+#define RALINK_PCI_MEMBASE             0x28
+#define RALINK_PCI_IOBASE              0x2C
+
+/* PCI RC registers */
+#define RALINK_PCI0_BAR0SETUP_ADDR     0x10
+#define RALINK_PCI0_IMBASEBAR0_ADDR    0x18
+#define RALINK_PCI0_ID                 0x30
+#define RALINK_PCI0_CLASS              0x34
+#define RALINK_PCI0_SUBID              0x38
+#define RALINK_PCI0_STATUS             0x50
+#define PCIE_LINK_UP_ST                        BIT(0)
+
+#define PCIEPHY0_CFG                   0x90
+
+#define RALINK_PCIEPHY_P0_CTL_OFFSET   0x7498
+#define RALINK_PCIE0_CLK_EN            (1 << 26)
+
+#define BUSY                           0x80000000
+#define WAITRETRY_MAX                  10
+#define WRITE_MODE                     (1UL << 23)
+#define DATA_SHIFT                     0
+#define ADDR_SHIFT                     8
+
+
+static void __iomem *bridge_base;
+static void __iomem *pcie_base;
+
+static struct reset_control *rstpcie0;
+
+static inline void bridge_w32(u32 val, unsigned reg)
+{
+       iowrite32(val, bridge_base + reg);
+}
+
+static inline u32 bridge_r32(unsigned reg)
+{
+       return ioread32(bridge_base + reg);
+}
+
+static inline void pcie_w32(u32 val, unsigned reg)
+{
+       iowrite32(val, pcie_base + reg);
+}
+
+static inline u32 pcie_r32(unsigned reg)
+{
+       return ioread32(pcie_base + reg);
+}
+
+static inline void pcie_m32(u32 clr, u32 set, unsigned reg)
+{
+       u32 val = pcie_r32(reg);
+
+       val &= ~clr;
+       val |= set;
+       pcie_w32(val, reg);
+}
+
+static int wait_pciephy_busy(void)
+{
+       unsigned long reg_value = 0x0, retry = 0;
+
+       while (1) {
+               reg_value = pcie_r32(PCIEPHY0_CFG);
+
+               if (reg_value & BUSY)
+                       mdelay(100);
+               else
+                       break;
+               if (retry++ > WAITRETRY_MAX) {
+                       printk(KERN_WARN "PCIE-PHY retry failed.\n");
+                       return -1;
+               }
+       }
+       return 0;
+}
+
+static void pcie_phy(unsigned long addr, unsigned long val)
+{
+       wait_pciephy_busy();
+       pcie_w32(WRITE_MODE | (val << DATA_SHIFT) | (addr << ADDR_SHIFT),
+                PCIEPHY0_CFG);
+       mdelay(1);
+       wait_pciephy_busy();
+}
+
+static int pci_config_read(struct pci_bus *bus, unsigned int devfn, int where,
+                          int size, u32 *val)
+{
+       unsigned int slot = PCI_SLOT(devfn);
+       u8 func = PCI_FUNC(devfn);
+       u32 address;
+       u32 data;
+       u32 num = 0;
+
+       if (bus)
+               num = bus->number;
+
+       address = (((where & 0xF00) >> 8) << 24) | (num << 16) | (slot << 11) |
+                 (func << 8) | (where & 0xfc) | 0x80000000;
+       bridge_w32(address, RALINK_PCI_CONFIG_ADDR);
+       data = bridge_r32(RALINK_PCI_CONFIG_DATA_VIRT_REG);
+
+       switch (size) {
+       case 1:
+               *val = (data >> ((where & 3) << 3)) & 0xff;
+               break;
+       case 2:
+               *val = (data >> ((where & 3) << 3)) & 0xffff;
+               break;
+       case 4:
+               *val = data;
+               break;
+       }
+
+       return PCIBIOS_SUCCESSFUL;
+}
+
+static int pci_config_write(struct pci_bus *bus, unsigned int devfn, int where,
+                           int size, u32 val)
+{
+       unsigned int slot = PCI_SLOT(devfn);
+       u8 func = PCI_FUNC(devfn);
+       u32 address;
+       u32 data;
+       u32 num = 0;
+
+       if (bus)
+               num = bus->number;
+
+       address = (((where & 0xF00) >> 8) << 24) | (num << 16) | (slot << 11) |
+                 (func << 8) | (where & 0xfc) | 0x80000000;
+       bridge_w32(address, RALINK_PCI_CONFIG_ADDR);
+       data = bridge_r32(RALINK_PCI_CONFIG_DATA_VIRT_REG);
+
+       switch (size) {
+       case 1:
+               data = (data & ~(0xff << ((where & 3) << 3))) |
+                       (val << ((where & 3) << 3));
+               break;
+       case 2:
+               data = (data & ~(0xffff << ((where & 3) << 3))) |
+                       (val << ((where & 3) << 3));
+               break;
+       case 4:
+               data = val;
+               break;
+       }
+
+       bridge_w32(data, RALINK_PCI_CONFIG_DATA_VIRT_REG);
+
+       return PCIBIOS_SUCCESSFUL;
+}
+
+struct pci_ops mt7620_pci_ops = {
+       .read   = pci_config_read,
+       .write  = pci_config_write,
+};
+
+static struct resource mt7620_res_pci_mem1;
+static struct resource mt7620_res_pci_io1;
+struct pci_controller mt7620_controller = {
+       .pci_ops        = &mt7620_pci_ops,
+       .mem_resource   = &mt7620_res_pci_mem1,
+       .mem_offset     = 0x00000000UL,
+       .io_resource    = &mt7620_res_pci_io1,
+       .io_offset      = 0x00000000UL,
+       .io_map_base    = 0xa0000000,
+};
+
+static int mt7620_pci_hw_init(struct platform_device *pdev)
+{
+       /* bypass PCIe DLL */
+       pcie_phy(0x0, 0x80);
+       pcie_phy(0x1, 0x04);
+
+       /* Elastic buffer control */
+       pcie_phy(0x68, 0xB4);
+
+       /* put core into reset */
+       pcie_m32(0, PCIRST, RALINK_PCI_PCICFG_ADDR);
+       reset_control_assert(rstpcie0);
+
+       /* disable power and all clocks */
+       rt_sysc_m32(RALINK_PCIE0_CLK_EN, 0, RALINK_CLKCFG1);
+       rt_sysc_m32(LC_CKDRVPD, PDRV_SW_SET, PPLL_DRV);
+
+       /* bring core out of reset */
+       reset_control_deassert(rstpcie0);
+       rt_sysc_m32(0, RALINK_PCIE0_CLK_EN, RALINK_CLKCFG1);
+       mdelay(100);
+
+       if (!(rt_sysc_r32(PPLL_CFG1) & PDRV_SW_SET)) {
+               dev_err(&pdev->dev, "MT7620 PPLL unlock\n");
+               reset_control_assert(rstpcie0);
+               rt_sysc_m32(RALINK_PCIE0_CLK_EN, 0, RALINK_CLKCFG1);
+               return -1;
+       }
+
+       /* power up the bus */
+       rt_sysc_m32(LC_CKDRVHZ | LC_CKDRVOHZ, LC_CKDRVPD | PDRV_SW_SET,
+                   PPLL_DRV);
+
+       return 0;
+}
+
+static int mt7628_pci_hw_init(struct platform_device *pdev)
+{
+       u32 val = 0;
+
+       /* bring the core out of reset */
+       rt_sysc_m32(BIT(16), 0, RALINK_GPIOMODE);
+       reset_control_deassert(rstpcie0);
+
+       /* enable the pci clk */
+       rt_sysc_m32(0, RALINK_PCIE0_CLK_EN, RALINK_CLKCFG1);
+       mdelay(100);
+
+       /* voodoo from the SDK driver */
+       pcie_m32(~0xff, 0x5, RALINK_PCIEPHY_P0_CTL_OFFSET);
+
+       pci_config_read(NULL, 0, 0x70c, 4, &val);
+       val &= ~(0xff) << 8;
+       val |= 0x50 << 8;
+       pci_config_write(NULL, 0, 0x70c, 4, val);
+
+       pci_config_read(NULL, 0, 0x70c, 4, &val);
+       dev_err(&pdev->dev, "Port 0 N_FTS = %x\n", (unsigned int) val);
+
+       return 0;
+}
+
+static int mt7620_pci_probe(struct platform_device *pdev)
+{
+       struct resource *bridge_res = platform_get_resource(pdev,
+                                                           IORESOURCE_MEM, 0);
+       struct resource *pcie_res = platform_get_resource(pdev,
+                                                         IORESOURCE_MEM, 1);
+       u32 val = 0;
+
+       rstpcie0 = devm_reset_control_get(&pdev->dev, "pcie0");
+       if (IS_ERR(rstpcie0))
+               return PTR_ERR(rstpcie0);
+
+       bridge_base = devm_ioremap_resource(&pdev->dev, bridge_res);
+       if (!bridge_base)
+               return -ENOMEM;
+
+       pcie_base = devm_ioremap_resource(&pdev->dev, pcie_res);
+       if (!pcie_base)
+               return -ENOMEM;
+
+       iomem_resource.start = 0;
+       iomem_resource.end = ~0;
+       ioport_resource.start = 0;
+       ioport_resource.end = ~0;
+
+       /* bring up the pci core */
+       switch (ralink_soc) {
+       case MT762X_SOC_MT7620A:
+               if (mt7620_pci_hw_init(pdev))
+                       return -1;
+               break;
+
+       case MT762X_SOC_MT7628AN:
+               if (mt7628_pci_hw_init(pdev))
+                       return -1;
+               break;
+
+       default:
+               dev_err(&pdev->dev, "pcie is not supported on this hardware\n");
+               return -1;
+       }
+       mdelay(50);
+
+       /* enable write access */
+       pcie_m32(PCIRST, 0, RALINK_PCI_PCICFG_ADDR);
+       mdelay(100);
+
+       /* check if there is a card present */
+       if ((pcie_r32(RALINK_PCI0_STATUS) & PCIE_LINK_UP_ST) == 0) {
+               reset_control_assert(rstpcie0);
+               rt_sysc_m32(RALINK_PCIE0_CLK_EN, 0, RALINK_CLKCFG1);
+               if (ralink_soc == MT762X_SOC_MT7620A)
+                       rt_sysc_m32(LC_CKDRVPD, PDRV_SW_SET, PPLL_DRV);
+               dev_err(&pdev->dev, "PCIE0 no card, disable it(RST&CLK)\n");
+               return -1;
+       }
+
+       /* setup ranges */
+       bridge_w32(0xffffffff, RALINK_PCI_MEMBASE);
+       bridge_w32(RALINK_PCI_IO_MAP_BASE, RALINK_PCI_IOBASE);
+
+       pcie_w32(0x7FFF0001, RALINK_PCI0_BAR0SETUP_ADDR);
+       pcie_w32(RALINK_PCI_MEMORY_BASE, RALINK_PCI0_IMBASEBAR0_ADDR);
+       pcie_w32(0x06040001, RALINK_PCI0_CLASS);
+
+       /* enable interrupts */
+       pcie_m32(0, PCIINT2, RALINK_PCI_PCIENA);
+
+       /* voodoo from the SDK driver */
+       pci_config_read(NULL, 0, 4, 4, &val);
+       pci_config_write(NULL, 0, 4, 4, val | 0x7);
+
+       pci_load_of_ranges(&mt7620_controller, pdev->dev.of_node);
+       register_pci_controller(&mt7620_controller);
+
+       return 0;
+}
+
+int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+{
+       u16 cmd;
+       u32 val;
+       int irq = 0;
+
+       if ((dev->bus->number == 0) && (slot == 0)) {
+               pcie_w32(0x7FFF0001, RALINK_PCI0_BAR0SETUP_ADDR);
+               pci_config_write(dev->bus, 0, PCI_BASE_ADDRESS_0, 4,
+                                RALINK_PCI_MEMORY_BASE);
+               pci_config_read(dev->bus, 0, PCI_BASE_ADDRESS_0, 4, &val);
+       } else if ((dev->bus->number == 1) && (slot == 0x0)) {
+               irq = RALINK_INT_PCIE0;
+       } else {
+               dev_err(&dev->dev, "no irq found - bus=0x%x, slot = 0x%x\n",
+                       dev->bus->number, slot);
+               return 0;
+       }
+       dev_err(&dev->dev, "card - bus=0x%x, slot = 0x%x irq=%d\n",
+               dev->bus->number, slot, irq);
+
+       /* configure the cache line size to 0x14 */
+       pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE, 0x14);
+
+       /* configure latency timer to 0xff */
+       pci_write_config_byte(dev, PCI_LATENCY_TIMER, 0xff);
+       pci_read_config_word(dev, PCI_COMMAND, &cmd);
+
+       /* setup the slot */
+       cmd = cmd | PCI_COMMAND_MASTER | PCI_COMMAND_IO | PCI_COMMAND_MEMORY;
+       pci_write_config_word(dev, PCI_COMMAND, cmd);
+       pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq);
+
+       return irq;
+}
+
+int pcibios_plat_dev_init(struct pci_dev *dev)
+{
+       return 0;
+}
+
+static const struct of_device_id mt7620_pci_ids[] = {
+       { .compatible = "mediatek,mt7620-pci" },
+       {},
+};
+MODULE_DEVICE_TABLE(of, mt7620_pci_ids);
+
+static struct platform_driver mt7620_pci_driver = {
+       .probe = mt7620_pci_probe,
+       .driver = {
+               .name = "mt7620-pci",
+               .owner = THIS_MODULE,
+               .of_match_table = of_match_ptr(mt7620_pci_ids),
+       },
+};
+
+static int __init mt7620_pci_init(void)
+{
+       return platform_driver_register(&mt7620_pci_driver);
+}
+
+arch_initcall(mt7620_pci_init);
diff --git a/arch/mips/pic32/Kconfig b/arch/mips/pic32/Kconfig
new file mode 100644 (file)
index 0000000..fde56a8
--- /dev/null
@@ -0,0 +1,51 @@
+if MACH_PIC32
+
+choice
+       prompt "Machine Type"
+
+config PIC32MZDA
+       bool "Microchip PIC32MZDA Platform"
+       select BOOT_ELF32
+       select BOOT_RAW
+       select CEVT_R4K
+       select CSRC_R4K
+       select DMA_NONCOHERENT
+       select SYS_HAS_CPU_MIPS32_R2
+       select SYS_HAS_EARLY_PRINTK
+       select SYS_SUPPORTS_32BIT_KERNEL
+       select SYS_SUPPORTS_LITTLE_ENDIAN
+       select ARCH_REQUIRE_GPIOLIB
+       select HAVE_MACH_CLKDEV
+       select COMMON_CLK
+       select CLKDEV_LOOKUP
+       select LIBFDT
+       select USE_OF
+       select PINCTRL
+       select PIC32_EVIC
+       help
+         Support for the Microchip PIC32MZDA microcontroller.
+
+         This is a 32-bit microcontroller with support for external or
+         internally packaged DDR2 memory up to 128MB.
+
+         For more information, see <http://www.microchip.com/>.
+
+endchoice
+
+choice
+       prompt "Devicetree selection"
+       default DTB_PIC32_NONE
+       help
+         Select the devicetree.
+
+config DTB_PIC32_NONE
+       bool "None"
+
+config DTB_PIC32_MZDA_SK
+       bool "PIC32MZDA Starter Kit"
+       depends on PIC32MZDA
+       select BUILTIN_DTB
+
+endchoice
+
+endif # MACH_PIC32
diff --git a/arch/mips/pic32/Makefile b/arch/mips/pic32/Makefile
new file mode 100644 (file)
index 0000000..fd357f4
--- /dev/null
@@ -0,0 +1,6 @@
+#
+# Joshua Henderson, <joshua.henderson@microchip.com>
+# Copyright (C) 2015 Microchip Technology, Inc.  All rights reserved.
+#
+obj-$(CONFIG_MACH_PIC32) += common/
+obj-$(CONFIG_PIC32MZDA) += pic32mzda/
diff --git a/arch/mips/pic32/Platform b/arch/mips/pic32/Platform
new file mode 100644 (file)
index 0000000..cd2084f
--- /dev/null
@@ -0,0 +1,7 @@
+#
+# PIC32MZDA
+#
+platform-$(CONFIG_PIC32MZDA)   += pic32/
+cflags-$(CONFIG_PIC32MZDA)     += -I$(srctree)/arch/mips/include/asm/mach-pic32
+load-$(CONFIG_PIC32MZDA)       += 0xffffffff88000000
+all-$(CONFIG_PIC32MZDA)                := $(COMPRESSION_FNAME).bin
diff --git a/arch/mips/pic32/common/Makefile b/arch/mips/pic32/common/Makefile
new file mode 100644 (file)
index 0000000..be1909c
--- /dev/null
@@ -0,0 +1,5 @@
+#
+# Joshua Henderson, <joshua.henderson@microchip.com>
+# Copyright (C) 2015 Microchip Technology, Inc.  All rights reserved.
+#
+obj-y = reset.o irq.o
diff --git a/arch/mips/pic32/common/irq.c b/arch/mips/pic32/common/irq.c
new file mode 100644 (file)
index 0000000..6df347e
--- /dev/null
@@ -0,0 +1,21 @@
+/*
+ * Joshua Henderson <joshua.henderson@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ *  This program is free software; you can distribute it and/or modify it
+ *  under the terms of the GNU General Public License (Version 2) as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ */
+#include <linux/init.h>
+#include <linux/irqchip.h>
+#include <asm/irq.h>
+
+void __init arch_init_irq(void)
+{
+       irqchip_init();
+}
diff --git a/arch/mips/pic32/common/reset.c b/arch/mips/pic32/common/reset.c
new file mode 100644 (file)
index 0000000..8334575
--- /dev/null
@@ -0,0 +1,62 @@
+/*
+ * Joshua Henderson <joshua.henderson@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ *  This program is free software; you can distribute it and/or modify it
+ *  under the terms of the GNU General Public License (Version 2) as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ */
+#include <linux/init.h>
+#include <linux/pm.h>
+#include <asm/reboot.h>
+#include <asm/mach-pic32/pic32.h>
+
+#define PIC32_RSWRST           0x10
+
+static void pic32_halt(void)
+{
+       while (1) {
+               __asm__(".set push;\n"
+                       ".set arch=r4000;\n"
+                       "wait;\n"
+                       ".set pop;\n"
+               );
+       }
+}
+
+static void pic32_machine_restart(char *command)
+{
+       void __iomem *reg =
+               ioremap(PIC32_BASE_RESET + PIC32_RSWRST, sizeof(u32));
+
+       pic32_syskey_unlock();
+
+       /* magic write/read */
+       __raw_writel(1, reg);
+       (void)__raw_readl(reg);
+
+       pic32_halt();
+}
+
+static void pic32_machine_halt(void)
+{
+       local_irq_disable();
+
+       pic32_halt();
+}
+
+static int __init mips_reboot_setup(void)
+{
+       _machine_restart = pic32_machine_restart;
+       _machine_halt = pic32_machine_halt;
+       pm_power_off = pic32_machine_halt;
+
+       return 0;
+}
+
+arch_initcall(mips_reboot_setup);
diff --git a/arch/mips/pic32/pic32mzda/Makefile b/arch/mips/pic32/pic32mzda/Makefile
new file mode 100644 (file)
index 0000000..4a4c272
--- /dev/null
@@ -0,0 +1,9 @@
+#
+# Joshua Henderson, <joshua.henderson@microchip.com>
+# Copyright (C) 2015 Microchip Technology, Inc.  All rights reserved.
+#
+obj-y                  := init.o time.o config.o
+
+obj-$(CONFIG_EARLY_PRINTK)     += early_console.o      \
+                                  early_pin.o          \
+                                  early_clk.o
diff --git a/arch/mips/pic32/pic32mzda/config.c b/arch/mips/pic32/pic32mzda/config.c
new file mode 100644 (file)
index 0000000..fe293a0
--- /dev/null
@@ -0,0 +1,126 @@
+/*
+ * Purna Chandra Mandal, purna.mandal@microchip.com
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ *  This program is free software; you can distribute it and/or modify it
+ *  under the terms of the GNU General Public License (Version 2) as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ */
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/of_platform.h>
+
+#include <asm/mach-pic32/pic32.h>
+
+#include "pic32mzda.h"
+
+#define PIC32_CFGCON   0x0000
+#define PIC32_DEVID    0x0020
+#define PIC32_SYSKEY   0x0030
+#define PIC32_CFGEBIA  0x00c0
+#define PIC32_CFGEBIC  0x00d0
+#define PIC32_CFGCON2  0x00f0
+#define PIC32_RCON     0x1240
+
+static void __iomem *pic32_conf_base;
+static DEFINE_SPINLOCK(config_lock);
+static u32 pic32_reset_status;
+
+static u32 pic32_conf_get_reg_field(u32 offset, u32 rshift, u32 mask)
+{
+       u32 v;
+
+       v = readl(pic32_conf_base + offset);
+       v >>= rshift;
+       v &= mask;
+
+       return v;
+}
+
+static u32 pic32_conf_modify_atomic(u32 offset, u32 mask, u32 set)
+{
+       u32 v;
+       unsigned long flags;
+
+       spin_lock_irqsave(&config_lock, flags);
+       v = readl(pic32_conf_base + offset);
+       v &= ~mask;
+       v |= (set & mask);
+       writel(v, pic32_conf_base + offset);
+       spin_unlock_irqrestore(&config_lock, flags);
+
+       return 0;
+}
+
+int pic32_enable_lcd(void)
+{
+       return pic32_conf_modify_atomic(PIC32_CFGCON2, BIT(31), BIT(31));
+}
+
+int pic32_disable_lcd(void)
+{
+       return pic32_conf_modify_atomic(PIC32_CFGCON2, BIT(31), 0);
+}
+
+int pic32_set_lcd_mode(int mode)
+{
+       u32 mask = mode ? BIT(30) : 0;
+
+       return pic32_conf_modify_atomic(PIC32_CFGCON2, BIT(30), mask);
+}
+
+int pic32_set_sdhci_adma_fifo_threshold(u32 rthrsh, u32 wthrsh)
+{
+       u32 clr, set;
+
+       clr = (0x3ff << 4) | (0x3ff << 16);
+       set = (rthrsh << 4) | (wthrsh << 16);
+       return pic32_conf_modify_atomic(PIC32_CFGCON2, clr, set);
+}
+
+void pic32_syskey_unlock_debug(const char *func, const ulong line)
+{
+       void __iomem *syskey = pic32_conf_base + PIC32_SYSKEY;
+
+       pr_debug("%s: called from %s:%lu\n", __func__, func, line);
+       writel(0x00000000, syskey);
+       writel(0xAA996655, syskey);
+       writel(0x556699AA, syskey);
+}
+
+static u32 pic32_get_device_id(void)
+{
+       return pic32_conf_get_reg_field(PIC32_DEVID, 0, 0x0fffffff);
+}
+
+static u32 pic32_get_device_version(void)
+{
+       return pic32_conf_get_reg_field(PIC32_DEVID, 28, 0xf);
+}
+
+u32 pic32_get_boot_status(void)
+{
+       return pic32_reset_status;
+}
+EXPORT_SYMBOL(pic32_get_boot_status);
+
+void __init pic32_config_init(void)
+{
+       pic32_conf_base = ioremap(PIC32_BASE_CONFIG, 0x110);
+       if (!pic32_conf_base)
+               panic("pic32: config base not mapped");
+
+       /* Boot Status */
+       pic32_reset_status = readl(pic32_conf_base + PIC32_RCON);
+       writel(-1, PIC32_CLR(pic32_conf_base + PIC32_RCON));
+
+       /* Device Inforation */
+       pr_info("Device Id: 0x%08x, Device Ver: 0x%04x\n",
+               pic32_get_device_id(),
+               pic32_get_device_version());
+}
diff --git a/arch/mips/pic32/pic32mzda/early_clk.c b/arch/mips/pic32/pic32mzda/early_clk.c
new file mode 100644 (file)
index 0000000..96c090e
--- /dev/null
@@ -0,0 +1,106 @@
+/*
+ * Joshua Henderson <joshua.henderson@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ *  This program is free software; you can distribute it and/or modify it
+ *  under the terms of the GNU General Public License (Version 2) as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ */
+#include <asm/mach-pic32/pic32.h>
+
+#include "pic32mzda.h"
+
+/* Oscillators, PLL & clocks */
+#define ICLK_MASK      0x00000080
+#define PLLDIV_MASK    0x00000007
+#define CUROSC_MASK    0x00000007
+#define PLLMUL_MASK    0x0000007F
+#define PB_MASK                0x00000007
+#define FRC1           0
+#define FRC2           7
+#define SPLL           1
+#define POSC           2
+#define FRC_CLK                8000000
+
+#define PIC32_POSC_FREQ        24000000
+
+#define OSCCON         0x0000
+#define SPLLCON                0x0020
+#define PB1DIV         0x0140
+
+u32 pic32_get_sysclk(void)
+{
+       u32 osc_freq = 0;
+       u32 pllclk;
+       u32 frcdivn;
+       u32 osccon;
+       u32 spllcon;
+       int curr_osc;
+
+       u32 plliclk;
+       u32 pllidiv;
+       u32 pllodiv;
+       u32 pllmult;
+       u32 frcdiv;
+
+       void __iomem *osc_base = ioremap(PIC32_BASE_OSC, 0x200);
+
+       osccon = __raw_readl(osc_base + OSCCON);
+       spllcon = __raw_readl(osc_base + SPLLCON);
+
+       plliclk = (spllcon & ICLK_MASK);
+       pllidiv = ((spllcon >> 8) & PLLDIV_MASK) + 1;
+       pllodiv = ((spllcon >> 24) & PLLDIV_MASK);
+       pllmult = ((spllcon >> 16) & PLLMUL_MASK) + 1;
+       frcdiv = ((osccon >> 24) & PLLDIV_MASK);
+
+       pllclk = plliclk ? FRC_CLK : PIC32_POSC_FREQ;
+       frcdivn = ((1 << frcdiv) + 1) + (128 * (frcdiv == 7));
+
+       if (pllodiv < 2)
+               pllodiv = 2;
+       else if (pllodiv < 5)
+               pllodiv = (1 << pllodiv);
+       else
+               pllodiv = 32;
+
+       curr_osc = (int)((osccon >> 12) & CUROSC_MASK);
+
+       switch (curr_osc) {
+       case FRC1:
+       case FRC2:
+               osc_freq = FRC_CLK / frcdivn;
+               break;
+       case SPLL:
+               osc_freq = ((pllclk / pllidiv) * pllmult) / pllodiv;
+               break;
+       case POSC:
+               osc_freq = PIC32_POSC_FREQ;
+               break;
+       default:
+               break;
+       }
+
+       iounmap(osc_base);
+
+       return osc_freq;
+}
+
+u32 pic32_get_pbclk(int bus)
+{
+       u32 clk_freq;
+       void __iomem *osc_base = ioremap(PIC32_BASE_OSC, 0x200);
+       u32 pbxdiv = PB1DIV + ((bus - 1) * 0x10);
+       u32 pbdiv = (__raw_readl(osc_base + pbxdiv) & PB_MASK) + 1;
+
+       iounmap(osc_base);
+
+       clk_freq = pic32_get_sysclk();
+
+       return clk_freq / pbdiv;
+}
diff --git a/arch/mips/pic32/pic32mzda/early_console.c b/arch/mips/pic32/pic32mzda/early_console.c
new file mode 100644 (file)
index 0000000..d7b7834
--- /dev/null
@@ -0,0 +1,171 @@
+/*
+ * Joshua Henderson <joshua.henderson@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ *  This program is free software; you can distribute it and/or modify it
+ *  under the terms of the GNU General Public License (Version 2) as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ */
+#include <asm/mach-pic32/pic32.h>
+#include <asm/fw/fw.h>
+
+#include "pic32mzda.h"
+#include "early_pin.h"
+
+/* Default early console parameters */
+#define EARLY_CONSOLE_PORT     1
+#define EARLY_CONSOLE_BAUDRATE 115200
+
+#define UART_ENABLE            BIT(15)
+#define UART_ENABLE_RX         BIT(12)
+#define UART_ENABLE_TX         BIT(10)
+#define UART_TX_FULL           BIT(9)
+
+/* UART1(x == 0) - UART6(x == 5) */
+#define UART_BASE(x)   ((x) * 0x0200)
+#define U_MODE(x)      UART_BASE(x)
+#define U_STA(x)       (UART_BASE(x) + 0x10)
+#define U_TXR(x)       (UART_BASE(x) + 0x20)
+#define U_BRG(x)       (UART_BASE(x) + 0x40)
+
+static void __iomem *uart_base;
+static char console_port = -1;
+
+static int __init configure_uart_pins(int port)
+{
+       switch (port) {
+       case 1:
+               pic32_pps_input(IN_FUNC_U2RX, IN_RPB0);
+               pic32_pps_output(OUT_FUNC_U2TX, OUT_RPG9);
+               break;
+       case 5:
+               pic32_pps_input(IN_FUNC_U6RX, IN_RPD0);
+               pic32_pps_output(OUT_FUNC_U6TX, OUT_RPB8);
+               break;
+       default:
+               return -1;
+       }
+
+       return 0;
+}
+
+static void __init configure_uart(char port, int baud)
+{
+       u32 pbclk;
+
+       pbclk = pic32_get_pbclk(2);
+
+       __raw_writel(0, uart_base + U_MODE(port));
+       __raw_writel(((pbclk / baud) / 16) - 1, uart_base + U_BRG(port));
+       __raw_writel(UART_ENABLE, uart_base + U_MODE(port));
+       __raw_writel(UART_ENABLE_TX | UART_ENABLE_RX,
+                    uart_base + PIC32_SET(U_STA(port)));
+}
+
+static void __init setup_early_console(char port, int baud)
+{
+       if (configure_uart_pins(port))
+               return;
+
+       console_port = port;
+       configure_uart(console_port, baud);
+}
+
+static char * __init pic32_getcmdline(void)
+{
+       /*
+        * arch_mem_init() has not been called yet, so we don't have a real
+        * command line setup if using CONFIG_CMDLINE_BOOL.
+        */
+#ifdef CONFIG_CMDLINE_OVERRIDE
+       return CONFIG_CMDLINE;
+#else
+       return fw_getcmdline();
+#endif
+}
+
+static int __init get_port_from_cmdline(char *arch_cmdline)
+{
+       char *s;
+       int port = -1;
+
+       if (!arch_cmdline || *arch_cmdline == '\0')
+               goto _out;
+
+       s = strstr(arch_cmdline, "earlyprintk=");
+       if (s) {
+               s = strstr(s, "ttyS");
+               if (s)
+                       s += 4;
+               else
+                       goto _out;
+
+               port = (*s) - '0';
+       }
+
+_out:
+       return port;
+}
+
+static int __init get_baud_from_cmdline(char *arch_cmdline)
+{
+       char *s;
+       int baud = -1;
+
+       if (!arch_cmdline || *arch_cmdline == '\0')
+               goto _out;
+
+       s = strstr(arch_cmdline, "earlyprintk=");
+       if (s) {
+               s = strstr(s, "ttyS");
+               if (s)
+                       s += 6;
+               else
+                       goto _out;
+
+               baud = 0;
+               while (*s >= '0' && *s <= '9')
+                       baud = baud * 10 + *s++ - '0';
+       }
+
+_out:
+       return baud;
+}
+
+void __init fw_init_early_console(char port)
+{
+       char *arch_cmdline = pic32_getcmdline();
+       int baud = -1;
+
+       uart_base = ioremap_nocache(PIC32_BASE_UART, 0xc00);
+
+       baud = get_baud_from_cmdline(arch_cmdline);
+       if (port == -1)
+               port = get_port_from_cmdline(arch_cmdline);
+
+       if (port == -1)
+               port = EARLY_CONSOLE_PORT;
+
+       if (baud == -1)
+               baud = EARLY_CONSOLE_BAUDRATE;
+
+       setup_early_console(port, baud);
+}
+
+int prom_putchar(char c)
+{
+       if (console_port >= 0) {
+               while (__raw_readl(
+                               uart_base + U_STA(console_port)) & UART_TX_FULL)
+                       ;
+
+               __raw_writel(c, uart_base + U_TXR(console_port));
+       }
+
+       return 1;
+}
diff --git a/arch/mips/pic32/pic32mzda/early_pin.c b/arch/mips/pic32/pic32mzda/early_pin.c
new file mode 100644 (file)
index 0000000..aa673f8
--- /dev/null
@@ -0,0 +1,275 @@
+/*
+ * Joshua Henderson <joshua.henderson@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ *  This program is free software; you can distribute it and/or modify it
+ *  under the terms of the GNU General Public License (Version 2) as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ */
+#include <asm/io.h>
+
+#include "early_pin.h"
+
+#define PPS_BASE 0x1f800000
+
+/* Input PPS Registers */
+#define INT1R 0x1404
+#define INT2R 0x1408
+#define INT3R 0x140C
+#define INT4R 0x1410
+#define T2CKR 0x1418
+#define T3CKR 0x141C
+#define T4CKR 0x1420
+#define T5CKR 0x1424
+#define T6CKR 0x1428
+#define T7CKR 0x142C
+#define T8CKR 0x1430
+#define T9CKR 0x1434
+#define IC1R 0x1438
+#define IC2R 0x143C
+#define IC3R 0x1440
+#define IC4R 0x1444
+#define IC5R 0x1448
+#define IC6R 0x144C
+#define IC7R 0x1450
+#define IC8R 0x1454
+#define IC9R 0x1458
+#define OCFAR 0x1460
+#define U1RXR 0x1468
+#define U1CTSR 0x146C
+#define U2RXR 0x1470
+#define U2CTSR 0x1474
+#define U3RXR 0x1478
+#define U3CTSR 0x147C
+#define U4RXR 0x1480
+#define U4CTSR 0x1484
+#define U5RXR 0x1488
+#define U5CTSR 0x148C
+#define U6RXR 0x1490
+#define U6CTSR 0x1494
+#define SDI1R 0x149C
+#define SS1R 0x14A0
+#define SDI2R 0x14A8
+#define SS2R 0x14AC
+#define SDI3R 0x14B4
+#define SS3R 0x14B8
+#define SDI4R 0x14C0
+#define SS4R 0x14C4
+#define SDI5R 0x14CC
+#define SS5R 0x14D0
+#define SDI6R 0x14D8
+#define SS6R 0x14DC
+#define C1RXR 0x14E0
+#define C2RXR 0x14E4
+#define REFCLKI1R 0x14E8
+#define REFCLKI3R 0x14F0
+#define REFCLKI4R 0x14F4
+
+static const struct
+{
+       int function;
+       int reg;
+} input_pin_reg[] = {
+       { IN_FUNC_INT3, INT3R },
+       { IN_FUNC_T2CK, T2CKR },
+       { IN_FUNC_T6CK, T6CKR },
+       { IN_FUNC_IC3, IC3R  },
+       { IN_FUNC_IC7, IC7R },
+       { IN_FUNC_U1RX, U1RXR },
+       { IN_FUNC_U2CTS, U2CTSR },
+       { IN_FUNC_U5RX, U5RXR },
+       { IN_FUNC_U6CTS, U6CTSR },
+       { IN_FUNC_SDI1, SDI1R },
+       { IN_FUNC_SDI3, SDI3R },
+       { IN_FUNC_SDI5, SDI5R },
+       { IN_FUNC_SS6, SS6R },
+       { IN_FUNC_REFCLKI1, REFCLKI1R },
+       { IN_FUNC_INT4, INT4R },
+       { IN_FUNC_T5CK, T5CKR },
+       { IN_FUNC_T7CK, T7CKR },
+       { IN_FUNC_IC4, IC4R },
+       { IN_FUNC_IC8, IC8R },
+       { IN_FUNC_U3RX, U3RXR },
+       { IN_FUNC_U4CTS, U4CTSR },
+       { IN_FUNC_SDI2, SDI2R },
+       { IN_FUNC_SDI4, SDI4R },
+       { IN_FUNC_C1RX, C1RXR },
+       { IN_FUNC_REFCLKI4, REFCLKI4R },
+       { IN_FUNC_INT2, INT2R },
+       { IN_FUNC_T3CK, T3CKR },
+       { IN_FUNC_T8CK, T8CKR },
+       { IN_FUNC_IC2, IC2R },
+       { IN_FUNC_IC5, IC5R },
+       { IN_FUNC_IC9, IC9R },
+       { IN_FUNC_U1CTS, U1CTSR },
+       { IN_FUNC_U2RX, U2RXR },
+       { IN_FUNC_U5CTS, U5CTSR },
+       { IN_FUNC_SS1, SS1R },
+       { IN_FUNC_SS3, SS3R },
+       { IN_FUNC_SS4, SS4R },
+       { IN_FUNC_SS5, SS5R },
+       { IN_FUNC_C2RX, C2RXR },
+       { IN_FUNC_INT1, INT1R },
+       { IN_FUNC_T4CK, T4CKR },
+       { IN_FUNC_T9CK, T9CKR },
+       { IN_FUNC_IC1, IC1R },
+       { IN_FUNC_IC6, IC6R },
+       { IN_FUNC_U3CTS, U3CTSR },
+       { IN_FUNC_U4RX, U4RXR },
+       { IN_FUNC_U6RX, U6RXR },
+       { IN_FUNC_SS2, SS2R },
+       { IN_FUNC_SDI6, SDI6R },
+       { IN_FUNC_OCFA, OCFAR },
+       { IN_FUNC_REFCLKI3, REFCLKI3R },
+};
+
+void pic32_pps_input(int function, int pin)
+{
+       void __iomem *pps_base = ioremap_nocache(PPS_BASE, 0xF4);
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(input_pin_reg); i++) {
+               if (input_pin_reg[i].function == function) {
+                       __raw_writel(pin, pps_base + input_pin_reg[i].reg);
+                       return;
+               }
+       }
+
+       iounmap(pps_base);
+}
+
+/* Output PPS Registers */
+#define RPA14R 0x1538
+#define RPA15R 0x153C
+#define RPB0R 0x1540
+#define RPB1R 0x1544
+#define RPB2R 0x1548
+#define RPB3R 0x154C
+#define RPB5R 0x1554
+#define RPB6R 0x1558
+#define RPB7R 0x155C
+#define RPB8R 0x1560
+#define RPB9R 0x1564
+#define RPB10R 0x1568
+#define RPB14R 0x1578
+#define RPB15R 0x157C
+#define RPC1R 0x1584
+#define RPC2R 0x1588
+#define RPC3R 0x158C
+#define RPC4R 0x1590
+#define RPC13R 0x15B4
+#define RPC14R 0x15B8
+#define RPD0R 0x15C0
+#define RPD1R 0x15C4
+#define RPD2R 0x15C8
+#define RPD3R 0x15CC
+#define RPD4R 0x15D0
+#define RPD5R 0x15D4
+#define RPD6R 0x15D8
+#define RPD7R 0x15DC
+#define RPD9R 0x15E4
+#define RPD10R 0x15E8
+#define RPD11R 0x15EC
+#define RPD12R 0x15F0
+#define RPD14R 0x15F8
+#define RPD15R 0x15FC
+#define RPE3R 0x160C
+#define RPE5R 0x1614
+#define RPE8R 0x1620
+#define RPE9R 0x1624
+#define RPF0R 0x1640
+#define RPF1R 0x1644
+#define RPF2R 0x1648
+#define RPF3R 0x164C
+#define RPF4R 0x1650
+#define RPF5R 0x1654
+#define RPF8R 0x1660
+#define RPF12R 0x1670
+#define RPF13R 0x1674
+#define RPG0R 0x1680
+#define RPG1R 0x1684
+#define RPG6R 0x1698
+#define RPG7R 0x169C
+#define RPG8R 0x16A0
+#define RPG9R 0x16A4
+
+static const struct
+{
+       int pin;
+       int reg;
+} output_pin_reg[] = {
+       { OUT_RPD2, RPD2R },
+       { OUT_RPG8, RPG8R },
+       { OUT_RPF4, RPF4R },
+       { OUT_RPD10, RPD10R },
+       { OUT_RPF1, RPF1R },
+       { OUT_RPB9, RPB9R },
+       { OUT_RPB10, RPB10R },
+       { OUT_RPC14, RPC14R },
+       { OUT_RPB5, RPB5R },
+       { OUT_RPC1, RPC1R },
+       { OUT_RPD14, RPD14R },
+       { OUT_RPG1, RPG1R },
+       { OUT_RPA14, RPA14R },
+       { OUT_RPD6, RPD6R },
+       { OUT_RPD3, RPD3R },
+       { OUT_RPG7, RPG7R },
+       { OUT_RPF5, RPF5R },
+       { OUT_RPD11, RPD11R },
+       { OUT_RPF0, RPF0R },
+       { OUT_RPB1, RPB1R },
+       { OUT_RPE5, RPE5R },
+       { OUT_RPC13, RPC13R },
+       { OUT_RPB3, RPB3R },
+       { OUT_RPC4, RPC4R },
+       { OUT_RPD15, RPD15R },
+       { OUT_RPG0, RPG0R },
+       { OUT_RPA15, RPA15R },
+       { OUT_RPD7, RPD7R },
+       { OUT_RPD9, RPD9R },
+       { OUT_RPG6, RPG6R },
+       { OUT_RPB8, RPB8R },
+       { OUT_RPB15, RPB15R },
+       { OUT_RPD4, RPD4R },
+       { OUT_RPB0, RPB0R },
+       { OUT_RPE3, RPE3R },
+       { OUT_RPB7, RPB7R },
+       { OUT_RPF12, RPF12R },
+       { OUT_RPD12, RPD12R },
+       { OUT_RPF8, RPF8R },
+       { OUT_RPC3, RPC3R },
+       { OUT_RPE9, RPE9R },
+       { OUT_RPD1, RPD1R },
+       { OUT_RPG9, RPG9R },
+       { OUT_RPB14, RPB14R },
+       { OUT_RPD0, RPD0R },
+       { OUT_RPB6, RPB6R },
+       { OUT_RPD5, RPD5R },
+       { OUT_RPB2, RPB2R },
+       { OUT_RPF3, RPF3R },
+       { OUT_RPF13, RPF13R },
+       { OUT_RPC2, RPC2R },
+       { OUT_RPE8, RPE8R },
+       { OUT_RPF2, RPF2R },
+};
+
+void pic32_pps_output(int function, int pin)
+{
+       void __iomem *pps_base = ioremap_nocache(PPS_BASE, 0x170);
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(output_pin_reg); i++) {
+               if (output_pin_reg[i].pin == pin) {
+                       __raw_writel(function,
+                               pps_base + output_pin_reg[i].reg);
+                       return;
+               }
+       }
+
+       iounmap(pps_base);
+}
diff --git a/arch/mips/pic32/pic32mzda/early_pin.h b/arch/mips/pic32/pic32mzda/early_pin.h
new file mode 100644 (file)
index 0000000..417fae9
--- /dev/null
@@ -0,0 +1,241 @@
+/*
+ * Joshua Henderson <joshua.henderson@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ *  This program is free software; you can distribute it and/or modify it
+ *  under the terms of the GNU General Public License (Version 2) as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ */
+#ifndef _PIC32MZDA_EARLY_PIN_H
+#define _PIC32MZDA_EARLY_PIN_H
+
+/*
+ * This is a complete, yet overly simplistic and unoptimized, PIC32MZDA PPS
+ * configuration only useful before we have full pinctrl initialized.
+ */
+
+/* Input PPS Functions */
+enum {
+       IN_FUNC_INT3,
+       IN_FUNC_T2CK,
+       IN_FUNC_T6CK,
+       IN_FUNC_IC3,
+       IN_FUNC_IC7,
+       IN_FUNC_U1RX,
+       IN_FUNC_U2CTS,
+       IN_FUNC_U5RX,
+       IN_FUNC_U6CTS,
+       IN_FUNC_SDI1,
+       IN_FUNC_SDI3,
+       IN_FUNC_SDI5,
+       IN_FUNC_SS6,
+       IN_FUNC_REFCLKI1,
+       IN_FUNC_INT4,
+       IN_FUNC_T5CK,
+       IN_FUNC_T7CK,
+       IN_FUNC_IC4,
+       IN_FUNC_IC8,
+       IN_FUNC_U3RX,
+       IN_FUNC_U4CTS,
+       IN_FUNC_SDI2,
+       IN_FUNC_SDI4,
+       IN_FUNC_C1RX,
+       IN_FUNC_REFCLKI4,
+       IN_FUNC_INT2,
+       IN_FUNC_T3CK,
+       IN_FUNC_T8CK,
+       IN_FUNC_IC2,
+       IN_FUNC_IC5,
+       IN_FUNC_IC9,
+       IN_FUNC_U1CTS,
+       IN_FUNC_U2RX,
+       IN_FUNC_U5CTS,
+       IN_FUNC_SS1,
+       IN_FUNC_SS3,
+       IN_FUNC_SS4,
+       IN_FUNC_SS5,
+       IN_FUNC_C2RX,
+       IN_FUNC_INT1,
+       IN_FUNC_T4CK,
+       IN_FUNC_T9CK,
+       IN_FUNC_IC1,
+       IN_FUNC_IC6,
+       IN_FUNC_U3CTS,
+       IN_FUNC_U4RX,
+       IN_FUNC_U6RX,
+       IN_FUNC_SS2,
+       IN_FUNC_SDI6,
+       IN_FUNC_OCFA,
+       IN_FUNC_REFCLKI3,
+};
+
+/* Input PPS Pins */
+#define IN_RPD2 0x00
+#define IN_RPG8 0x01
+#define IN_RPF4 0x02
+#define IN_RPD10 0x03
+#define IN_RPF1 0x04
+#define IN_RPB9 0x05
+#define IN_RPB10 0x06
+#define IN_RPC14 0x07
+#define IN_RPB5 0x08
+#define IN_RPC1 0x0A
+#define IN_RPD14 0x0B
+#define IN_RPG1 0x0C
+#define IN_RPA14 0x0D
+#define IN_RPD6 0x0E
+#define IN_RPD3 0x00
+#define IN_RPG7 0x01
+#define IN_RPF5 0x02
+#define IN_RPD11 0x03
+#define IN_RPF0 0x04
+#define IN_RPB1 0x05
+#define IN_RPE5 0x06
+#define IN_RPC13 0x07
+#define IN_RPB3 0x08
+#define IN_RPC4 0x0A
+#define IN_RPD15 0x0B
+#define IN_RPG0 0x0C
+#define IN_RPA15 0x0D
+#define IN_RPD7 0x0E
+#define IN_RPD9 0x00
+#define IN_RPG6 0x01
+#define IN_RPB8 0x02
+#define IN_RPB15 0x03
+#define IN_RPD4 0x04
+#define IN_RPB0 0x05
+#define IN_RPE3 0x06
+#define IN_RPB7 0x07
+#define IN_RPF12 0x09
+#define IN_RPD12 0x0A
+#define IN_RPF8 0x0B
+#define IN_RPC3 0x0C
+#define IN_RPE9 0x0D
+#define IN_RPD1 0x00
+#define IN_RPG9 0x01
+#define IN_RPB14 0x02
+#define IN_RPD0 0x03
+#define IN_RPB6 0x05
+#define IN_RPD5 0x06
+#define IN_RPB2 0x07
+#define IN_RPF3 0x08
+#define IN_RPF13 0x09
+#define IN_RPF2 0x0B
+#define IN_RPC2 0x0C
+#define IN_RPE8 0x0D
+
+/* Output PPS Pins */
+enum {
+       OUT_RPD2,
+       OUT_RPG8,
+       OUT_RPF4,
+       OUT_RPD10,
+       OUT_RPF1,
+       OUT_RPB9,
+       OUT_RPB10,
+       OUT_RPC14,
+       OUT_RPB5,
+       OUT_RPC1,
+       OUT_RPD14,
+       OUT_RPG1,
+       OUT_RPA14,
+       OUT_RPD6,
+       OUT_RPD3,
+       OUT_RPG7,
+       OUT_RPF5,
+       OUT_RPD11,
+       OUT_RPF0,
+       OUT_RPB1,
+       OUT_RPE5,
+       OUT_RPC13,
+       OUT_RPB3,
+       OUT_RPC4,
+       OUT_RPD15,
+       OUT_RPG0,
+       OUT_RPA15,
+       OUT_RPD7,
+       OUT_RPD9,
+       OUT_RPG6,
+       OUT_RPB8,
+       OUT_RPB15,
+       OUT_RPD4,
+       OUT_RPB0,
+       OUT_RPE3,
+       OUT_RPB7,
+       OUT_RPF12,
+       OUT_RPD12,
+       OUT_RPF8,
+       OUT_RPC3,
+       OUT_RPE9,
+       OUT_RPD1,
+       OUT_RPG9,
+       OUT_RPB14,
+       OUT_RPD0,
+       OUT_RPB6,
+       OUT_RPD5,
+       OUT_RPB2,
+       OUT_RPF3,
+       OUT_RPF13,
+       OUT_RPC2,
+       OUT_RPE8,
+       OUT_RPF2,
+};
+
+/* Output PPS Functions */
+#define OUT_FUNC_U3TX 0x01
+#define OUT_FUNC_U4RTS 0x02
+#define OUT_FUNC_SDO1 0x05
+#define OUT_FUNC_SDO2 0x06
+#define OUT_FUNC_SDO3 0x07
+#define OUT_FUNC_SDO5 0x09
+#define OUT_FUNC_SS6 0x0A
+#define OUT_FUNC_OC3 0x0B
+#define OUT_FUNC_OC6 0x0C
+#define OUT_FUNC_REFCLKO4 0x0D
+#define OUT_FUNC_C2OUT 0x0E
+#define OUT_FUNC_C1TX 0x0F
+#define OUT_FUNC_U1TX 0x01
+#define OUT_FUNC_U2RTS 0x02
+#define OUT_FUNC_U5TX 0x03
+#define OUT_FUNC_U6RTS 0x04
+#define OUT_FUNC_SDO1 0x05
+#define OUT_FUNC_SDO2 0x06
+#define OUT_FUNC_SDO3 0x07
+#define OUT_FUNC_SDO4 0x08
+#define OUT_FUNC_SDO5 0x09
+#define OUT_FUNC_OC4 0x0B
+#define OUT_FUNC_OC7 0x0C
+#define OUT_FUNC_REFCLKO1 0x0F
+#define OUT_FUNC_U3RTS 0x01
+#define OUT_FUNC_U4TX 0x02
+#define OUT_FUNC_U6TX 0x04
+#define OUT_FUNC_SS1 0x05
+#define OUT_FUNC_SS3 0x07
+#define OUT_FUNC_SS4 0x08
+#define OUT_FUNC_SS5 0x09
+#define OUT_FUNC_SDO6 0x0A
+#define OUT_FUNC_OC5 0x0B
+#define OUT_FUNC_OC8 0x0C
+#define OUT_FUNC_C1OUT 0x0E
+#define OUT_FUNC_REFCLKO3 0x0F
+#define OUT_FUNC_U1RTS 0x01
+#define OUT_FUNC_U2TX 0x02
+#define OUT_FUNC_U5RTS 0x03
+#define OUT_FUNC_U6TX 0x04
+#define OUT_FUNC_SS2 0x06
+#define OUT_FUNC_SDO4 0x08
+#define OUT_FUNC_SDO6 0x0A
+#define OUT_FUNC_OC2 0x0B
+#define OUT_FUNC_OC1 0x0C
+#define OUT_FUNC_OC9 0x0D
+#define OUT_FUNC_C2TX 0x0F
+
+void pic32_pps_input(int function, int pin);
+void pic32_pps_output(int function, int pin);
+
+#endif
diff --git a/arch/mips/pic32/pic32mzda/init.c b/arch/mips/pic32/pic32mzda/init.c
new file mode 100644 (file)
index 0000000..775ff90
--- /dev/null
@@ -0,0 +1,156 @@
+/*
+ * Joshua Henderson, joshua.henderson@microchip.com
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ *  This program is free software; you can distribute it and/or modify it
+ *  under the terms of the GNU General Public License (Version 2) as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/of_address.h>
+#include <linux/of_fdt.h>
+#include <linux/of_platform.h>
+#include <linux/platform_data/sdhci-pic32.h>
+
+#include <asm/fw/fw.h>
+#include <asm/mips-boards/generic.h>
+#include <asm/prom.h>
+
+#include "pic32mzda.h"
+
+const char *get_system_type(void)
+{
+       return "PIC32MZDA";
+}
+
+static ulong get_fdtaddr(void)
+{
+       ulong ftaddr = 0;
+
+       if ((fw_arg0 == -2) && fw_arg1 && !fw_arg2 && !fw_arg3)
+               return (ulong)fw_arg1;
+
+       if (__dtb_start < __dtb_end)
+               ftaddr = (ulong)__dtb_start;
+
+       return ftaddr;
+}
+
+void __init plat_mem_setup(void)
+{
+       void *dtb;
+
+       dtb = (void *)get_fdtaddr();
+       if (!dtb) {
+               pr_err("pic32: no DTB found.\n");
+               return;
+       }
+
+       /*
+        * Load the builtin device tree. This causes the chosen node to be
+        * parsed resulting in our memory appearing.
+        */
+       __dt_setup_arch(dtb);
+
+       pr_info("Found following command lines\n");
+       pr_info(" boot_command_line: %s\n", boot_command_line);
+       pr_info(" arcs_cmdline     : %s\n", arcs_cmdline);
+#ifdef CONFIG_CMDLINE_BOOL
+       pr_info(" builtin_cmdline  : %s\n", CONFIG_CMDLINE);
+#endif
+       if (dtb != __dtb_start)
+               strlcpy(arcs_cmdline, boot_command_line, COMMAND_LINE_SIZE);
+
+#ifdef CONFIG_EARLY_PRINTK
+       fw_init_early_console(-1);
+#endif
+       pic32_config_init();
+}
+
+static __init void pic32_init_cmdline(int argc, char *argv[])
+{
+       unsigned int count = COMMAND_LINE_SIZE - 1;
+       int i;
+       char *dst = &(arcs_cmdline[0]);
+       char *src;
+
+       for (i = 1; i < argc && count; ++i) {
+               src = argv[i];
+               while (*src && count) {
+                       *dst++ = *src++;
+                       --count;
+               }
+               *dst++ = ' ';
+       }
+       if (i > 1)
+               --dst;
+
+       *dst = 0;
+}
+
+void __init prom_init(void)
+{
+       pic32_init_cmdline((int)fw_arg0, (char **)fw_arg1);
+}
+
+void __init prom_free_prom_memory(void)
+{
+}
+
+void __init device_tree_init(void)
+{
+       if (!initial_boot_params)
+               return;
+
+       unflatten_and_copy_device_tree();
+}
+
+static struct pic32_sdhci_platform_data sdhci_data = {
+       .setup_dma = pic32_set_sdhci_adma_fifo_threshold,
+};
+
+static struct of_dev_auxdata pic32_auxdata_lookup[] __initdata = {
+       OF_DEV_AUXDATA("microchip,pic32mzda-sdhci", 0, "sdhci", &sdhci_data),
+       { /* sentinel */}
+};
+
+static int __init pic32_of_prepare_platform_data(struct of_dev_auxdata *lookup)
+{
+       struct device_node *root, *np;
+       struct resource res;
+
+       root = of_find_node_by_path("/");
+
+       for (; lookup->compatible; lookup++) {
+               np = of_find_compatible_node(NULL, NULL, lookup->compatible);
+               if (np) {
+                       lookup->name = (char *)np->name;
+                       if (lookup->phys_addr)
+                               continue;
+                       if (!of_address_to_resource(np, 0, &res))
+                               lookup->phys_addr = res.start;
+               }
+       }
+
+       return 0;
+}
+
+static int __init plat_of_setup(void)
+{
+       if (!of_have_populated_dt())
+               panic("Device tree not present");
+
+       pic32_of_prepare_platform_data(pic32_auxdata_lookup);
+       if (of_platform_populate(NULL, of_default_bus_match_table,
+                                pic32_auxdata_lookup, NULL))
+               panic("Failed to populate DT");
+
+       return 0;
+}
+arch_initcall(plat_of_setup);
diff --git a/arch/mips/pic32/pic32mzda/pic32mzda.h b/arch/mips/pic32/pic32mzda/pic32mzda.h
new file mode 100644 (file)
index 0000000..96d10e2
--- /dev/null
@@ -0,0 +1,29 @@
+/*
+ * Joshua Henderson <joshua.henderson@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ *  This program is free software; you can distribute it and/or modify it
+ *  under the terms of the GNU General Public License (Version 2) as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ */
+#ifndef PIC32MZDA_COMMON_H
+#define PIC32MZDA_COMMON_H
+
+/* early clock */
+u32 pic32_get_pbclk(int bus);
+u32 pic32_get_sysclk(void);
+
+/* Device configuration */
+void __init pic32_config_init(void);
+int pic32_set_lcd_mode(int mode);
+int pic32_set_sdhci_adma_fifo_threshold(u32 rthrs, u32 wthrs);
+u32 pic32_get_boot_status(void);
+int pic32_disable_lcd(void);
+int pic32_enable_lcd(void);
+
+#endif
diff --git a/arch/mips/pic32/pic32mzda/time.c b/arch/mips/pic32/pic32mzda/time.c
new file mode 100644 (file)
index 0000000..ca6a62b
--- /dev/null
@@ -0,0 +1,73 @@
+/*
+ * Joshua Henderson <joshua.henderson@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ *  This program is free software; you can distribute it and/or modify it
+ *  under the terms of the GNU General Public License (Version 2) as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ */
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
+#include <linux/clocksource.h>
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/irqdomain.h>
+
+#include <asm/time.h>
+
+#include "pic32mzda.h"
+
+static const struct of_device_id pic32_infra_match[] = {
+       { .compatible = "microchip,pic32mzda-infra", },
+       { },
+};
+
+#define DEFAULT_CORE_TIMER_INTERRUPT 0
+
+static unsigned int pic32_xlate_core_timer_irq(void)
+{
+       static struct device_node *node;
+       unsigned int irq;
+
+       node = of_find_matching_node(NULL, pic32_infra_match);
+
+       if (WARN_ON(!node))
+               goto default_map;
+
+       irq = irq_of_parse_and_map(node, 0);
+       if (!irq)
+               goto default_map;
+
+       return irq;
+
+default_map:
+
+       return irq_create_mapping(NULL, DEFAULT_CORE_TIMER_INTERRUPT);
+}
+
+unsigned int get_c0_compare_int(void)
+{
+       return pic32_xlate_core_timer_irq();
+}
+
+void __init plat_time_init(void)
+{
+       struct clk *clk;
+
+       of_clk_init(NULL);
+       clk = clk_get_sys("cpu_clk", NULL);
+       if (IS_ERR(clk))
+               panic("unable to get CPU clock, err=%ld", PTR_ERR(clk));
+
+       clk_prepare_enable(clk);
+       pr_info("CPU Clock: %ldMHz\n", clk_get_rate(clk) / 1000000);
+       mips_hpt_frequency = clk_get_rate(clk) / 2;
+
+       clocksource_probe();
+}
index e9bc8c96174e227a8d9cf730bcc4bfb0ff0fb1cf..813826a456ca33d30ff84dfb9724a2796871fb4a 100644 (file)
@@ -12,6 +12,11 @@ config RALINK_ILL_ACC
        depends on SOC_RT305X
        default y
 
+config IRQ_INTC
+       bool
+       default y
+       depends on !SOC_MT7621
+
 choice
        prompt "Ralink SoC selection"
        default SOC_RT305X
@@ -33,7 +38,18 @@ choice
 
        config SOC_MT7620
                bool "MT7620/8"
+               select HW_HAS_PCI
 
+       config SOC_MT7621
+               bool "MT7621"
+               select MIPS_CPU_SCACHE
+               select SYS_SUPPORTS_MULTITHREADING
+               select SYS_SUPPORTS_SMP
+               select SYS_SUPPORTS_MIPS_CPS
+               select MIPS_GIC
+               select COMMON_CLK
+               select CLKSRC_MIPS_GIC
+               select HW_HAS_PCI
 endchoice
 
 choice
index a6c9d00613269f6a5cbb1d416eb8b8450b9f3a65..0d1795a0321e373d93adbfc073be4ac67423d97b 100644 (file)
@@ -6,16 +6,24 @@
 # Copyright (C) 2009-2011 Gabor Juhos <juhosg@openwrt.org>
 # Copyright (C) 2013 John Crispin <blogic@openwrt.org>
 
-obj-y := prom.o of.o reset.o clk.o irq.o timer.o
+obj-y := prom.o of.o reset.o
+
+ifndef CONFIG_MIPS_GIC
+       obj-y += clk.o timer.o
+endif
 
 obj-$(CONFIG_CLKEVT_RT3352) += cevt-rt3352.o
 
 obj-$(CONFIG_RALINK_ILL_ACC) += ill_acc.o
 
+obj-$(CONFIG_IRQ_INTC) += irq.o
+obj-$(CONFIG_MIPS_GIC) += irq-gic.o timer-gic.o
+
 obj-$(CONFIG_SOC_RT288X) += rt288x.o
 obj-$(CONFIG_SOC_RT305X) += rt305x.o
 obj-$(CONFIG_SOC_RT3883) += rt3883.o
 obj-$(CONFIG_SOC_MT7620) += mt7620.o
+obj-$(CONFIG_SOC_MT7621) += mt7621.o
 
 obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
 
index 6d9c8c499f988d4949f72c6484f8e948dff6fec2..6095fcc334f42377b2bc3b44631eb429154e85a0 100644 (file)
@@ -27,3 +27,8 @@ cflags-$(CONFIG_SOC_RT3883)   += -I$(srctree)/arch/mips/include/asm/mach-ralink/rt
 #
 load-$(CONFIG_SOC_MT7620)      += 0xffffffff80000000
 cflags-$(CONFIG_SOC_MT7620)    += -I$(srctree)/arch/mips/include/asm/mach-ralink/mt7620
+
+# Ralink MT7621
+#
+load-$(CONFIG_SOC_MT7621)      += 0xffffffff80001000
+cflags-$(CONFIG_SOC_MT7621)    += -I$(srctree)/arch/mips/include/asm/mach-ralink/mt7621
diff --git a/arch/mips/ralink/irq-gic.c b/arch/mips/ralink/irq-gic.c
new file mode 100644 (file)
index 0000000..50d6c55
--- /dev/null
@@ -0,0 +1,25 @@
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * Copyright (C) 2015 Nikolay Martynov <mar.kolya@gmail.com>
+ * Copyright (C) 2015 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/init.h>
+
+#include <linux/of.h>
+#include <linux/irqchip.h>
+#include <linux/irqchip/mips-gic.h>
+
+int get_c0_perfcount_int(void)
+{
+       return gic_get_c0_perfcount_int();
+}
+EXPORT_SYMBOL_GPL(get_c0_perfcount_int);
+
+void __init arch_init_irq(void)
+{
+       irqchip_init();
+}
index dfb04fcedb042d9b80621ac3197886a1ea3bccda..0d3d1a97895f167d613cd469422dc4e2bfe6087c 100644 (file)
@@ -107,31 +107,31 @@ static struct rt2880_pmx_group mt7620a_pinmux_data[] = {
 };
 
 static struct rt2880_pmx_func pwm1_grp_mt7628[] = {
-       FUNC("sdcx", 3, 19, 1),
+       FUNC("sdxc d6", 3, 19, 1),
        FUNC("utif", 2, 19, 1),
        FUNC("gpio", 1, 19, 1),
-       FUNC("pwm", 0, 19, 1),
+       FUNC("pwm1", 0, 19, 1),
 };
 
 static struct rt2880_pmx_func pwm0_grp_mt7628[] = {
-       FUNC("sdcx", 3, 18, 1),
+       FUNC("sdxc d7", 3, 18, 1),
        FUNC("utif", 2, 18, 1),
        FUNC("gpio", 1, 18, 1),
-       FUNC("pwm", 0, 18, 1),
+       FUNC("pwm0", 0, 18, 1),
 };
 
 static struct rt2880_pmx_func uart2_grp_mt7628[] = {
-       FUNC("sdcx", 3, 20, 2),
+       FUNC("sdxc d5 d4", 3, 20, 2),
        FUNC("pwm", 2, 20, 2),
        FUNC("gpio", 1, 20, 2),
-       FUNC("uart", 0, 20, 2),
+       FUNC("uart2", 0, 20, 2),
 };
 
 static struct rt2880_pmx_func uart1_grp_mt7628[] = {
-       FUNC("sdcx", 3, 45, 2),
+       FUNC("sw_r", 3, 45, 2),
        FUNC("pwm", 2, 45, 2),
        FUNC("gpio", 1, 45, 2),
-       FUNC("uart", 0, 45, 2),
+       FUNC("uart1", 0, 45, 2),
 };
 
 static struct rt2880_pmx_func i2c_grp_mt7628[] = {
@@ -143,21 +143,21 @@ static struct rt2880_pmx_func i2c_grp_mt7628[] = {
 
 static struct rt2880_pmx_func refclk_grp_mt7628[] = { FUNC("reclk", 0, 36, 1) };
 static struct rt2880_pmx_func perst_grp_mt7628[] = { FUNC("perst", 0, 37, 1) };
-static struct rt2880_pmx_func wdt_grp_mt7628[] = { FUNC("wdt", 0, 15, 38) };
+static struct rt2880_pmx_func wdt_grp_mt7628[] = { FUNC("wdt", 0, 38, 1) };
 static struct rt2880_pmx_func spi_grp_mt7628[] = { FUNC("spi", 0, 7, 4) };
 
 static struct rt2880_pmx_func sd_mode_grp_mt7628[] = {
        FUNC("jtag", 3, 22, 8),
        FUNC("utif", 2, 22, 8),
        FUNC("gpio", 1, 22, 8),
-       FUNC("sdcx", 0, 22, 8),
+       FUNC("sdxc", 0, 22, 8),
 };
 
 static struct rt2880_pmx_func uart0_grp_mt7628[] = {
        FUNC("-", 3, 12, 2),
        FUNC("-", 2, 12, 2),
        FUNC("gpio", 1, 12, 2),
-       FUNC("uart", 0, 12, 2),
+       FUNC("uart0", 0, 12, 2),
 };
 
 static struct rt2880_pmx_func i2s_grp_mt7628[] = {
@@ -171,7 +171,7 @@ static struct rt2880_pmx_func spi_cs1_grp_mt7628[] = {
        FUNC("-", 3, 6, 1),
        FUNC("refclk", 2, 6, 1),
        FUNC("gpio", 1, 6, 1),
-       FUNC("spi", 0, 6, 1),
+       FUNC("spi cs1", 0, 6, 1),
 };
 
 static struct rt2880_pmx_func spis_grp_mt7628[] = {
@@ -188,28 +188,44 @@ static struct rt2880_pmx_func gpio_grp_mt7628[] = {
        FUNC("gpio", 0, 11, 1),
 };
 
-#define MT7628_GPIO_MODE_MASK  0x3
-
-#define MT7628_GPIO_MODE_PWM1  30
-#define MT7628_GPIO_MODE_PWM0  28
-#define MT7628_GPIO_MODE_UART2 26
-#define MT7628_GPIO_MODE_UART1 24
-#define MT7628_GPIO_MODE_I2C   20
-#define MT7628_GPIO_MODE_REFCLK        18
-#define MT7628_GPIO_MODE_PERST 16
-#define MT7628_GPIO_MODE_WDT   14
-#define MT7628_GPIO_MODE_SPI   12
-#define MT7628_GPIO_MODE_SDMODE        10
-#define MT7628_GPIO_MODE_UART0 8
-#define MT7628_GPIO_MODE_I2S   6
-#define MT7628_GPIO_MODE_CS1   4
-#define MT7628_GPIO_MODE_SPIS  2
-#define MT7628_GPIO_MODE_GPIO  0
+static struct rt2880_pmx_func wled_kn_grp_mt7628[] = {
+       FUNC("rsvd", 3, 35, 1),
+       FUNC("rsvd", 2, 35, 1),
+       FUNC("gpio", 1, 35, 1),
+       FUNC("wled_kn", 0, 35, 1),
+};
+
+static struct rt2880_pmx_func wled_an_grp_mt7628[] = {
+       FUNC("rsvd", 3, 35, 1),
+       FUNC("rsvd", 2, 35, 1),
+       FUNC("gpio", 1, 35, 1),
+       FUNC("wled_an", 0, 35, 1),
+};
+
+#define MT7628_GPIO_MODE_MASK          0x3
+
+#define MT7628_GPIO_MODE_WLED_KN       48
+#define MT7628_GPIO_MODE_WLED_AN       32
+#define MT7628_GPIO_MODE_PWM1          30
+#define MT7628_GPIO_MODE_PWM0          28
+#define MT7628_GPIO_MODE_UART2         26
+#define MT7628_GPIO_MODE_UART1         24
+#define MT7628_GPIO_MODE_I2C           20
+#define MT7628_GPIO_MODE_REFCLK                18
+#define MT7628_GPIO_MODE_PERST         16
+#define MT7628_GPIO_MODE_WDT           14
+#define MT7628_GPIO_MODE_SPI           12
+#define MT7628_GPIO_MODE_SDMODE                10
+#define MT7628_GPIO_MODE_UART0         8
+#define MT7628_GPIO_MODE_I2S           6
+#define MT7628_GPIO_MODE_CS1           4
+#define MT7628_GPIO_MODE_SPIS          2
+#define MT7628_GPIO_MODE_GPIO          0
 
 static struct rt2880_pmx_group mt7628an_pinmux_data[] = {
        GRP_G("pmw1", pwm1_grp_mt7628, MT7628_GPIO_MODE_MASK,
                                1, MT7628_GPIO_MODE_PWM1),
-       GRP_G("pmw1", pwm0_grp_mt7628, MT7628_GPIO_MODE_MASK,
+       GRP_G("pmw0", pwm0_grp_mt7628, MT7628_GPIO_MODE_MASK,
                                1, MT7628_GPIO_MODE_PWM0),
        GRP_G("uart2", uart2_grp_mt7628, MT7628_GPIO_MODE_MASK,
                                1, MT7628_GPIO_MODE_UART2),
@@ -233,6 +249,10 @@ static struct rt2880_pmx_group mt7628an_pinmux_data[] = {
                                1, MT7628_GPIO_MODE_SPIS),
        GRP_G("gpio", gpio_grp_mt7628, MT7628_GPIO_MODE_MASK,
                                1, MT7628_GPIO_MODE_GPIO),
+       GRP_G("wled_an", wled_an_grp_mt7628, MT7628_GPIO_MODE_MASK,
+                               1, MT7628_GPIO_MODE_WLED_AN),
+       GRP_G("wled_kn", wled_kn_grp_mt7628, MT7628_GPIO_MODE_MASK,
+                               1, MT7628_GPIO_MODE_WLED_KN),
        { 0 }
 };
 
@@ -436,10 +456,13 @@ void __init ralink_clk_init(void)
        ralink_clk_add("10000100.timer", periph_rate);
        ralink_clk_add("10000120.watchdog", periph_rate);
        ralink_clk_add("10000b00.spi", sys_rate);
+       ralink_clk_add("10000b40.spi", sys_rate);
        ralink_clk_add("10000c00.uartlite", periph_rate);
+       ralink_clk_add("10000d00.uart1", periph_rate);
+       ralink_clk_add("10000e00.uart2", periph_rate);
        ralink_clk_add("10180000.wmac", xtal_rate);
 
-       if (IS_ENABLED(CONFIG_USB) && is_mt76x8()) {
+       if (IS_ENABLED(CONFIG_USB) && !is_mt76x8()) {
                /*
                 * When the CPU goes into sleep mode, the BUS clock will be
                 * too low for USB to function properly. Adjust the busses
@@ -552,7 +575,7 @@ void prom_soc_init(struct ralink_soc_info *soc_info)
        }
 
        snprintf(soc_info->sys_type, RAMIPS_SYS_TYPE_LEN,
-               "Ralink %s ver:%u eco:%u",
+               "MediaTek %s ver:%u eco:%u",
                name,
                (rev >> CHIP_REV_VER_SHIFT) & CHIP_REV_VER_MASK,
                (rev & CHIP_REV_ECO_MASK));
diff --git a/arch/mips/ralink/mt7621.c b/arch/mips/ralink/mt7621.c
new file mode 100644 (file)
index 0000000..e9b9fa3
--- /dev/null
@@ -0,0 +1,226 @@
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * Copyright (C) 2015 Nikolay Martynov <mar.kolya@gmail.com>
+ * Copyright (C) 2015 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+
+#include <asm/mipsregs.h>
+#include <asm/smp-ops.h>
+#include <asm/mips-cm.h>
+#include <asm/mips-cpc.h>
+#include <asm/mach-ralink/ralink_regs.h>
+#include <asm/mach-ralink/mt7621.h>
+
+#include <pinmux.h>
+
+#include "common.h"
+
+#define SYSC_REG_SYSCFG                0x10
+#define SYSC_REG_CPLL_CLKCFG0  0x2c
+#define SYSC_REG_CUR_CLK_STS   0x44
+#define CPU_CLK_SEL            (BIT(30) | BIT(31))
+
+#define MT7621_GPIO_MODE_UART1         1
+#define MT7621_GPIO_MODE_I2C           2
+#define MT7621_GPIO_MODE_UART3_MASK    0x3
+#define MT7621_GPIO_MODE_UART3_SHIFT   3
+#define MT7621_GPIO_MODE_UART3_GPIO    1
+#define MT7621_GPIO_MODE_UART2_MASK    0x3
+#define MT7621_GPIO_MODE_UART2_SHIFT   5
+#define MT7621_GPIO_MODE_UART2_GPIO    1
+#define MT7621_GPIO_MODE_JTAG          7
+#define MT7621_GPIO_MODE_WDT_MASK      0x3
+#define MT7621_GPIO_MODE_WDT_SHIFT     8
+#define MT7621_GPIO_MODE_WDT_GPIO      1
+#define MT7621_GPIO_MODE_PCIE_RST      0
+#define MT7621_GPIO_MODE_PCIE_REF      2
+#define MT7621_GPIO_MODE_PCIE_MASK     0x3
+#define MT7621_GPIO_MODE_PCIE_SHIFT    10
+#define MT7621_GPIO_MODE_PCIE_GPIO     1
+#define MT7621_GPIO_MODE_MDIO_MASK     0x3
+#define MT7621_GPIO_MODE_MDIO_SHIFT    12
+#define MT7621_GPIO_MODE_MDIO_GPIO     1
+#define MT7621_GPIO_MODE_RGMII1                14
+#define MT7621_GPIO_MODE_RGMII2                15
+#define MT7621_GPIO_MODE_SPI_MASK      0x3
+#define MT7621_GPIO_MODE_SPI_SHIFT     16
+#define MT7621_GPIO_MODE_SPI_GPIO      1
+#define MT7621_GPIO_MODE_SDHCI_MASK    0x3
+#define MT7621_GPIO_MODE_SDHCI_SHIFT   18
+#define MT7621_GPIO_MODE_SDHCI_GPIO    1
+
+static struct rt2880_pmx_func uart1_grp[] =  { FUNC("uart1", 0, 1, 2) };
+static struct rt2880_pmx_func i2c_grp[] =  { FUNC("i2c", 0, 3, 2) };
+static struct rt2880_pmx_func uart3_grp[] = {
+       FUNC("uart3", 0, 5, 4),
+       FUNC("i2s", 2, 5, 4),
+       FUNC("spdif3", 3, 5, 4),
+};
+static struct rt2880_pmx_func uart2_grp[] = {
+       FUNC("uart2", 0, 9, 4),
+       FUNC("pcm", 2, 9, 4),
+       FUNC("spdif2", 3, 9, 4),
+};
+static struct rt2880_pmx_func jtag_grp[] = { FUNC("jtag", 0, 13, 5) };
+static struct rt2880_pmx_func wdt_grp[] = {
+       FUNC("wdt rst", 0, 18, 1),
+       FUNC("wdt refclk", 2, 18, 1),
+};
+static struct rt2880_pmx_func pcie_rst_grp[] = {
+       FUNC("pcie rst", MT7621_GPIO_MODE_PCIE_RST, 19, 1),
+       FUNC("pcie refclk", MT7621_GPIO_MODE_PCIE_REF, 19, 1)
+};
+static struct rt2880_pmx_func mdio_grp[] = { FUNC("mdio", 0, 20, 2) };
+static struct rt2880_pmx_func rgmii2_grp[] = { FUNC("rgmii2", 0, 22, 12) };
+static struct rt2880_pmx_func spi_grp[] = {
+       FUNC("spi", 0, 34, 7),
+       FUNC("nand1", 2, 34, 7),
+};
+static struct rt2880_pmx_func sdhci_grp[] = {
+       FUNC("sdhci", 0, 41, 8),
+       FUNC("nand2", 2, 41, 8),
+};
+static struct rt2880_pmx_func rgmii1_grp[] = { FUNC("rgmii1", 0, 49, 12) };
+
+static struct rt2880_pmx_group mt7621_pinmux_data[] = {
+       GRP("uart1", uart1_grp, 1, MT7621_GPIO_MODE_UART1),
+       GRP("i2c", i2c_grp, 1, MT7621_GPIO_MODE_I2C),
+       GRP_G("uart3", uart3_grp, MT7621_GPIO_MODE_UART3_MASK,
+               MT7621_GPIO_MODE_UART3_GPIO, MT7621_GPIO_MODE_UART3_SHIFT),
+       GRP_G("uart2", uart2_grp, MT7621_GPIO_MODE_UART2_MASK,
+               MT7621_GPIO_MODE_UART2_GPIO, MT7621_GPIO_MODE_UART2_SHIFT),
+       GRP("jtag", jtag_grp, 1, MT7621_GPIO_MODE_JTAG),
+       GRP_G("wdt", wdt_grp, MT7621_GPIO_MODE_WDT_MASK,
+               MT7621_GPIO_MODE_WDT_GPIO, MT7621_GPIO_MODE_WDT_SHIFT),
+       GRP_G("pcie", pcie_rst_grp, MT7621_GPIO_MODE_PCIE_MASK,
+               MT7621_GPIO_MODE_PCIE_GPIO, MT7621_GPIO_MODE_PCIE_SHIFT),
+       GRP_G("mdio", mdio_grp, MT7621_GPIO_MODE_MDIO_MASK,
+               MT7621_GPIO_MODE_MDIO_GPIO, MT7621_GPIO_MODE_MDIO_SHIFT),
+       GRP("rgmii2", rgmii2_grp, 1, MT7621_GPIO_MODE_RGMII2),
+       GRP_G("spi", spi_grp, MT7621_GPIO_MODE_SPI_MASK,
+               MT7621_GPIO_MODE_SPI_GPIO, MT7621_GPIO_MODE_SPI_SHIFT),
+       GRP_G("sdhci", sdhci_grp, MT7621_GPIO_MODE_SDHCI_MASK,
+               MT7621_GPIO_MODE_SDHCI_GPIO, MT7621_GPIO_MODE_SDHCI_SHIFT),
+       GRP("rgmii1", rgmii1_grp, 1, MT7621_GPIO_MODE_RGMII1),
+       { 0 }
+};
+
+phys_addr_t mips_cpc_default_phys_base(void)
+{
+       panic("Cannot detect cpc address");
+}
+
+void __init ralink_clk_init(void)
+{
+       int cpu_fdiv = 0;
+       int cpu_ffrac = 0;
+       int fbdiv = 0;
+       u32 clk_sts, syscfg;
+       u8 clk_sel = 0, xtal_mode;
+       u32 cpu_clk;
+
+       if ((rt_sysc_r32(SYSC_REG_CPLL_CLKCFG0) & CPU_CLK_SEL) != 0)
+               clk_sel = 1;
+
+       switch (clk_sel) {
+       case 0:
+               clk_sts = rt_sysc_r32(SYSC_REG_CUR_CLK_STS);
+               cpu_fdiv = ((clk_sts >> 8) & 0x1F);
+               cpu_ffrac = (clk_sts & 0x1F);
+               cpu_clk = (500 * cpu_ffrac / cpu_fdiv) * 1000 * 1000;
+               break;
+
+       case 1:
+               fbdiv = ((rt_sysc_r32(0x648) >> 4) & 0x7F) + 1;
+               syscfg = rt_sysc_r32(SYSC_REG_SYSCFG);
+               xtal_mode = (syscfg >> 6) & 0x7;
+               if (xtal_mode >= 6) {
+                       /* 25Mhz Xtal */
+                       cpu_clk = 25 * fbdiv * 1000 * 1000;
+               } else if (xtal_mode >= 3) {
+                       /* 40Mhz Xtal */
+                       cpu_clk = 40 * fbdiv * 1000 * 1000;
+               } else {
+                       /* 20Mhz Xtal */
+                       cpu_clk = 20 * fbdiv * 1000 * 1000;
+               }
+               break;
+       }
+}
+
+void __init ralink_of_remap(void)
+{
+       rt_sysc_membase = plat_of_remap_node("mtk,mt7621-sysc");
+       rt_memc_membase = plat_of_remap_node("mtk,mt7621-memc");
+
+       if (!rt_sysc_membase || !rt_memc_membase)
+               panic("Failed to remap core resources");
+}
+
+void prom_soc_init(struct ralink_soc_info *soc_info)
+{
+       void __iomem *sysc = (void __iomem *) KSEG1ADDR(MT7621_SYSC_BASE);
+       unsigned char *name = NULL;
+       u32 n0;
+       u32 n1;
+       u32 rev;
+
+       n0 = __raw_readl(sysc + SYSC_REG_CHIP_NAME0);
+       n1 = __raw_readl(sysc + SYSC_REG_CHIP_NAME1);
+
+       if (n0 == MT7621_CHIP_NAME0 && n1 == MT7621_CHIP_NAME1) {
+               name = "MT7621";
+               soc_info->compatible = "mtk,mt7621-soc";
+       } else {
+               panic("mt7621: unknown SoC, n0:%08x n1:%08x\n", n0, n1);
+       }
+
+       rev = __raw_readl(sysc + SYSC_REG_CHIP_REV);
+
+       snprintf(soc_info->sys_type, RAMIPS_SYS_TYPE_LEN,
+               "MediaTek %s ver:%u eco:%u",
+               name,
+               (rev >> CHIP_REV_VER_SHIFT) & CHIP_REV_VER_MASK,
+               (rev & CHIP_REV_ECO_MASK));
+
+       soc_info->mem_size_min = MT7621_DDR2_SIZE_MIN;
+       soc_info->mem_size_max = MT7621_DDR2_SIZE_MAX;
+       soc_info->mem_base = MT7621_DRAM_BASE;
+
+       rt2880_pinmux_data = mt7621_pinmux_data;
+
+       /* Early detection of CMP support */
+       mips_cm_probe();
+       mips_cpc_probe();
+
+       if (mips_cm_numiocu()) {
+               /*
+                * mips_cm_probe() wipes out bootloader
+                * config for CM regions and we have to configure them
+                * again. This SoC cannot talk to pamlbus devices
+                * witout proper iocu region set up.
+                *
+                * FIXME: it would be better to do this with values
+                * from DT, but we need this very early because
+                * without this we cannot talk to pretty much anything
+                * including serial.
+                */
+               write_gcr_reg0_base(MT7621_PALMBUS_BASE);
+               write_gcr_reg0_mask(~MT7621_PALMBUS_SIZE |
+                                   CM_GCR_REGn_MASK_CMTGT_IOCU0);
+       }
+
+       if (!register_cps_smp_ops())
+               return;
+       if (!register_cmp_smp_ops())
+               return;
+       if (!register_vsmp_smp_ops())
+               return;
+}
index 844f5cd55c8f1c1e96e20db2a888c22e1fb12327..3c84166ebcb779b1f50f9711c820977295873377 100644 (file)
@@ -119,5 +119,5 @@ void prom_soc_init(struct ralink_soc_info *soc_info)
        soc_info->mem_size_max = RT2880_MEM_SIZE_MAX;
 
        rt2880_pinmux_data = rt2880_pinmux_data_act;
-       ralink_soc == RT2880_SOC;
+       ralink_soc = RT2880_SOC;
 }
index 9e45725920650bd55350549097f9330403f3ef6a..d7c4ba43a4284525b6de3b3f95daa42d8a846ad2 100644 (file)
@@ -201,6 +201,7 @@ void __init ralink_clk_init(void)
        ralink_clk_add("cpu", cpu_rate);
        ralink_clk_add("sys", sys_rate);
        ralink_clk_add("10000b00.spi", sys_rate);
+       ralink_clk_add("10000b40.spi", sys_rate);
        ralink_clk_add("10000100.timer", wdt_rate);
        ralink_clk_add("10000120.watchdog", wdt_rate);
        ralink_clk_add("10000500.uart", uart_rate);
index 582995aaaf4e7e8a9848c512d150b515f3628344..fafec947b27df7dc436d4701f698c734904e7421 100644 (file)
@@ -109,6 +109,7 @@ void __init ralink_clk_init(void)
        ralink_clk_add("10000120.watchdog", sys_rate);
        ralink_clk_add("10000500.uart", 40000000);
        ralink_clk_add("10000b00.spi", sys_rate);
+       ralink_clk_add("10000b40.spi", sys_rate);
        ralink_clk_add("10000c00.uartlite", 40000000);
        ralink_clk_add("10100000.ethernet", sys_rate);
        ralink_clk_add("10180000.wmac", 40000000);
diff --git a/arch/mips/ralink/timer-gic.c b/arch/mips/ralink/timer-gic.c
new file mode 100644 (file)
index 0000000..5b4f186
--- /dev/null
@@ -0,0 +1,24 @@
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * Copyright (C) 2015 Nikolay Martynov <mar.kolya@gmail.com>
+ * Copyright (C) 2015 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/init.h>
+
+#include <linux/of.h>
+#include <linux/clk-provider.h>
+#include <linux/clocksource.h>
+
+#include "common.h"
+
+void __init plat_time_init(void)
+{
+       ralink_of_remap();
+
+       of_clk_init(NULL);
+       clocksource_probe();
+}
index 650d5d39f34d902a60324cd9def8a48af228b994..fd1108543a71d0398097741a4afdf038c574545e 100644 (file)
@@ -89,7 +89,7 @@ static int rb532_gpio_get(struct gpio_chip *chip, unsigned offset)
        struct rb532_gpio_chip  *gpch;
 
        gpch = container_of(chip, struct rb532_gpio_chip, chip);
-       return rb532_get_bit(offset, gpch->regbase + GPIOD);
+       return !!rb532_get_bit(offset, gpch->regbase + GPIOD);
 }
 
 /*
index 9d9962ab7d25ccfea97238f402e5dc2f65e20d98..2fd350f31f4b54c4c505700c966cb941b62e4980 100644 (file)
@@ -689,7 +689,7 @@ static int txx9_iocled_get(struct gpio_chip *chip, unsigned int offset)
 {
        struct txx9_iocled_data *data =
                container_of(chip, struct txx9_iocled_data, chip);
-       return data->cur_val & (1 << offset);
+       return !!(data->cur_val & (1 << offset));
 }
 
 static void txx9_iocled_set(struct gpio_chip *chip, unsigned int offset,
index 06f17e778c2750edd0b2380288da820f0c40f130..8d1c8162f0c1078d1842d754b64a078be8e1ea0e 100644 (file)
@@ -50,7 +50,9 @@
  * set of bits not changed in pmd_modify.
  */
 #define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
-                        _PAGE_ACCESSED | _PAGE_THP_HUGE)
+                        _PAGE_ACCESSED | _PAGE_THP_HUGE | _PAGE_PTE | \
+                        _PAGE_SOFT_DIRTY)
+
 
 #ifdef CONFIG_PPC_64K_PAGES
 #include <asm/book3s/64/hash-64k.h>
index 8204b0c393aac69285a666d1b70fae0961053614..8d1c41d283184ed56b750f438a8fe201d87dbc4d 100644 (file)
@@ -223,7 +223,6 @@ static inline pte_t *pmdp_ptep(pmd_t *pmd)
 #define pmd_pfn(pmd)           pte_pfn(pmd_pte(pmd))
 #define pmd_dirty(pmd)         pte_dirty(pmd_pte(pmd))
 #define pmd_young(pmd)         pte_young(pmd_pte(pmd))
-#define pmd_dirty(pmd)         pte_dirty(pmd_pte(pmd))
 #define pmd_mkold(pmd)         pte_pmd(pte_mkold(pmd_pte(pmd)))
 #define pmd_wrprotect(pmd)     pte_pmd(pte_wrprotect(pmd_pte(pmd)))
 #define pmd_mkdirty(pmd)       pte_pmd(pte_mkdirty(pmd_pte(pmd)))
index 271fefbbe521badbc1a232b8f3583fb10833388b..9d08d8cbed1a1ec0e5893679c5e1fd9cb69dce09 100644 (file)
@@ -38,8 +38,7 @@
 
 #define KVM_MAX_VCPUS          NR_CPUS
 #define KVM_MAX_VCORES         NR_CPUS
-#define KVM_USER_MEM_SLOTS 32
-#define KVM_MEM_SLOTS_NUM KVM_USER_MEM_SLOTS
+#define KVM_USER_MEM_SLOTS     512
 
 #ifdef CONFIG_KVM_MMIO
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
index 5654ece02c0db5ee41da441c30d8a2c5f1cdcbc4..3fa9df70aa20dfa01e90eb1a6af06171a1d8fbde 100644 (file)
@@ -383,3 +383,4 @@ SYSCALL(ni_syscall)
 SYSCALL(ni_syscall)
 SYSCALL(ni_syscall)
 SYSCALL(mlock2)
+SYSCALL(copy_file_range)
index 6a5ace5fa0c8a8bbf83a73a5fca8fb0150d5d3e8..1f2594d456054262bd2bc4a9eef265c983971afa 100644 (file)
@@ -12,7 +12,7 @@
 #include <uapi/asm/unistd.h>
 
 
-#define NR_syscalls            379
+#define NR_syscalls            380
 
 #define __NR__exit __NR_exit
 
index 12a05652377a28799a89ae59e3d38a7ddeaade92..940290d45b087220711cd0a508ae5c4223b21951 100644 (file)
 #define __NR_userfaultfd       364
 #define __NR_membarrier                365
 #define __NR_mlock2            378
+#define __NR_copy_file_range   379
 
 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */
index 8654cb166c19b04fea827756513a1d23e4d92272..ca9e5371930ea7ca272913b931e0a5c6ffc8b656 100644 (file)
@@ -883,32 +883,29 @@ void eeh_pe_restore_bars(struct eeh_pe *pe)
 const char *eeh_pe_loc_get(struct eeh_pe *pe)
 {
        struct pci_bus *bus = eeh_pe_bus_get(pe);
-       struct device_node *dn = pci_bus_to_OF_node(bus);
+       struct device_node *dn;
        const char *loc = NULL;
 
-       if (!dn)
-               goto out;
+       while (bus) {
+               dn = pci_bus_to_OF_node(bus);
+               if (!dn) {
+                       bus = bus->parent;
+                       continue;
+               }
 
-       /* PHB PE or root PE ? */
-       if (pci_is_root_bus(bus)) {
-               loc = of_get_property(dn, "ibm,loc-code", NULL);
-               if (!loc)
+               if (pci_is_root_bus(bus))
                        loc = of_get_property(dn, "ibm,io-base-loc-code", NULL);
+               else
+                       loc = of_get_property(dn, "ibm,slot-location-code",
+                                             NULL);
+
                if (loc)
-                       goto out;
+                       return loc;
 
-               /* Check the root port */
-               dn = dn->child;
-               if (!dn)
-                       goto out;
+               bus = bus->parent;
        }
 
-       loc = of_get_property(dn, "ibm,loc-code", NULL);
-       if (!loc)
-               loc = of_get_property(dn, "ibm,slot-location-code", NULL);
-
-out:
-       return loc ? loc : "N/A";
+       return "N/A";
 }
 
 /**
index db475d41b57a5d4b5313d11f4494792c6070dc67..f28754c497e506cb3291462f94d422822be4e29d 100644 (file)
@@ -701,31 +701,3 @@ _GLOBAL(kexec_sequence)
        li      r5,0
        blr     /* image->start(physid, image->start, 0); */
 #endif /* CONFIG_KEXEC */
-
-#ifdef CONFIG_MODULES
-#if defined(_CALL_ELF) && _CALL_ELF == 2
-
-#ifdef CONFIG_MODVERSIONS
-.weak __crc_TOC.
-.section "___kcrctab+TOC.","a"
-.globl __kcrctab_TOC.
-__kcrctab_TOC.:
-       .llong  __crc_TOC.
-#endif
-
-/*
- * Export a fake .TOC. since both modpost and depmod will complain otherwise.
- * Both modpost and depmod strip the leading . so we do the same here.
- */
-.section "__ksymtab_strings","a"
-__kstrtab_TOC.:
-       .asciz "TOC."
-
-.section "___ksymtab+TOC.","a"
-/* This symbol name is important: it's used by modpost to find exported syms */
-.globl __ksymtab_TOC.
-__ksymtab_TOC.:
-       .llong 0 /* .value */
-       .llong __kstrtab_TOC.
-#endif /* ELFv2 */
-#endif /* MODULES */
index 59663af9315fc16123cd4aacb090a6efd8cbc33d..ac64ffdb52c848d170fa34ef9ffe50d8db4d19c0 100644 (file)
@@ -326,7 +326,10 @@ static void dedotify_versions(struct modversion_info *vers,
                }
 }
 
-/* Undefined symbols which refer to .funcname, hack to funcname (or .TOC.) */
+/*
+ * Undefined symbols which refer to .funcname, hack to funcname. Make .TOC.
+ * seem to be defined (value set later).
+ */
 static void dedotify(Elf64_Sym *syms, unsigned int numsyms, char *strtab)
 {
        unsigned int i;
@@ -334,8 +337,11 @@ static void dedotify(Elf64_Sym *syms, unsigned int numsyms, char *strtab)
        for (i = 1; i < numsyms; i++) {
                if (syms[i].st_shndx == SHN_UNDEF) {
                        char *name = strtab + syms[i].st_name;
-                       if (name[0] == '.')
+                       if (name[0] == '.') {
+                               if (strcmp(name+1, "TOC.") == 0)
+                                       syms[i].st_shndx = SHN_ABS;
                                memmove(name, name+1, strlen(name));
+                       }
                }
        }
 }
@@ -351,7 +357,7 @@ static Elf64_Sym *find_dot_toc(Elf64_Shdr *sechdrs,
        numsyms = sechdrs[symindex].sh_size / sizeof(Elf64_Sym);
 
        for (i = 1; i < numsyms; i++) {
-               if (syms[i].st_shndx == SHN_UNDEF
+               if (syms[i].st_shndx == SHN_ABS
                    && strcmp(strtab + syms[i].st_name, "TOC.") == 0)
                        return &syms[i];
        }
index 774a253ca4e1eaa2ab9c8277d22cbcae0d85f676..9bf7031a67ffc0aedf742e7a9da6afebeb8fe64b 100644 (file)
@@ -377,15 +377,12 @@ no_seg_found:
 
 static void kvmppc_mmu_book3s_64_slbmte(struct kvm_vcpu *vcpu, u64 rs, u64 rb)
 {
-       struct kvmppc_vcpu_book3s *vcpu_book3s;
        u64 esid, esid_1t;
        int slb_nr;
        struct kvmppc_slb *slbe;
 
        dprintk("KVM MMU: slbmte(0x%llx, 0x%llx)\n", rs, rb);
 
-       vcpu_book3s = to_book3s(vcpu);
-
        esid = GET_ESID(rb);
        esid_1t = GET_ESID_1T(rb);
        slb_nr = rb & 0xfff;
index cff207b72c46ae0b54cf48db5e4a3aa828f2fc29..baeddb06811d738a6ea8be4ce920e5ea39a9645a 100644 (file)
@@ -833,6 +833,24 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
        vcpu->stat.sum_exits++;
 
+       /*
+        * This can happen if an interrupt occurs in the last stages
+        * of guest entry or the first stages of guest exit (i.e. after
+        * setting paca->kvm_hstate.in_guest to KVM_GUEST_MODE_GUEST_HV
+        * and before setting it to KVM_GUEST_MODE_HOST_HV).
+        * That can happen due to a bug, or due to a machine check
+        * occurring at just the wrong time.
+        */
+       if (vcpu->arch.shregs.msr & MSR_HV) {
+               printk(KERN_EMERG "KVM trap in HV mode!\n");
+               printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n",
+                       vcpu->arch.trap, kvmppc_get_pc(vcpu),
+                       vcpu->arch.shregs.msr);
+               kvmppc_dump_regs(vcpu);
+               run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+               run->hw.hardware_exit_reason = vcpu->arch.trap;
+               return RESUME_HOST;
+       }
        run->exit_reason = KVM_EXIT_UNKNOWN;
        run->ready_for_interrupt_injection = 1;
        switch (vcpu->arch.trap) {
index 3c6badcd53efced649af69ff9a4adf823b28d9a0..6ee26de9a1ded02e0e08f376fd612049eedee0b4 100644 (file)
@@ -2153,7 +2153,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 
        /* Emulate H_SET_DABR/X on P8 for the sake of compat mode guests */
 2:     rlwimi  r5, r4, 5, DAWRX_DR | DAWRX_DW
-       rlwimi  r5, r4, 1, DAWRX_WT
+       rlwimi  r5, r4, 2, DAWRX_WT
        clrrdi  r4, r4, 3
        std     r4, VCPU_DAWR(r3)
        std     r5, VCPU_DAWRX(r3)
@@ -2404,6 +2404,8 @@ machine_check_realmode:
         * guest as machine check causing guest to crash.
         */
        ld      r11, VCPU_MSR(r9)
+       rldicl. r0, r11, 64-MSR_HV_LG, 63 /* check if it happened in HV mode */
+       bne     mc_cont                 /* if so, exit to host */
        andi.   r10, r11, MSR_RI        /* check for unrecoverable exception */
        beq     1f                      /* Deliver a machine check to guest */
        ld      r10, VCPU_PC(r9)
index 6fd2405c7f4aa2e5bdc5ae417a409b33b6c5f7bd..a3b182dcb823640540fcaeac38bc7d89550fddd6 100644 (file)
@@ -919,21 +919,17 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
                                r = -ENXIO;
                                break;
                        }
-                       vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0] = val.vval;
+                       val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0];
                        break;
                case KVM_REG_PPC_VSCR:
                        if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
                                r = -ENXIO;
                                break;
                        }
-                       vcpu->arch.vr.vscr.u[3] = set_reg_val(reg->id, val);
+                       val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]);
                        break;
                case KVM_REG_PPC_VRSAVE:
-                       if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
-                               r = -ENXIO;
-                               break;
-                       }
-                       vcpu->arch.vrsave = set_reg_val(reg->id, val);
+                       val = get_reg_val(reg->id, vcpu->arch.vrsave);
                        break;
 #endif /* CONFIG_ALTIVEC */
                default:
@@ -974,17 +970,21 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
                                r = -ENXIO;
                                break;
                        }
-                       val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0];
+                       vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0] = val.vval;
                        break;
                case KVM_REG_PPC_VSCR:
                        if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
                                r = -ENXIO;
                                break;
                        }
-                       val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]);
+                       vcpu->arch.vr.vscr.u[3] = set_reg_val(reg->id, val);
                        break;
                case KVM_REG_PPC_VRSAVE:
-                       val = get_reg_val(reg->id, vcpu->arch.vrsave);
+                       if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
+                               r = -ENXIO;
+                               break;
+                       }
+                       vcpu->arch.vrsave = set_reg_val(reg->id, val);
                        break;
 #endif /* CONFIG_ALTIVEC */
                default:
index 22d94c3e6fc43bf7afbd63bfd483074a9e18589a..d0f0a514b04ed66da35d1ae6b27b964285ef2aec 100644 (file)
@@ -560,12 +560,12 @@ subsys_initcall(add_system_ram_resources);
  */
 int devmem_is_allowed(unsigned long pfn)
 {
+       if (page_is_rtas_user_buf(pfn))
+               return 1;
        if (iomem_is_exclusive(PFN_PHYS(pfn)))
                return 0;
        if (!page_is_ram(pfn))
                return 1;
-       if (page_is_rtas_user_buf(pfn))
-               return 1;
        return 0;
 }
 #endif /* CONFIG_STRICT_DEVMEM */
index 7d5e295255b7b25934d51792fb6876c16462d420..9958ba8bf0d27fca74a182546170a487edb88346 100644 (file)
@@ -816,7 +816,7 @@ static struct power_pmu power8_pmu = {
        .get_constraint         = power8_get_constraint,
        .get_alternatives       = power8_get_alternatives,
        .disable_pmc            = power8_disable_pmc,
-       .flags                  = PPMU_HAS_SSLOT | PPMU_HAS_SIER | PPMU_ARCH_207S,
+       .flags                  = PPMU_HAS_SIER | PPMU_ARCH_207S,
        .n_generic              = ARRAY_SIZE(power8_generic_events),
        .generic_events         = power8_generic_events,
        .cache_events           = &power8_cache_events,
index 5038fd578e65acaeeb15127d3af1aa7a75068635..2936a0044c049681d6eeeb80eddbb3ab4ad9295a 100644 (file)
@@ -1799,9 +1799,9 @@ static int spufs_mfc_fsync(struct file *file, loff_t start, loff_t end, int data
        struct inode *inode = file_inode(file);
        int err = filemap_write_and_wait_range(inode->i_mapping, start, end);
        if (!err) {
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                err = spufs_mfc_flush(file, NULL);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        }
        return err;
 }
index ad4840f86be1f53a127ac829b62d2cc41c95ce49..dfa863876778144baa5cb99ac65e61c4ca60ec70 100644 (file)
@@ -163,7 +163,7 @@ static void spufs_prune_dir(struct dentry *dir)
 {
        struct dentry *dentry, *tmp;
 
-       mutex_lock(&d_inode(dir)->i_mutex);
+       inode_lock(d_inode(dir));
        list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_child) {
                spin_lock(&dentry->d_lock);
                if (simple_positive(dentry)) {
@@ -180,7 +180,7 @@ static void spufs_prune_dir(struct dentry *dir)
                }
        }
        shrink_dcache_parent(dir);
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
 }
 
 /* Caller must hold parent->i_mutex */
@@ -225,9 +225,9 @@ static int spufs_dir_close(struct inode *inode, struct file *file)
        parent = d_inode(dir->d_parent);
        ctx = SPUFS_I(d_inode(dir))->i_ctx;
 
-       mutex_lock_nested(&parent->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(parent, I_MUTEX_PARENT);
        ret = spufs_rmdir(parent, dir);
-       mutex_unlock(&parent->i_mutex);
+       inode_unlock(parent);
        WARN_ON(ret);
 
        return dcache_dir_close(inode, file);
@@ -270,7 +270,7 @@ spufs_mkdir(struct inode *dir, struct dentry *dentry, unsigned int flags,
        inode->i_op = &simple_dir_inode_operations;
        inode->i_fop = &simple_dir_operations;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        dget(dentry);
        inc_nlink(dir);
@@ -291,7 +291,7 @@ spufs_mkdir(struct inode *dir, struct dentry *dentry, unsigned int flags,
        if (ret)
                spufs_rmdir(dir, dentry);
 
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        return ret;
 }
index b2e5902bd8f4d8e5f4f53cc3e6fad1cb183db7be..0f3da2cb2bd63c0d26649776fd53d844f3153d18 100644 (file)
@@ -67,7 +67,7 @@ static void hypfs_remove(struct dentry *dentry)
        struct dentry *parent;
 
        parent = dentry->d_parent;
-       mutex_lock(&d_inode(parent)->i_mutex);
+       inode_lock(d_inode(parent));
        if (simple_positive(dentry)) {
                if (d_is_dir(dentry))
                        simple_rmdir(d_inode(parent), dentry);
@@ -76,7 +76,7 @@ static void hypfs_remove(struct dentry *dentry)
        }
        d_delete(dentry);
        dput(dentry);
-       mutex_unlock(&d_inode(parent)->i_mutex);
+       inode_unlock(d_inode(parent));
 }
 
 static void hypfs_delete_tree(struct dentry *root)
@@ -331,7 +331,7 @@ static struct dentry *hypfs_create_file(struct dentry *parent, const char *name,
        struct dentry *dentry;
        struct inode *inode;
 
-       mutex_lock(&d_inode(parent)->i_mutex);
+       inode_lock(d_inode(parent));
        dentry = lookup_one_len(name, parent, strlen(name));
        if (IS_ERR(dentry)) {
                dentry = ERR_PTR(-ENOMEM);
@@ -359,7 +359,7 @@ static struct dentry *hypfs_create_file(struct dentry *parent, const char *name,
        d_instantiate(dentry, inode);
        dget(dentry);
 fail:
-       mutex_unlock(&d_inode(parent)->i_mutex);
+       inode_unlock(d_inode(parent));
        return dentry;
 }
 
index 16aa0c779e0762e210fe6652a0a5efda24771381..595a275c36f87d5ea4fa58d0a580eb36c601ef34 100644 (file)
@@ -8,6 +8,8 @@
 
 #include <linux/types.h>
 
+#define ARCH_IRQ_ENABLED       (3UL << (BITS_PER_LONG - 8))
+
 /* store then OR system mask. */
 #define __arch_local_irq_stosm(__or)                                   \
 ({                                                                     \
@@ -54,14 +56,17 @@ static inline notrace void arch_local_irq_enable(void)
        __arch_local_irq_stosm(0x03);
 }
 
+/* This only restores external and I/O interrupt state */
 static inline notrace void arch_local_irq_restore(unsigned long flags)
 {
-       __arch_local_irq_ssm(flags);
+       /* only disabled->disabled and disabled->enabled is valid */
+       if (flags & ARCH_IRQ_ENABLED)
+               arch_local_irq_enable();
 }
 
 static inline notrace bool arch_irqs_disabled_flags(unsigned long flags)
 {
-       return !(flags & (3UL << (BITS_PER_LONG - 8)));
+       return !(flags & ARCH_IRQ_ENABLED);
 }
 
 static inline notrace bool arch_irqs_disabled(void)
index 6742414dbd6f30b66451f04602cb55de0d20f41c..8959ebb6d2c9eb35538b835a39522585321f9af4 100644 (file)
@@ -546,7 +546,6 @@ struct kvm_vcpu_arch {
        struct kvm_s390_sie_block *sie_block;
        unsigned int      host_acrs[NUM_ACRS];
        struct fpu        host_fpregs;
-       struct fpu        guest_fpregs;
        struct kvm_s390_local_interrupt local_int;
        struct hrtimer    ckc_timer;
        struct kvm_s390_pgm_info pgm;
index 1a9a98de5bdebc346c469f55818f08ed33b576e7..69aa18be61afcfc1e764c5c51ec5cbe5d0fe2043 100644 (file)
@@ -8,10 +8,13 @@
 #include <asm/pci_insn.h>
 
 /* I/O Map */
-#define ZPCI_IOMAP_MAX_ENTRIES         0x7fff
-#define ZPCI_IOMAP_ADDR_BASE           0x8000000000000000ULL
-#define ZPCI_IOMAP_ADDR_IDX_MASK       0x7fff000000000000ULL
-#define ZPCI_IOMAP_ADDR_OFF_MASK       0x0000ffffffffffffULL
+#define ZPCI_IOMAP_SHIFT               48
+#define ZPCI_IOMAP_ADDR_BASE           0x8000000000000000UL
+#define ZPCI_IOMAP_ADDR_OFF_MASK       ((1UL << ZPCI_IOMAP_SHIFT) - 1)
+#define ZPCI_IOMAP_MAX_ENTRIES                                                 \
+       ((ULONG_MAX - ZPCI_IOMAP_ADDR_BASE + 1) / (1UL << ZPCI_IOMAP_SHIFT))
+#define ZPCI_IOMAP_ADDR_IDX_MASK                                               \
+       (~ZPCI_IOMAP_ADDR_OFF_MASK - ZPCI_IOMAP_ADDR_BASE)
 
 struct zpci_iomap_entry {
        u32 fh;
@@ -21,8 +24,9 @@ struct zpci_iomap_entry {
 
 extern struct zpci_iomap_entry *zpci_iomap_start;
 
+#define ZPCI_ADDR(idx) (ZPCI_IOMAP_ADDR_BASE | ((u64) idx << ZPCI_IOMAP_SHIFT))
 #define ZPCI_IDX(addr)                                                         \
-       (((__force u64) addr & ZPCI_IOMAP_ADDR_IDX_MASK) >> 48)
+       (((__force u64) addr & ZPCI_IOMAP_ADDR_IDX_MASK) >> ZPCI_IOMAP_SHIFT)
 #define ZPCI_OFFSET(addr)                                                      \
        ((__force u64) addr & ZPCI_IOMAP_ADDR_OFF_MASK)
 
index f16debf6a612932a7312428465d9ceb1bee41915..1c4fe129486d2e0a9282dc72a3735c4727277b52 100644 (file)
@@ -166,14 +166,14 @@ extern __vector128 init_task_fpu_regs[__NUM_VXRS];
  */
 #define start_thread(regs, new_psw, new_stackp) do {                   \
        regs->psw.mask  = PSW_USER_BITS | PSW_MASK_EA | PSW_MASK_BA;    \
-       regs->psw.addr  = new_psw | PSW_ADDR_AMODE;                     \
+       regs->psw.addr  = new_psw;                                      \
        regs->gprs[15]  = new_stackp;                                   \
        execve_tail();                                                  \
 } while (0)
 
 #define start_thread31(regs, new_psw, new_stackp) do {                 \
        regs->psw.mask  = PSW_USER_BITS | PSW_MASK_BA;                  \
-       regs->psw.addr  = new_psw | PSW_ADDR_AMODE;                     \
+       regs->psw.addr  = new_psw;                                      \
        regs->gprs[15]  = new_stackp;                                   \
        crst_table_downgrade(current->mm, 1UL << 31);                   \
        execve_tail();                                                  \
index f00cd35c8ac4634e9270ff81e73e603ccc5573f0..99bc456cc26a3087cbec7f35ca82959a94cf2ab3 100644 (file)
@@ -149,7 +149,7 @@ static inline int test_pt_regs_flag(struct pt_regs *regs, int flag)
 #define arch_has_block_step()  (1)
 
 #define user_mode(regs) (((regs)->psw.mask & PSW_MASK_PSTATE) != 0)
-#define instruction_pointer(regs) ((regs)->psw.addr & PSW_ADDR_INSN)
+#define instruction_pointer(regs) ((regs)->psw.addr)
 #define user_stack_pointer(regs)((regs)->gprs[15])
 #define profile_pc(regs) instruction_pointer(regs)
 
@@ -161,7 +161,7 @@ static inline long regs_return_value(struct pt_regs *regs)
 static inline void instruction_pointer_set(struct pt_regs *regs,
                                           unsigned long val)
 {
-       regs->psw.addr = val | PSW_ADDR_AMODE;
+       regs->psw.addr = val;
 }
 
 int regs_query_register_offset(const char *name);
@@ -171,7 +171,7 @@ unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n);
 
 static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
 {
-       return regs->gprs[15] & PSW_ADDR_INSN;
+       return regs->gprs[15];
 }
 
 #endif /* __ASSEMBLY__ */
index 34ec202472c6cadb5b973b54c6c995bb6d2e897c..ab3aa6875a59ca4dc3760663d79189090286d7ab 100644 (file)
 #define __NR_recvmsg           372
 #define __NR_shutdown          373
 #define __NR_mlock2            374
-#define NR_syscalls 375
+#define __NR_copy_file_range   375
+#define NR_syscalls 376
 
 /* 
  * There are some system calls that are not present on 64 bit, some
index fac4eeddef91fe3acb21e0108069f3f3a70ef72a..ae2cda5eee5a99b35b73e5b7868edd44cba1c6d2 100644 (file)
@@ -177,3 +177,4 @@ COMPAT_SYSCALL_WRAP3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
 COMPAT_SYSCALL_WRAP3(getpeername, int, fd, struct sockaddr __user *, usockaddr, int __user *, usockaddr_len);
 COMPAT_SYSCALL_WRAP6(sendto, int, fd, void __user *, buff, size_t, len, unsigned int, flags, struct sockaddr __user *, addr, int, addr_len);
 COMPAT_SYSCALL_WRAP3(mlock2, unsigned long, start, size_t, len, int, flags);
+COMPAT_SYSCALL_WRAP6(copy_file_range, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags);
index a92b39fd0e63d953db2d94bbfb090d9f257e867f..3986c9f6219174bf7462b88579b5b030d830185e 100644 (file)
@@ -59,8 +59,6 @@ struct save_area * __init save_area_alloc(bool is_boot_cpu)
        struct save_area *sa;
 
        sa = (void *) memblock_alloc(sizeof(*sa), 8);
-       if (!sa)
-               return NULL;
        if (is_boot_cpu)
                list_add(&sa->list, &dump_save_areas);
        else
index 6fca0e46464e01842f613584e4c82e5ca1fe4270..c890a5589e59583bb06b1d7dffe7226069e522a0 100644 (file)
@@ -1470,7 +1470,7 @@ debug_dflt_header_fn(debug_info_t * id, struct debug_view *view,
                except_str = "*";
        else
                except_str = "-";
-       caller = ((unsigned long) entry->caller) & PSW_ADDR_INSN;
+       caller = (unsigned long) entry->caller;
        rc += sprintf(out_buf, "%02i %011lld:%06lu %1u %1s %02i %p  ",
                      area, (long long)time_spec.tv_sec,
                      time_spec.tv_nsec / 1000, level, except_str,
index dc8e20473484ce32f037f1e9115779a8a9d7d0cd..02bd02ff648b5ab570c5ffe920b27bbfbc3d5fd3 100644 (file)
@@ -34,22 +34,21 @@ __show_trace(unsigned long sp, unsigned long low, unsigned long high)
        unsigned long addr;
 
        while (1) {
-               sp = sp & PSW_ADDR_INSN;
                if (sp < low || sp > high - sizeof(*sf))
                        return sp;
                sf = (struct stack_frame *) sp;
-               addr = sf->gprs[8] & PSW_ADDR_INSN;
+               addr = sf->gprs[8];
                printk("([<%016lx>] %pSR)\n", addr, (void *)addr);
                /* Follow the backchain. */
                while (1) {
                        low = sp;
-                       sp = sf->back_chain & PSW_ADDR_INSN;
+                       sp = sf->back_chain;
                        if (!sp)
                                break;
                        if (sp <= low || sp > high - sizeof(*sf))
                                return sp;
                        sf = (struct stack_frame *) sp;
-                       addr = sf->gprs[8] & PSW_ADDR_INSN;
+                       addr = sf->gprs[8];
                        printk(" [<%016lx>] %pSR\n", addr, (void *)addr);
                }
                /* Zero backchain detected, check for interrupt frame. */
@@ -57,7 +56,7 @@ __show_trace(unsigned long sp, unsigned long low, unsigned long high)
                if (sp <= low || sp > high - sizeof(*regs))
                        return sp;
                regs = (struct pt_regs *) sp;
-               addr = regs->psw.addr & PSW_ADDR_INSN;
+               addr = regs->psw.addr;
                printk(" [<%016lx>] %pSR\n", addr, (void *)addr);
                low = sp;
                sp = regs->gprs[15];
index 20a5caf6d981d9f81852b015765662c745ed6b3b..c55576bbaa1f7555479994d6848c9aa5b60f189b 100644 (file)
@@ -252,14 +252,14 @@ static void early_pgm_check_handler(void)
        unsigned long addr;
 
        addr = S390_lowcore.program_old_psw.addr;
-       fixup = search_exception_tables(addr & PSW_ADDR_INSN);
+       fixup = search_exception_tables(addr);
        if (!fixup)
                disabled_wait(0);
        /* Disable low address protection before storing into lowcore. */
        __ctl_store(cr0, 0, 0);
        cr0_new = cr0 & ~(1UL << 28);
        __ctl_load(cr0_new, 0, 0);
-       S390_lowcore.program_old_psw.addr = extable_fixup(fixup)|PSW_ADDR_AMODE;
+       S390_lowcore.program_old_psw.addr = extable_fixup(fixup);
        __ctl_load(cr0, 0, 0);
 }
 
@@ -268,9 +268,9 @@ static noinline __init void setup_lowcore_early(void)
        psw_t psw;
 
        psw.mask = PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA;
-       psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_ext_handler;
+       psw.addr = (unsigned long) s390_base_ext_handler;
        S390_lowcore.external_new_psw = psw;
-       psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler;
+       psw.addr = (unsigned long) s390_base_pgm_handler;
        S390_lowcore.program_new_psw = psw;
        s390_base_pgm_handler_fn = early_pgm_check_handler;
 }
index e0eaf11134b44706e1223daca7bd66e938bf37f0..0f7bfeba6da6a3632f2705ebbaccc065ffd356a2 100644 (file)
@@ -203,7 +203,7 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip)
                goto out;
        if (unlikely(atomic_read(&current->tracing_graph_pause)))
                goto out;
-       ip = (ip & PSW_ADDR_INSN) - MCOUNT_INSN_SIZE;
+       ip -= MCOUNT_INSN_SIZE;
        trace.func = ip;
        trace.depth = current->curr_ret_stack + 1;
        /* Only trace if the calling function expects to. */
index 0a5a6b661b938743684a6aec8d1d5ebcca537be1..f20abdb5630adceaa75690907ddf7cdedc098146 100644 (file)
@@ -2057,12 +2057,12 @@ void s390_reset_system(void)
        /* Set new machine check handler */
        S390_lowcore.mcck_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT;
        S390_lowcore.mcck_new_psw.addr =
-               PSW_ADDR_AMODE | (unsigned long) s390_base_mcck_handler;
+               (unsigned long) s390_base_mcck_handler;
 
        /* Set new program check handler */
        S390_lowcore.program_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT;
        S390_lowcore.program_new_psw.addr =
-               PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler;
+               (unsigned long) s390_base_pgm_handler;
 
        /*
         * Clear subchannel ID and number to signal new kernel that no CCW or
index 389db56a2208bfa118d6f31aee9b52d090031960..250f5972536a8948a916e56a4514e942e03bde4a 100644 (file)
@@ -226,7 +226,7 @@ static void enable_singlestep(struct kprobe_ctlblk *kcb,
        __ctl_load(per_kprobe, 9, 11);
        regs->psw.mask |= PSW_MASK_PER;
        regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT);
-       regs->psw.addr = ip | PSW_ADDR_AMODE;
+       regs->psw.addr = ip;
 }
 NOKPROBE_SYMBOL(enable_singlestep);
 
@@ -238,7 +238,7 @@ static void disable_singlestep(struct kprobe_ctlblk *kcb,
        __ctl_load(kcb->kprobe_saved_ctl, 9, 11);
        regs->psw.mask &= ~PSW_MASK_PER;
        regs->psw.mask |= kcb->kprobe_saved_imask;
-       regs->psw.addr = ip | PSW_ADDR_AMODE;
+       regs->psw.addr = ip;
 }
 NOKPROBE_SYMBOL(disable_singlestep);
 
@@ -310,7 +310,7 @@ static int kprobe_handler(struct pt_regs *regs)
         */
        preempt_disable();
        kcb = get_kprobe_ctlblk();
-       p = get_kprobe((void *)((regs->psw.addr & PSW_ADDR_INSN) - 2));
+       p = get_kprobe((void *)(regs->psw.addr - 2));
 
        if (p) {
                if (kprobe_running()) {
@@ -460,7 +460,7 @@ static int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
                        break;
        }
 
-       regs->psw.addr = orig_ret_address | PSW_ADDR_AMODE;
+       regs->psw.addr = orig_ret_address;
 
        pop_kprobe(get_kprobe_ctlblk());
        kretprobe_hash_unlock(current, &flags);
@@ -490,7 +490,7 @@ NOKPROBE_SYMBOL(trampoline_probe_handler);
 static void resume_execution(struct kprobe *p, struct pt_regs *regs)
 {
        struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
-       unsigned long ip = regs->psw.addr & PSW_ADDR_INSN;
+       unsigned long ip = regs->psw.addr;
        int fixup = probe_get_fixup_type(p->ainsn.insn);
 
        /* Check if the kprobes location is an enabled ftrace caller */
@@ -605,9 +605,9 @@ static int kprobe_trap_handler(struct pt_regs *regs, int trapnr)
                 * In case the user-specified fault handler returned
                 * zero, try to fix up.
                 */
-               entry = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN);
+               entry = search_exception_tables(regs->psw.addr);
                if (entry) {
-                       regs->psw.addr = extable_fixup(entry) | PSW_ADDR_AMODE;
+                       regs->psw.addr = extable_fixup(entry);
                        return 1;
                }
 
@@ -683,7 +683,7 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
        memcpy(&kcb->jprobe_saved_regs, regs, sizeof(struct pt_regs));
 
        /* setup return addr to the jprobe handler routine */
-       regs->psw.addr = (unsigned long) jp->entry | PSW_ADDR_AMODE;
+       regs->psw.addr = (unsigned long) jp->entry;
        regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT);
 
        /* r15 is the stack pointer */
index 61595c1f0a0fe7d502bb6c213519929effe48b02..cfcba2dd9bb51d30b105990e1257543e8b9c39fd 100644 (file)
@@ -74,7 +74,7 @@ static unsigned long guest_is_user_mode(struct pt_regs *regs)
 
 static unsigned long instruction_pointer_guest(struct pt_regs *regs)
 {
-       return sie_block(regs)->gpsw.addr & PSW_ADDR_INSN;
+       return sie_block(regs)->gpsw.addr;
 }
 
 unsigned long perf_instruction_pointer(struct pt_regs *regs)
@@ -231,29 +231,27 @@ static unsigned long __store_trace(struct perf_callchain_entry *entry,
        struct pt_regs *regs;
 
        while (1) {
-               sp = sp & PSW_ADDR_INSN;
                if (sp < low || sp > high - sizeof(*sf))
                        return sp;
                sf = (struct stack_frame *) sp;
-               perf_callchain_store(entry, sf->gprs[8] & PSW_ADDR_INSN);
+               perf_callchain_store(entry, sf->gprs[8]);
                /* Follow the backchain. */
                while (1) {
                        low = sp;
-                       sp = sf->back_chain & PSW_ADDR_INSN;
+                       sp = sf->back_chain;
                        if (!sp)
                                break;
                        if (sp <= low || sp > high - sizeof(*sf))
                                return sp;
                        sf = (struct stack_frame *) sp;
-                       perf_callchain_store(entry,
-                                            sf->gprs[8] & PSW_ADDR_INSN);
+                       perf_callchain_store(entry, sf->gprs[8]);
                }
                /* Zero backchain detected, check for interrupt frame. */
                sp = (unsigned long) (sf + 1);
                if (sp <= low || sp > high - sizeof(*regs))
                        return sp;
                regs = (struct pt_regs *) sp;
-               perf_callchain_store(entry, sf->gprs[8] & PSW_ADDR_INSN);
+               perf_callchain_store(entry, sf->gprs[8]);
                low = sp;
                sp = regs->gprs[15];
        }
index 114ee8b96f17d31d5dba657c704bc34a2e1b9a9e..2bba7df4ac519fdc773551600b8bab2b9fef4789 100644 (file)
@@ -56,10 +56,10 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
                return 0;
        low = task_stack_page(tsk);
        high = (struct stack_frame *) task_pt_regs(tsk);
-       sf = (struct stack_frame *) (tsk->thread.ksp & PSW_ADDR_INSN);
+       sf = (struct stack_frame *) tsk->thread.ksp;
        if (sf <= low || sf > high)
                return 0;
-       sf = (struct stack_frame *) (sf->back_chain & PSW_ADDR_INSN);
+       sf = (struct stack_frame *) sf->back_chain;
        if (sf <= low || sf > high)
                return 0;
        return sf->gprs[8];
@@ -154,7 +154,7 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
                memset(&frame->childregs, 0, sizeof(struct pt_regs));
                frame->childregs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT |
                                PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
-               frame->childregs.psw.addr = PSW_ADDR_AMODE |
+               frame->childregs.psw.addr =
                                (unsigned long) kernel_thread_starter;
                frame->childregs.gprs[9] = new_stackp; /* function */
                frame->childregs.gprs[10] = arg;
@@ -220,14 +220,14 @@ unsigned long get_wchan(struct task_struct *p)
                return 0;
        low = task_stack_page(p);
        high = (struct stack_frame *) task_pt_regs(p);
-       sf = (struct stack_frame *) (p->thread.ksp & PSW_ADDR_INSN);
+       sf = (struct stack_frame *) p->thread.ksp;
        if (sf <= low || sf > high)
                return 0;
        for (count = 0; count < 16; count++) {
-               sf = (struct stack_frame *) (sf->back_chain & PSW_ADDR_INSN);
+               sf = (struct stack_frame *) sf->back_chain;
                if (sf <= low || sf > high)
                        return 0;
-               return_address = sf->gprs[8] & PSW_ADDR_INSN;
+               return_address = sf->gprs[8];
                if (!in_sched_functions(return_address))
                        return return_address;
        }
index 01c37b36caf964ec4616048c00d6602607a9abca..49b1c13bf6c96372d60815e8549c10b15b40914f 100644 (file)
@@ -84,7 +84,7 @@ void update_cr_regs(struct task_struct *task)
                if (test_tsk_thread_flag(task, TIF_UPROBE_SINGLESTEP))
                        new.control |= PER_EVENT_IFETCH;
                new.start = 0;
-               new.end = PSW_ADDR_INSN;
+               new.end = -1UL;
        }
 
        /* Take care of the PER enablement bit in the PSW. */
@@ -148,7 +148,7 @@ static inline unsigned long __peek_user_per(struct task_struct *child,
        else if (addr == (addr_t) &dummy->cr11)
                /* End address of the active per set. */
                return test_thread_flag(TIF_SINGLE_STEP) ?
-                       PSW_ADDR_INSN : child->thread.per_user.end;
+                       -1UL : child->thread.per_user.end;
        else if (addr == (addr_t) &dummy->bits)
                /* Single-step bit. */
                return test_thread_flag(TIF_SINGLE_STEP) ?
@@ -495,8 +495,6 @@ long arch_ptrace(struct task_struct *child, long request,
                }
                return 0;
        default:
-               /* Removing high order bit from addr (only for 31 bit). */
-               addr &= PSW_ADDR_INSN;
                return ptrace_request(child, request, addr, data);
        }
 }
index c6878fbbcf1324661ac52f4f55ba1e40840fbd89..9220db5c996aa5250ca4d904361c51cec33d7827 100644 (file)
@@ -301,25 +301,21 @@ static void __init setup_lowcore(void)
        BUILD_BUG_ON(sizeof(struct lowcore) != LC_PAGES * 4096);
        lc = __alloc_bootmem_low(LC_PAGES * PAGE_SIZE, LC_PAGES * PAGE_SIZE, 0);
        lc->restart_psw.mask = PSW_KERNEL_BITS;
-       lc->restart_psw.addr =
-               PSW_ADDR_AMODE | (unsigned long) restart_int_handler;
+       lc->restart_psw.addr = (unsigned long) restart_int_handler;
        lc->external_new_psw.mask = PSW_KERNEL_BITS |
                PSW_MASK_DAT | PSW_MASK_MCHECK;
-       lc->external_new_psw.addr =
-               PSW_ADDR_AMODE | (unsigned long) ext_int_handler;
+       lc->external_new_psw.addr = (unsigned long) ext_int_handler;
        lc->svc_new_psw.mask = PSW_KERNEL_BITS |
                PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
-       lc->svc_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) system_call;
+       lc->svc_new_psw.addr = (unsigned long) system_call;
        lc->program_new_psw.mask = PSW_KERNEL_BITS |
                PSW_MASK_DAT | PSW_MASK_MCHECK;
-       lc->program_new_psw.addr =
-               PSW_ADDR_AMODE | (unsigned long) pgm_check_handler;
+       lc->program_new_psw.addr = (unsigned long) pgm_check_handler;
        lc->mcck_new_psw.mask = PSW_KERNEL_BITS;
-       lc->mcck_new_psw.addr =
-               PSW_ADDR_AMODE | (unsigned long) mcck_int_handler;
+       lc->mcck_new_psw.addr = (unsigned long) mcck_int_handler;
        lc->io_new_psw.mask = PSW_KERNEL_BITS |
                PSW_MASK_DAT | PSW_MASK_MCHECK;
-       lc->io_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) io_int_handler;
+       lc->io_new_psw.addr = (unsigned long) io_int_handler;
        lc->clock_comparator = -1ULL;
        lc->kernel_stack = ((unsigned long) &init_thread_union)
                + THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
index 028cc46cb82ad77ac21a7c9b9ea897e2312480d4..d82562cf0a0e1f5057987f59f6d541ea374ea683 100644 (file)
@@ -331,13 +331,13 @@ static int setup_frame(int sig, struct k_sigaction *ka,
        /* Set up to return from userspace.  If provided, use a stub
           already in userspace.  */
        if (ka->sa.sa_flags & SA_RESTORER) {
-               restorer = (unsigned long) ka->sa.sa_restorer | PSW_ADDR_AMODE;
+               restorer = (unsigned long) ka->sa.sa_restorer;
        } else {
                /* Signal frame without vector registers are short ! */
                __u16 __user *svc = (void __user *) frame + frame_size - 2;
                if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, svc))
                        return -EFAULT;
-               restorer = (unsigned long) svc | PSW_ADDR_AMODE;
+               restorer = (unsigned long) svc;
        }
 
        /* Set up registers for signal handler */
@@ -347,7 +347,7 @@ static int setup_frame(int sig, struct k_sigaction *ka,
        regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA |
                (PSW_USER_BITS & PSW_MASK_ASC) |
                (regs->psw.mask & ~PSW_MASK_ASC);
-       regs->psw.addr = (unsigned long) ka->sa.sa_handler | PSW_ADDR_AMODE;
+       regs->psw.addr = (unsigned long) ka->sa.sa_handler;
 
        regs->gprs[2] = sig;
        regs->gprs[3] = (unsigned long) &frame->sc;
@@ -394,13 +394,12 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
        /* Set up to return from userspace.  If provided, use a stub
           already in userspace.  */
        if (ksig->ka.sa.sa_flags & SA_RESTORER) {
-               restorer = (unsigned long)
-                       ksig->ka.sa.sa_restorer | PSW_ADDR_AMODE;
+               restorer = (unsigned long) ksig->ka.sa.sa_restorer;
        } else {
                __u16 __user *svc = &frame->svc_insn;
                if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, svc))
                        return -EFAULT;
-               restorer = (unsigned long) svc | PSW_ADDR_AMODE;
+               restorer = (unsigned long) svc;
        }
 
        /* Create siginfo on the signal stack */
@@ -426,7 +425,7 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
        regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA |
                (PSW_USER_BITS & PSW_MASK_ASC) |
                (regs->psw.mask & ~PSW_MASK_ASC);
-       regs->psw.addr = (unsigned long) ksig->ka.sa.sa_handler | PSW_ADDR_AMODE;
+       regs->psw.addr = (unsigned long) ksig->ka.sa.sa_handler;
 
        regs->gprs[2] = ksig->sig;
        regs->gprs[3] = (unsigned long) &frame->info;
index a13468b9a9133b9f46d8c90575ea0b5ffcd41e04..3c65a8eae34d35b6a670066db2bc80c8fa3ccf67 100644 (file)
@@ -623,8 +623,6 @@ void __init smp_save_dump_cpus(void)
                return;
        /* Allocate a page as dumping area for the store status sigps */
        page = memblock_alloc_base(PAGE_SIZE, PAGE_SIZE, 1UL << 31);
-       if (!page)
-               panic("could not allocate memory for save area\n");
        /* Set multi-threading state to the previous system. */
        pcpu_set_smt(sclp.mtid_prev);
        boot_cpu_addr = stap();
index 1785cd82253caec869abbb7f5d7804b42effad04..5acba3cb7220ea8c779d9606268a17f9b0a9a928 100644 (file)
@@ -21,12 +21,11 @@ static unsigned long save_context_stack(struct stack_trace *trace,
        unsigned long addr;
 
        while(1) {
-               sp &= PSW_ADDR_INSN;
                if (sp < low || sp > high)
                        return sp;
                sf = (struct stack_frame *)sp;
                while(1) {
-                       addr = sf->gprs[8] & PSW_ADDR_INSN;
+                       addr = sf->gprs[8];
                        if (!trace->skip)
                                trace->entries[trace->nr_entries++] = addr;
                        else
@@ -34,7 +33,7 @@ static unsigned long save_context_stack(struct stack_trace *trace,
                        if (trace->nr_entries >= trace->max_entries)
                                return sp;
                        low = sp;
-                       sp = sf->back_chain & PSW_ADDR_INSN;
+                       sp = sf->back_chain;
                        if (!sp)
                                break;
                        if (sp <= low || sp > high - sizeof(*sf))
@@ -46,7 +45,7 @@ static unsigned long save_context_stack(struct stack_trace *trace,
                if (sp <= low || sp > high - sizeof(*regs))
                        return sp;
                regs = (struct pt_regs *)sp;
-               addr = regs->psw.addr & PSW_ADDR_INSN;
+               addr = regs->psw.addr;
                if (savesched || !in_sched_functions(addr)) {
                        if (!trace->skip)
                                trace->entries[trace->nr_entries++] = addr;
@@ -65,7 +64,7 @@ void save_stack_trace(struct stack_trace *trace)
        register unsigned long sp asm ("15");
        unsigned long orig_sp, new_sp;
 
-       orig_sp = sp & PSW_ADDR_INSN;
+       orig_sp = sp;
        new_sp = save_context_stack(trace, orig_sp,
                                    S390_lowcore.panic_stack - PAGE_SIZE,
                                    S390_lowcore.panic_stack, 1);
@@ -86,7 +85,7 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
 {
        unsigned long sp, low, high;
 
-       sp = tsk->thread.ksp & PSW_ADDR_INSN;
+       sp = tsk->thread.ksp;
        low = (unsigned long) task_stack_page(tsk);
        high = (unsigned long) task_pt_regs(tsk);
        save_context_stack(trace, sp, low, high, 0);
index 5378c3ea1b984918ed3719c8a9cf8d1c8c86574a..293d8b98fd525c992146fa4f5c85f1287d90bc62 100644 (file)
@@ -383,3 +383,4 @@ SYSCALL(sys_recvfrom,compat_sys_recvfrom)
 SYSCALL(sys_recvmsg,compat_sys_recvmsg)
 SYSCALL(sys_shutdown,sys_shutdown)
 SYSCALL(sys_mlock2,compat_sys_mlock2)
+SYSCALL(sys_copy_file_range,compat_sys_copy_file_range) /* 375 */
index d69d648759c9f926ae91771cef6a8cc291ee403e..017eb03daee2e724a603420f65b6e93dde98ac6a 100644 (file)
@@ -32,8 +32,7 @@ static inline void __user *get_trap_ip(struct pt_regs *regs)
                address = *(unsigned long *)(current->thread.trap_tdb + 24);
        else
                address = regs->psw.addr;
-       return (void __user *)
-               ((address - (regs->int_code >> 16)) & PSW_ADDR_INSN);
+       return (void __user *) (address - (regs->int_code >> 16));
 }
 
 static inline void report_user_fault(struct pt_regs *regs, int signr)
@@ -46,7 +45,7 @@ static inline void report_user_fault(struct pt_regs *regs, int signr)
                return;
        printk("User process fault: interruption code %04x ilc:%d ",
               regs->int_code & 0xffff, regs->int_code >> 17);
-       print_vma_addr("in ", regs->psw.addr & PSW_ADDR_INSN);
+       print_vma_addr("in ", regs->psw.addr);
        printk("\n");
        show_regs(regs);
 }
@@ -69,13 +68,13 @@ void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str)
                report_user_fault(regs, si_signo);
         } else {
                 const struct exception_table_entry *fixup;
-                fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN);
+               fixup = search_exception_tables(regs->psw.addr);
                 if (fixup)
-                       regs->psw.addr = extable_fixup(fixup) | PSW_ADDR_AMODE;
+                       regs->psw.addr = extable_fixup(fixup);
                else {
                        enum bug_trap_type btt;
 
-                       btt = report_bug(regs->psw.addr & PSW_ADDR_INSN, regs);
+                       btt = report_bug(regs->psw.addr, regs);
                        if (btt == BUG_TRAP_TYPE_WARN)
                                return;
                        die(regs, str);
index 5fce52cf0e57dc4831d0737ddd3c3a2cfbc386d7..5ea5af3c7db72479a69710a6606d8dddd6143b7a 100644 (file)
@@ -29,6 +29,7 @@ config KVM
        select HAVE_KVM_IRQFD
        select HAVE_KVM_IRQ_ROUTING
        select SRCU
+       select KVM_VFIO
        ---help---
          Support hosting paravirtualized guest machines using the SIE
          virtualization capability on the mainframe. This should work
index b3b553469650888fd31df5d975fe4b93b2399f70..d42fa38c24292157c0da0f015d2063bc1c5e5723 100644 (file)
@@ -7,7 +7,7 @@
 # as published by the Free Software Foundation.
 
 KVM := ../../../virt/kvm
-common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o  $(KVM)/async_pf.o $(KVM)/irqchip.o
+common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o  $(KVM)/async_pf.o $(KVM)/irqchip.o $(KVM)/vfio.o
 
 ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
 
index 47518a324d752286d4b0cb80784c43c316f31c45..d697312ce9ee325571f25c74fb0d963f50943537 100644 (file)
@@ -116,7 +116,7 @@ static void enable_all_hw_wp(struct kvm_vcpu *vcpu)
        if (*cr9 & PER_EVENT_STORE && *cr9 & PER_CONTROL_ALTERATION) {
                *cr9 &= ~PER_CONTROL_ALTERATION;
                *cr10 = 0;
-               *cr11 = PSW_ADDR_INSN;
+               *cr11 = -1UL;
        } else {
                *cr9 &= ~PER_CONTROL_ALTERATION;
                *cr9 |= PER_EVENT_STORE;
@@ -159,7 +159,7 @@ void kvm_s390_patch_guest_per_regs(struct kvm_vcpu *vcpu)
                vcpu->arch.sie_block->gcr[0] &= ~0x800ul;
                vcpu->arch.sie_block->gcr[9] |= PER_EVENT_IFETCH;
                vcpu->arch.sie_block->gcr[10] = 0;
-               vcpu->arch.sie_block->gcr[11] = PSW_ADDR_INSN;
+               vcpu->arch.sie_block->gcr[11] = -1UL;
        }
 
        if (guestdbg_hw_bp_enabled(vcpu)) {
index 835d60bedb545fa1873b6b37e57dffbec3bb9600..4af21c771f9b3925bf860d0dc86cebcc803cf965 100644 (file)
@@ -1423,44 +1423,18 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
        return 0;
 }
 
-/*
- * Backs up the current FP/VX register save area on a particular
- * destination.  Used to switch between different register save
- * areas.
- */
-static inline void save_fpu_to(struct fpu *dst)
-{
-       dst->fpc = current->thread.fpu.fpc;
-       dst->regs = current->thread.fpu.regs;
-}
-
-/*
- * Switches the FP/VX register save area from which to lazy
- * restore register contents.
- */
-static inline void load_fpu_from(struct fpu *from)
-{
-       current->thread.fpu.fpc = from->fpc;
-       current->thread.fpu.regs = from->regs;
-}
-
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
        /* Save host register state */
        save_fpu_regs();
-       save_fpu_to(&vcpu->arch.host_fpregs);
-
-       if (test_kvm_facility(vcpu->kvm, 129)) {
-               current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
-               /*
-                * Use the register save area in the SIE-control block
-                * for register restore and save in kvm_arch_vcpu_put()
-                */
-               current->thread.fpu.vxrs =
-                       (__vector128 *)&vcpu->run->s.regs.vrs;
-       } else
-               load_fpu_from(&vcpu->arch.guest_fpregs);
+       vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
+       vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
 
+       /* Depending on MACHINE_HAS_VX, data stored to vrs either
+        * has vector register or floating point register format.
+        */
+       current->thread.fpu.regs = vcpu->run->s.regs.vrs;
+       current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
        if (test_fp_ctl(current->thread.fpu.fpc))
                /* User space provided an invalid FPC, let's clear it */
                current->thread.fpu.fpc = 0;
@@ -1476,19 +1450,13 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
        atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
        gmap_disable(vcpu->arch.gmap);
 
+       /* Save guest register state */
        save_fpu_regs();
+       vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
 
-       if (test_kvm_facility(vcpu->kvm, 129))
-               /*
-                * kvm_arch_vcpu_load() set up the register save area to
-                * the &vcpu->run->s.regs.vrs and, thus, the vector registers
-                * are already saved.  Only the floating-point control must be
-                * copied.
-                */
-               vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
-       else
-               save_fpu_to(&vcpu->arch.guest_fpregs);
-       load_fpu_from(&vcpu->arch.host_fpregs);
+       /* Restore host register state */
+       current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
+       current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
 
        save_access_regs(vcpu->run->s.regs.acrs);
        restore_access_regs(vcpu->arch.host_acrs);
@@ -1506,8 +1474,9 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
        memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
        vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
        vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
-       vcpu->arch.guest_fpregs.fpc = 0;
-       asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
+       /* make sure the new fpc will be lazily loaded */
+       save_fpu_regs();
+       current->thread.fpu.fpc = 0;
        vcpu->arch.sie_block->gbea = 1;
        vcpu->arch.sie_block->pp = 0;
        vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
@@ -1648,17 +1617,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
        vcpu->arch.local_int.wq = &vcpu->wq;
        vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
 
-       /*
-        * Allocate a save area for floating-point registers.  If the vector
-        * extension is available, register contents are saved in the SIE
-        * control block.  The allocated save area is still required in
-        * particular places, for example, in kvm_s390_vcpu_store_status().
-        */
-       vcpu->arch.guest_fpregs.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS,
-                                              GFP_KERNEL);
-       if (!vcpu->arch.guest_fpregs.fprs)
-               goto out_free_sie_block;
-
        rc = kvm_vcpu_init(vcpu, kvm, id);
        if (rc)
                goto out_free_sie_block;
@@ -1879,19 +1837,27 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
 
 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
+       /* make sure the new values will be lazily loaded */
+       save_fpu_regs();
        if (test_fp_ctl(fpu->fpc))
                return -EINVAL;
-       memcpy(vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
-       vcpu->arch.guest_fpregs.fpc = fpu->fpc;
-       save_fpu_regs();
-       load_fpu_from(&vcpu->arch.guest_fpregs);
+       current->thread.fpu.fpc = fpu->fpc;
+       if (MACHINE_HAS_VX)
+               convert_fp_to_vx(current->thread.fpu.vxrs, (freg_t *)fpu->fprs);
+       else
+               memcpy(current->thread.fpu.fprs, &fpu->fprs, sizeof(fpu->fprs));
        return 0;
 }
 
 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
-       memcpy(&fpu->fprs, vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
-       fpu->fpc = vcpu->arch.guest_fpregs.fpc;
+       /* make sure we have the latest values */
+       save_fpu_regs();
+       if (MACHINE_HAS_VX)
+               convert_vx_to_fp((freg_t *)fpu->fprs, current->thread.fpu.vxrs);
+       else
+               memcpy(fpu->fprs, current->thread.fpu.fprs, sizeof(fpu->fprs));
+       fpu->fpc = current->thread.fpu.fpc;
        return 0;
 }
 
@@ -2396,6 +2362,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
 {
        unsigned char archmode = 1;
+       freg_t fprs[NUM_FPRS];
        unsigned int px;
        u64 clkcomp;
        int rc;
@@ -2411,8 +2378,16 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
                gpa = px;
        } else
                gpa -= __LC_FPREGS_SAVE_AREA;
-       rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
-                            vcpu->arch.guest_fpregs.fprs, 128);
+
+       /* manually convert vector registers if necessary */
+       if (MACHINE_HAS_VX) {
+               convert_vx_to_fp(fprs, current->thread.fpu.vxrs);
+               rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
+                                    fprs, 128);
+       } else {
+               rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
+                                    vcpu->run->s.regs.vrs, 128);
+       }
        rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
                              vcpu->run->s.regs.gprs, 128);
        rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
@@ -2420,7 +2395,7 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
        rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
                              &px, 4);
        rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
-                             &vcpu->arch.guest_fpregs.fpc, 4);
+                             &vcpu->run->s.regs.fpc, 4);
        rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
                              &vcpu->arch.sie_block->todpr, 4);
        rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
@@ -2443,19 +2418,7 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
         * it into the save area
         */
        save_fpu_regs();
-       if (test_kvm_facility(vcpu->kvm, 129)) {
-               /*
-                * If the vector extension is available, the vector registers
-                * which overlaps with floating-point registers are saved in
-                * the SIE-control block.  Hence, extract the floating-point
-                * registers and the FPC value and store them in the
-                * guest_fpregs structure.
-                */
-               vcpu->arch.guest_fpregs.fpc = current->thread.fpu.fpc;
-               convert_vx_to_fp(vcpu->arch.guest_fpregs.fprs,
-                                current->thread.fpu.vxrs);
-       } else
-               save_fpu_to(&vcpu->arch.guest_fpregs);
+       vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
        save_access_regs(vcpu->run->s.regs.acrs);
 
        return kvm_s390_store_status_unloaded(vcpu, addr);
index 1b903f6ad54a327ca73ee88bdecee0a73e1eaf42..791a4146052c7a48fdb72a6ef3b9e0d1abc06eeb 100644 (file)
@@ -228,7 +228,7 @@ static inline void report_user_fault(struct pt_regs *regs, long signr)
                return;
        printk(KERN_ALERT "User process fault: interruption code %04x ilc:%d ",
               regs->int_code & 0xffff, regs->int_code >> 17);
-       print_vma_addr(KERN_CONT "in ", regs->psw.addr & PSW_ADDR_INSN);
+       print_vma_addr(KERN_CONT "in ", regs->psw.addr);
        printk(KERN_CONT "\n");
        printk(KERN_ALERT "failing address: %016lx TEID: %016lx\n",
               regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long);
@@ -256,9 +256,9 @@ static noinline void do_no_context(struct pt_regs *regs)
        const struct exception_table_entry *fixup;
 
        /* Are we prepared to handle this kernel fault?  */
-       fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN);
+       fixup = search_exception_tables(regs->psw.addr);
        if (fixup) {
-               regs->psw.addr = extable_fixup(fixup) | PSW_ADDR_AMODE;
+               regs->psw.addr = extable_fixup(fixup);
                return;
        }
 
index c722400c769784d8df6f13507693e117ad942497..73e29033709285b44ceb5609919c07aff86e9de9 100644 (file)
@@ -98,7 +98,7 @@ void __init paging_init(void)
        __ctl_load(S390_lowcore.kernel_asce, 1, 1);
        __ctl_load(S390_lowcore.kernel_asce, 7, 7);
        __ctl_load(S390_lowcore.kernel_asce, 13, 13);
-       arch_local_irq_restore(4UL << (BITS_PER_LONG - 8));
+       __arch_local_irq_stosm(0x04);
 
        sparse_memory_present_with_active_regions(MAX_NUMNODES);
        sparse_init();
index ea01477b4aa67155f87b88e8c6a87f5374eda247..45c4daa49930f9a2bdfaf1d07a6ab5e002aa99c2 100644 (file)
@@ -169,12 +169,12 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 
 int s390_mmap_check(unsigned long addr, unsigned long len, unsigned long flags)
 {
-       if (is_compat_task() || (TASK_SIZE >= (1UL << 53)))
+       if (is_compat_task() || TASK_SIZE >= TASK_MAX_SIZE)
                return 0;
        if (!(flags & MAP_FIXED))
                addr = 0;
        if ((addr + len) >= TASK_SIZE)
-               return crst_table_upgrade(current->mm, 1UL << 53);
+               return crst_table_upgrade(current->mm, TASK_MAX_SIZE);
        return 0;
 }
 
@@ -189,9 +189,9 @@ s390_get_unmapped_area(struct file *filp, unsigned long addr,
        area = arch_get_unmapped_area(filp, addr, len, pgoff, flags);
        if (!(area & ~PAGE_MASK))
                return area;
-       if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < (1UL << 53)) {
+       if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < TASK_MAX_SIZE) {
                /* Upgrade the page table to 4 levels and retry. */
-               rc = crst_table_upgrade(mm, 1UL << 53);
+               rc = crst_table_upgrade(mm, TASK_MAX_SIZE);
                if (rc)
                        return (unsigned long) rc;
                area = arch_get_unmapped_area(filp, addr, len, pgoff, flags);
@@ -211,9 +211,9 @@ s390_get_unmapped_area_topdown(struct file *filp, const unsigned long addr,
        area = arch_get_unmapped_area_topdown(filp, addr, len, pgoff, flags);
        if (!(area & ~PAGE_MASK))
                return area;
-       if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < (1UL << 53)) {
+       if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < TASK_MAX_SIZE) {
                /* Upgrade the page table to 4 levels and retry. */
-               rc = crst_table_upgrade(mm, 1UL << 53);
+               rc = crst_table_upgrade(mm, TASK_MAX_SIZE);
                if (rc)
                        return (unsigned long) rc;
                area = arch_get_unmapped_area_topdown(filp, addr, len,
index a809fa8e6f8bd01d2c0ad90891060d6aabd36a80..5109827883acaefc6afc0ac913956fcb79f8f8a0 100644 (file)
@@ -55,7 +55,7 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
        unsigned long entry;
        int flush;
 
-       BUG_ON(limit > (1UL << 53));
+       BUG_ON(limit > TASK_MAX_SIZE);
        flush = 0;
 repeat:
        table = crst_table_alloc(mm);
index 43f32ce60aa3d98af0b7665090fa3eb080d12fa7..2794845061c66fe3605c0b2e97b591b6cf176d32 100644 (file)
@@ -57,9 +57,7 @@ static __init pg_data_t *alloc_node_data(void)
 {
        pg_data_t *res;
 
-       res = (pg_data_t *) memblock_alloc(sizeof(pg_data_t), 1);
-       if (!res)
-               panic("Could not allocate memory for node data!\n");
+       res = (pg_data_t *) memblock_alloc(sizeof(pg_data_t), 8);
        memset(res, 0, sizeof(pg_data_t));
        return res;
 }
@@ -162,7 +160,7 @@ static int __init numa_init_late(void)
                register_one_node(nid);
        return 0;
 }
-device_initcall(numa_init_late);
+arch_initcall(numa_init_late);
 
 static int __init parse_debug(char *parm)
 {
index 8a6811b2cdb9523a24cd32341dee0fcef317f661..fe0bfe370c4534a1ab5ceac19d54372390e19289 100644 (file)
@@ -16,24 +16,23 @@ __show_trace(unsigned int *depth, unsigned long sp,
        struct pt_regs *regs;
 
        while (*depth) {
-               sp = sp & PSW_ADDR_INSN;
                if (sp < low || sp > high - sizeof(*sf))
                        return sp;
                sf = (struct stack_frame *) sp;
                (*depth)--;
-               oprofile_add_trace(sf->gprs[8] & PSW_ADDR_INSN);
+               oprofile_add_trace(sf->gprs[8]);
 
                /* Follow the backchain.  */
                while (*depth) {
                        low = sp;
-                       sp = sf->back_chain & PSW_ADDR_INSN;
+                       sp = sf->back_chain;
                        if (!sp)
                                break;
                        if (sp <= low || sp > high - sizeof(*sf))
                                return sp;
                        sf = (struct stack_frame *) sp;
                        (*depth)--;
-                       oprofile_add_trace(sf->gprs[8] & PSW_ADDR_INSN);
+                       oprofile_add_trace(sf->gprs[8]);
 
                }
 
@@ -46,7 +45,7 @@ __show_trace(unsigned int *depth, unsigned long sp,
                        return sp;
                regs = (struct pt_regs *) sp;
                (*depth)--;
-               oprofile_add_trace(sf->gprs[8] & PSW_ADDR_INSN);
+               oprofile_add_trace(sf->gprs[8]);
                low = sp;
                sp = regs->gprs[15];
        }
index 11d4f277e9f69ac3d266e14ca9804e4b6c844fac..8f19c8f9d660570839a69f60477f855181ba4e7c 100644 (file)
@@ -68,9 +68,12 @@ static struct airq_struct zpci_airq = {
        .isc = PCI_ISC,
 };
 
-/* I/O Map */
+#define ZPCI_IOMAP_ENTRIES                                             \
+       min(((unsigned long) CONFIG_PCI_NR_FUNCTIONS * PCI_BAR_COUNT),  \
+           ZPCI_IOMAP_MAX_ENTRIES)
+
 static DEFINE_SPINLOCK(zpci_iomap_lock);
-static DECLARE_BITMAP(zpci_iomap, ZPCI_IOMAP_MAX_ENTRIES);
+static unsigned long *zpci_iomap_bitmap;
 struct zpci_iomap_entry *zpci_iomap_start;
 EXPORT_SYMBOL_GPL(zpci_iomap_start);
 
@@ -265,27 +268,20 @@ void __iomem *pci_iomap_range(struct pci_dev *pdev,
                              unsigned long max)
 {
        struct zpci_dev *zdev = to_zpci(pdev);
-       u64 addr;
        int idx;
 
-       if ((bar & 7) != bar)
+       if (!pci_resource_len(pdev, bar))
                return NULL;
 
        idx = zdev->bars[bar].map_idx;
        spin_lock(&zpci_iomap_lock);
-       if (zpci_iomap_start[idx].count++) {
-               BUG_ON(zpci_iomap_start[idx].fh != zdev->fh ||
-                      zpci_iomap_start[idx].bar != bar);
-       } else {
-               zpci_iomap_start[idx].fh = zdev->fh;
-               zpci_iomap_start[idx].bar = bar;
-       }
        /* Detect overrun */
-       BUG_ON(!zpci_iomap_start[idx].count);
+       WARN_ON(!++zpci_iomap_start[idx].count);
+       zpci_iomap_start[idx].fh = zdev->fh;
+       zpci_iomap_start[idx].bar = bar;
        spin_unlock(&zpci_iomap_lock);
 
-       addr = ZPCI_IOMAP_ADDR_BASE | ((u64) idx << 48);
-       return (void __iomem *) addr + offset;
+       return (void __iomem *) ZPCI_ADDR(idx) + offset;
 }
 EXPORT_SYMBOL(pci_iomap_range);
 
@@ -297,12 +293,11 @@ EXPORT_SYMBOL(pci_iomap);
 
 void pci_iounmap(struct pci_dev *pdev, void __iomem *addr)
 {
-       unsigned int idx;
+       unsigned int idx = ZPCI_IDX(addr);
 
-       idx = (((__force u64) addr) & ~ZPCI_IOMAP_ADDR_BASE) >> 48;
        spin_lock(&zpci_iomap_lock);
        /* Detect underrun */
-       BUG_ON(!zpci_iomap_start[idx].count);
+       WARN_ON(!zpci_iomap_start[idx].count);
        if (!--zpci_iomap_start[idx].count) {
                zpci_iomap_start[idx].fh = 0;
                zpci_iomap_start[idx].bar = 0;
@@ -544,15 +539,15 @@ static void zpci_irq_exit(void)
 
 static int zpci_alloc_iomap(struct zpci_dev *zdev)
 {
-       int entry;
+       unsigned long entry;
 
        spin_lock(&zpci_iomap_lock);
-       entry = find_first_zero_bit(zpci_iomap, ZPCI_IOMAP_MAX_ENTRIES);
-       if (entry == ZPCI_IOMAP_MAX_ENTRIES) {
+       entry = find_first_zero_bit(zpci_iomap_bitmap, ZPCI_IOMAP_ENTRIES);
+       if (entry == ZPCI_IOMAP_ENTRIES) {
                spin_unlock(&zpci_iomap_lock);
                return -ENOSPC;
        }
-       set_bit(entry, zpci_iomap);
+       set_bit(entry, zpci_iomap_bitmap);
        spin_unlock(&zpci_iomap_lock);
        return entry;
 }
@@ -561,7 +556,7 @@ static void zpci_free_iomap(struct zpci_dev *zdev, int entry)
 {
        spin_lock(&zpci_iomap_lock);
        memset(&zpci_iomap_start[entry], 0, sizeof(struct zpci_iomap_entry));
-       clear_bit(entry, zpci_iomap);
+       clear_bit(entry, zpci_iomap_bitmap);
        spin_unlock(&zpci_iomap_lock);
 }
 
@@ -611,8 +606,7 @@ static int zpci_setup_bus_resources(struct zpci_dev *zdev,
                if (zdev->bars[i].val & 4)
                        flags |= IORESOURCE_MEM_64;
 
-               addr = ZPCI_IOMAP_ADDR_BASE + ((u64) entry << 48);
-
+               addr = ZPCI_ADDR(entry);
                size = 1UL << zdev->bars[i].size;
 
                res = __alloc_res(zdev, addr, size, flags);
@@ -873,23 +867,30 @@ static int zpci_mem_init(void)
        zdev_fmb_cache = kmem_cache_create("PCI_FMB_cache", sizeof(struct zpci_fmb),
                                16, 0, NULL);
        if (!zdev_fmb_cache)
-               goto error_zdev;
+               goto error_fmb;
 
-       /* TODO: use realloc */
-       zpci_iomap_start = kzalloc(ZPCI_IOMAP_MAX_ENTRIES * sizeof(*zpci_iomap_start),
-                                  GFP_KERNEL);
+       zpci_iomap_start = kcalloc(ZPCI_IOMAP_ENTRIES,
+                                  sizeof(*zpci_iomap_start), GFP_KERNEL);
        if (!zpci_iomap_start)
                goto error_iomap;
-       return 0;
 
+       zpci_iomap_bitmap = kcalloc(BITS_TO_LONGS(ZPCI_IOMAP_ENTRIES),
+                                   sizeof(*zpci_iomap_bitmap), GFP_KERNEL);
+       if (!zpci_iomap_bitmap)
+               goto error_iomap_bitmap;
+
+       return 0;
+error_iomap_bitmap:
+       kfree(zpci_iomap_start);
 error_iomap:
        kmem_cache_destroy(zdev_fmb_cache);
-error_zdev:
+error_fmb:
        return -ENOMEM;
 }
 
 static void zpci_mem_exit(void)
 {
+       kfree(zpci_iomap_bitmap);
        kfree(zpci_iomap_start);
        kmem_cache_destroy(zdev_fmb_cache);
 }
index 369a3e05d468dbf1973df24c77e4ae07e580c3b6..b0e04751c5d599fbdbb4bdb68fdbed5dcb4039d5 100644 (file)
@@ -53,6 +53,11 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
 
        pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n",
               pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid);
+
+       if (!pdev)
+               return;
+
+       pdev->error_state = pci_channel_io_perm_failure;
 }
 
 void zpci_event_error(void *data)
index f887c6465a821b7b96a2a300268f077afd70ae5a..8a84e05adb2e122fa0d39a1c2acdfadf63202266 100644 (file)
@@ -33,7 +33,6 @@
 #endif
 
 #define __smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0)
-#define smp_store_mb(var, value) __smp_store_mb(var, value)
 
 #include <asm-generic/barrier.h>
 
index e13d41c392ae4f4940a41e6c8049099c71d28a04..f878bec23576c54c619b633ce0c81508519ddb56 100644 (file)
@@ -34,21 +34,18 @@ struct page;
 
 #if defined(CONFIG_3_LEVEL_PGTABLES) && !defined(CONFIG_64BIT)
 
-typedef struct { unsigned long pte_low, pte_high; } pte_t;
+typedef struct { unsigned long pte; } pte_t;
 typedef struct { unsigned long pmd; } pmd_t;
 typedef struct { unsigned long pgd; } pgd_t;
-#define pte_val(x) ((x).pte_low | ((unsigned long long) (x).pte_high << 32))
-
-#define pte_get_bits(pte, bits) ((pte).pte_low & (bits))
-#define pte_set_bits(pte, bits) ((pte).pte_low |= (bits))
-#define pte_clear_bits(pte, bits) ((pte).pte_low &= ~(bits))
-#define pte_copy(to, from) ({ (to).pte_high = (from).pte_high; \
-                             smp_wmb(); \
-                             (to).pte_low = (from).pte_low; })
-#define pte_is_zero(pte) (!((pte).pte_low & ~_PAGE_NEWPAGE) && !(pte).pte_high)
-#define pte_set_val(pte, phys, prot) \
-       ({ (pte).pte_high = (phys) >> 32; \
-          (pte).pte_low = (phys) | pgprot_val(prot); })
+#define pte_val(p) ((p).pte)
+
+#define pte_get_bits(p, bits) ((p).pte & (bits))
+#define pte_set_bits(p, bits) ((p).pte |= (bits))
+#define pte_clear_bits(p, bits) ((p).pte &= ~(bits))
+#define pte_copy(to, from) ({ (to).pte = (from).pte; })
+#define pte_is_zero(p) (!((p).pte & ~_PAGE_NEWPAGE))
+#define pte_set_val(p, phys, prot) \
+       ({ (p).pte = (phys) | pgprot_val(prot); })
 
 #define pmd_val(x)     ((x).pmd)
 #define __pmd(x) ((pmd_t) { (x) } )
index 330e738ccfc13633954a02677e46bb1b12605fa9..9af2e63384005002bc99c762f8a61fdd1bbbb3ca 100644 (file)
@@ -509,11 +509,10 @@ config X86_INTEL_CE
 
 config X86_INTEL_MID
        bool "Intel MID platform support"
-       depends on X86_32
        depends on X86_EXTENDED_PLATFORM
        depends on X86_PLATFORM_DEVICES
        depends on PCI
-       depends on PCI_GOANY
+       depends on X86_64 || (PCI_GOANY && X86_32)
        depends on X86_IO_APIC
        select SFI
        select I2C
index 712b13047b41e9cdf7116048d0ffce510d3aa283..3a33124e91129e85c3f34fc6dc85ca43c8e35cff 100644 (file)
@@ -157,7 +157,9 @@ ENTRY(chacha20_4block_xor_ssse3)
        # done with the slightly better performing SSSE3 byte shuffling,
        # 7/12-bit word rotation uses traditional shift+OR.
 
-       sub             $0x40,%rsp
+       mov             %rsp,%r11
+       sub             $0x80,%rsp
+       and             $~63,%rsp
 
        # x0..15[0-3] = s0..3[0..3]
        movq            0x00(%rdi),%xmm1
@@ -620,6 +622,6 @@ ENTRY(chacha20_4block_xor_ssse3)
        pxor            %xmm1,%xmm15
        movdqu          %xmm15,0xf0(%rsi)
 
-       add             $0x40,%rsp
+       mov             %r11,%rsp
        ret
 ENDPROC(chacha20_4block_xor_ssse3)
index 881b4768644aad3537364286e468146aba73ad56..e7de5c9a4fbdd2c5d99b78d82d2c58019d20dc04 100644 (file)
@@ -23,11 +23,13 @@ extern void irq_ctx_init(int cpu);
 
 #define __ARCH_HAS_DO_SOFTIRQ
 
+struct irq_desc;
+
 #ifdef CONFIG_HOTPLUG_CPU
 #include <linux/cpumask.h>
 extern int check_irq_vectors_for_cpu_disable(void);
 extern void fixup_irqs(void);
-extern void irq_force_complete_move(int);
+extern void irq_force_complete_move(struct irq_desc *desc);
 #endif
 
 #ifdef CONFIG_HAVE_KVM
@@ -37,7 +39,6 @@ extern void kvm_set_posted_intr_wakeup_handler(void (*handler)(void));
 extern void (*x86_platform_ipi_callback)(void);
 extern void native_init_IRQ(void);
 
-struct irq_desc;
 extern bool handle_irq(struct irq_desc *desc, struct pt_regs *regs);
 
 extern __visible unsigned int do_IRQ(struct pt_regs *regs);
index 04c27a0131656db29e97805432e56d3a1aae5992..4432ab7f407ccc5f47ea9c19a850c29af821d8c9 100644 (file)
@@ -366,20 +366,18 @@ static inline enum page_cache_mode pgprot2cachemode(pgprot_t pgprot)
 }
 static inline pgprot_t pgprot_4k_2_large(pgprot_t pgprot)
 {
+       pgprotval_t val = pgprot_val(pgprot);
        pgprot_t new;
-       unsigned long val;
 
-       val = pgprot_val(pgprot);
        pgprot_val(new) = (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) |
                ((val & _PAGE_PAT) << (_PAGE_BIT_PAT_LARGE - _PAGE_BIT_PAT));
        return new;
 }
 static inline pgprot_t pgprot_large_2_4k(pgprot_t pgprot)
 {
+       pgprotval_t val = pgprot_val(pgprot);
        pgprot_t new;
-       unsigned long val;
 
-       val = pgprot_val(pgprot);
        pgprot_val(new) = (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) |
                          ((val & _PAGE_PAT_LARGE) >>
                           (_PAGE_BIT_PAT_LARGE - _PAGE_BIT_PAT));
index 1544fabcd7f9b7428a5d3ec7429f00f03b545945..c57fd1ea968957cd600ea1032caf1a9f379b38e5 100644 (file)
@@ -67,18 +67,19 @@ static inline void arch_wmb_pmem(void)
 }
 
 /**
- * __arch_wb_cache_pmem - write back a cache range with CLWB
+ * arch_wb_cache_pmem - write back a cache range with CLWB
  * @vaddr:     virtual start address
  * @size:      number of bytes to write back
  *
  * Write back a cache range using the CLWB (cache line write back)
  * instruction.  This function requires explicit ordering with an
- * arch_wmb_pmem() call.  This API is internal to the x86 PMEM implementation.
+ * arch_wmb_pmem() call.
  */
-static inline void __arch_wb_cache_pmem(void *vaddr, size_t size)
+static inline void arch_wb_cache_pmem(void __pmem *addr, size_t size)
 {
        u16 x86_clflush_size = boot_cpu_data.x86_clflush_size;
        unsigned long clflush_mask = x86_clflush_size - 1;
+       void *vaddr = (void __force *)addr;
        void *vend = vaddr + size;
        void *p;
 
@@ -115,7 +116,7 @@ static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes,
        len = copy_from_iter_nocache(vaddr, bytes, i);
 
        if (__iter_needs_pmem_wb(i))
-               __arch_wb_cache_pmem(vaddr, bytes);
+               arch_wb_cache_pmem(addr, bytes);
 
        return len;
 }
@@ -133,7 +134,7 @@ static inline void arch_clear_pmem(void __pmem *addr, size_t size)
        void *vaddr = (void __force *)addr;
 
        memset(vaddr, 0, size);
-       __arch_wb_cache_pmem(vaddr, size);
+       arch_wb_cache_pmem(addr, size);
 }
 
 static inline bool __arch_has_wmb_pmem(void)
index f25321894ad294278c20d5ba7b4076ce2353a8fb..fdb0fbfb1197a4cf4e485c4f330b3a399b3a6d5f 100644 (file)
@@ -2521,6 +2521,7 @@ void __init setup_ioapic_dest(void)
 {
        int pin, ioapic, irq, irq_entry;
        const struct cpumask *mask;
+       struct irq_desc *desc;
        struct irq_data *idata;
        struct irq_chip *chip;
 
@@ -2536,7 +2537,9 @@ void __init setup_ioapic_dest(void)
                if (irq < 0 || !mp_init_irq_at_boot(ioapic, irq))
                        continue;
 
-               idata = irq_get_irq_data(irq);
+               desc = irq_to_desc(irq);
+               raw_spin_lock_irq(&desc->lock);
+               idata = irq_desc_get_irq_data(desc);
 
                /*
                 * Honour affinities which have been set in early boot
@@ -2550,6 +2553,7 @@ void __init setup_ioapic_dest(void)
                /* Might be lapic_chip for irq 0 */
                if (chip->irq_set_affinity)
                        chip->irq_set_affinity(idata, mask, false);
+               raw_spin_unlock_irq(&desc->lock);
        }
 }
 #endif
index 908cb37da171ebed1a361203dc520c15e4e42736..3b670df4ba7b429f48102b1401c9133957ecb6a9 100644 (file)
@@ -31,7 +31,7 @@ struct apic_chip_data {
 struct irq_domain *x86_vector_domain;
 EXPORT_SYMBOL_GPL(x86_vector_domain);
 static DEFINE_RAW_SPINLOCK(vector_lock);
-static cpumask_var_t vector_cpumask;
+static cpumask_var_t vector_cpumask, vector_searchmask, searched_cpumask;
 static struct irq_chip lapic_controller;
 #ifdef CONFIG_X86_IO_APIC
 static struct apic_chip_data *legacy_irq_data[NR_IRQS_LEGACY];
@@ -118,35 +118,47 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d,
         */
        static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START;
        static int current_offset = VECTOR_OFFSET_START % 16;
-       int cpu, err;
+       int cpu, vector;
 
-       if (d->move_in_progress)
+       /*
+        * If there is still a move in progress or the previous move has not
+        * been cleaned up completely, tell the caller to come back later.
+        */
+       if (d->move_in_progress ||
+           cpumask_intersects(d->old_domain, cpu_online_mask))
                return -EBUSY;
 
        /* Only try and allocate irqs on cpus that are present */
-       err = -ENOSPC;
        cpumask_clear(d->old_domain);
+       cpumask_clear(searched_cpumask);
        cpu = cpumask_first_and(mask, cpu_online_mask);
        while (cpu < nr_cpu_ids) {
-               int new_cpu, vector, offset;
+               int new_cpu, offset;
 
+               /* Get the possible target cpus for @mask/@cpu from the apic */
                apic->vector_allocation_domain(cpu, vector_cpumask, mask);
 
+               /*
+                * Clear the offline cpus from @vector_cpumask for searching
+                * and verify whether the result overlaps with @mask. If true,
+                * then the call to apic->cpu_mask_to_apicid_and() will
+                * succeed as well. If not, no point in trying to find a
+                * vector in this mask.
+                */
+               cpumask_and(vector_searchmask, vector_cpumask, cpu_online_mask);
+               if (!cpumask_intersects(vector_searchmask, mask))
+                       goto next_cpu;
+
                if (cpumask_subset(vector_cpumask, d->domain)) {
-                       err = 0;
                        if (cpumask_equal(vector_cpumask, d->domain))
-                               break;
+                               goto success;
                        /*
-                        * New cpumask using the vector is a proper subset of
-                        * the current in use mask. So cleanup the vector
-                        * allocation for the members that are not used anymore.
+                        * Mark the cpus which are not longer in the mask for
+                        * cleanup.
                         */
-                       cpumask_andnot(d->old_domain, d->domain,
-                                      vector_cpumask);
-                       d->move_in_progress =
-                          cpumask_intersects(d->old_domain, cpu_online_mask);
-                       cpumask_and(d->domain, d->domain, vector_cpumask);
-                       break;
+                       cpumask_andnot(d->old_domain, d->domain, vector_cpumask);
+                       vector = d->cfg.vector;
+                       goto update;
                }
 
                vector = current_vector;
@@ -158,45 +170,60 @@ next:
                        vector = FIRST_EXTERNAL_VECTOR + offset;
                }
 
-               if (unlikely(current_vector == vector)) {
-                       cpumask_or(d->old_domain, d->old_domain,
-                                  vector_cpumask);
-                       cpumask_andnot(vector_cpumask, mask, d->old_domain);
-                       cpu = cpumask_first_and(vector_cpumask,
-                                               cpu_online_mask);
-                       continue;
-               }
+               /* If the search wrapped around, try the next cpu */
+               if (unlikely(current_vector == vector))
+                       goto next_cpu;
 
                if (test_bit(vector, used_vectors))
                        goto next;
 
-               for_each_cpu_and(new_cpu, vector_cpumask, cpu_online_mask) {
+               for_each_cpu(new_cpu, vector_searchmask) {
                        if (!IS_ERR_OR_NULL(per_cpu(vector_irq, new_cpu)[vector]))
                                goto next;
                }
                /* Found one! */
                current_vector = vector;
                current_offset = offset;
-               if (d->cfg.vector) {
+               /* Schedule the old vector for cleanup on all cpus */
+               if (d->cfg.vector)
                        cpumask_copy(d->old_domain, d->domain);
-                       d->move_in_progress =
-                          cpumask_intersects(d->old_domain, cpu_online_mask);
-               }
-               for_each_cpu_and(new_cpu, vector_cpumask, cpu_online_mask)
+               for_each_cpu(new_cpu, vector_searchmask)
                        per_cpu(vector_irq, new_cpu)[vector] = irq_to_desc(irq);
-               d->cfg.vector = vector;
-               cpumask_copy(d->domain, vector_cpumask);
-               err = 0;
-               break;
-       }
+               goto update;
 
-       if (!err) {
-               /* cache destination APIC IDs into cfg->dest_apicid */
-               err = apic->cpu_mask_to_apicid_and(mask, d->domain,
-                                                  &d->cfg.dest_apicid);
+next_cpu:
+               /*
+                * We exclude the current @vector_cpumask from the requested
+                * @mask and try again with the next online cpu in the
+                * result. We cannot modify @mask, so we use @vector_cpumask
+                * as a temporary buffer here as it will be reassigned when
+                * calling apic->vector_allocation_domain() above.
+                */
+               cpumask_or(searched_cpumask, searched_cpumask, vector_cpumask);
+               cpumask_andnot(vector_cpumask, mask, searched_cpumask);
+               cpu = cpumask_first_and(vector_cpumask, cpu_online_mask);
+               continue;
        }
+       return -ENOSPC;
 
-       return err;
+update:
+       /*
+        * Exclude offline cpus from the cleanup mask and set the
+        * move_in_progress flag when the result is not empty.
+        */
+       cpumask_and(d->old_domain, d->old_domain, cpu_online_mask);
+       d->move_in_progress = !cpumask_empty(d->old_domain);
+       d->cfg.vector = vector;
+       cpumask_copy(d->domain, vector_cpumask);
+success:
+       /*
+        * Cache destination APIC IDs into cfg->dest_apicid. This cannot fail
+        * as we already established, that mask & d->domain & cpu_online_mask
+        * is not empty.
+        */
+       BUG_ON(apic->cpu_mask_to_apicid_and(mask, d->domain,
+                                           &d->cfg.dest_apicid));
+       return 0;
 }
 
 static int assign_irq_vector(int irq, struct apic_chip_data *data,
@@ -226,10 +253,8 @@ static int assign_irq_vector_policy(int irq, int node,
 static void clear_irq_vector(int irq, struct apic_chip_data *data)
 {
        struct irq_desc *desc;
-       unsigned long flags;
        int cpu, vector;
 
-       raw_spin_lock_irqsave(&vector_lock, flags);
        BUG_ON(!data->cfg.vector);
 
        vector = data->cfg.vector;
@@ -239,10 +264,13 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data)
        data->cfg.vector = 0;
        cpumask_clear(data->domain);
 
-       if (likely(!data->move_in_progress)) {
-               raw_spin_unlock_irqrestore(&vector_lock, flags);
+       /*
+        * If move is in progress or the old_domain mask is not empty,
+        * i.e. the cleanup IPI has not been processed yet, we need to remove
+        * the old references to desc from all cpus vector tables.
+        */
+       if (!data->move_in_progress && cpumask_empty(data->old_domain))
                return;
-       }
 
        desc = irq_to_desc(irq);
        for_each_cpu_and(cpu, data->old_domain, cpu_online_mask) {
@@ -255,7 +283,6 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data)
                }
        }
        data->move_in_progress = 0;
-       raw_spin_unlock_irqrestore(&vector_lock, flags);
 }
 
 void init_irq_alloc_info(struct irq_alloc_info *info,
@@ -276,19 +303,24 @@ void copy_irq_alloc_info(struct irq_alloc_info *dst, struct irq_alloc_info *src)
 static void x86_vector_free_irqs(struct irq_domain *domain,
                                 unsigned int virq, unsigned int nr_irqs)
 {
+       struct apic_chip_data *apic_data;
        struct irq_data *irq_data;
+       unsigned long flags;
        int i;
 
        for (i = 0; i < nr_irqs; i++) {
                irq_data = irq_domain_get_irq_data(x86_vector_domain, virq + i);
                if (irq_data && irq_data->chip_data) {
+                       raw_spin_lock_irqsave(&vector_lock, flags);
                        clear_irq_vector(virq + i, irq_data->chip_data);
-                       free_apic_chip_data(irq_data->chip_data);
+                       apic_data = irq_data->chip_data;
+                       irq_domain_reset_irq_data(irq_data);
+                       raw_spin_unlock_irqrestore(&vector_lock, flags);
+                       free_apic_chip_data(apic_data);
 #ifdef CONFIG_X86_IO_APIC
                        if (virq + i < nr_legacy_irqs())
                                legacy_irq_data[virq + i] = NULL;
 #endif
-                       irq_domain_reset_irq_data(irq_data);
                }
        }
 }
@@ -406,6 +438,8 @@ int __init arch_early_irq_init(void)
        arch_init_htirq_domain(x86_vector_domain);
 
        BUG_ON(!alloc_cpumask_var(&vector_cpumask, GFP_KERNEL));
+       BUG_ON(!alloc_cpumask_var(&vector_searchmask, GFP_KERNEL));
+       BUG_ON(!alloc_cpumask_var(&searched_cpumask, GFP_KERNEL));
 
        return arch_early_ioapic_init();
 }
@@ -494,14 +528,7 @@ static int apic_set_affinity(struct irq_data *irq_data,
                return -EINVAL;
 
        err = assign_irq_vector(irq, data, dest);
-       if (err) {
-               if (assign_irq_vector(irq, data,
-                                     irq_data_get_affinity_mask(irq_data)))
-                       pr_err("Failed to recover vector for irq %d\n", irq);
-               return err;
-       }
-
-       return IRQ_SET_MASK_OK;
+       return err ? err : IRQ_SET_MASK_OK;
 }
 
 static struct irq_chip lapic_controller = {
@@ -513,20 +540,12 @@ static struct irq_chip lapic_controller = {
 #ifdef CONFIG_SMP
 static void __send_cleanup_vector(struct apic_chip_data *data)
 {
-       cpumask_var_t cleanup_mask;
-
-       if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
-               unsigned int i;
-
-               for_each_cpu_and(i, data->old_domain, cpu_online_mask)
-                       apic->send_IPI_mask(cpumask_of(i),
-                                           IRQ_MOVE_CLEANUP_VECTOR);
-       } else {
-               cpumask_and(cleanup_mask, data->old_domain, cpu_online_mask);
-               apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
-               free_cpumask_var(cleanup_mask);
-       }
+       raw_spin_lock(&vector_lock);
+       cpumask_and(data->old_domain, data->old_domain, cpu_online_mask);
        data->move_in_progress = 0;
+       if (!cpumask_empty(data->old_domain))
+               apic->send_IPI_mask(data->old_domain, IRQ_MOVE_CLEANUP_VECTOR);
+       raw_spin_unlock(&vector_lock);
 }
 
 void send_cleanup_vector(struct irq_cfg *cfg)
@@ -570,12 +589,25 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
                        goto unlock;
 
                /*
-                * Check if the irq migration is in progress. If so, we
-                * haven't received the cleanup request yet for this irq.
+                * Nothing to cleanup if irq migration is in progress
+                * or this cpu is not set in the cleanup mask.
                 */
-               if (data->move_in_progress)
+               if (data->move_in_progress ||
+                   !cpumask_test_cpu(me, data->old_domain))
                        goto unlock;
 
+               /*
+                * We have two cases to handle here:
+                * 1) vector is unchanged but the target mask got reduced
+                * 2) vector and the target mask has changed
+                *
+                * #1 is obvious, but in #2 we have two vectors with the same
+                * irq descriptor: the old and the new vector. So we need to
+                * make sure that we only cleanup the old vector. The new
+                * vector has the current @vector number in the config and
+                * this cpu is part of the target mask. We better leave that
+                * one alone.
+                */
                if (vector == data->cfg.vector &&
                    cpumask_test_cpu(me, data->domain))
                        goto unlock;
@@ -593,6 +625,7 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
                        goto unlock;
                }
                __this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
+               cpumask_clear_cpu(me, data->old_domain);
 unlock:
                raw_spin_unlock(&desc->lock);
        }
@@ -621,12 +654,48 @@ void irq_complete_move(struct irq_cfg *cfg)
        __irq_complete_move(cfg, ~get_irq_regs()->orig_ax);
 }
 
-void irq_force_complete_move(int irq)
+/*
+ * Called with @desc->lock held and interrupts disabled.
+ */
+void irq_force_complete_move(struct irq_desc *desc)
 {
-       struct irq_cfg *cfg = irq_cfg(irq);
+       struct irq_data *irqdata = irq_desc_get_irq_data(desc);
+       struct apic_chip_data *data = apic_chip_data(irqdata);
+       struct irq_cfg *cfg = data ? &data->cfg : NULL;
 
-       if (cfg)
-               __irq_complete_move(cfg, cfg->vector);
+       if (!cfg)
+               return;
+
+       __irq_complete_move(cfg, cfg->vector);
+
+       /*
+        * This is tricky. If the cleanup of @data->old_domain has not been
+        * done yet, then the following setaffinity call will fail with
+        * -EBUSY. This can leave the interrupt in a stale state.
+        *
+        * The cleanup cannot make progress because we hold @desc->lock. So in
+        * case @data->old_domain is not yet cleaned up, we need to drop the
+        * lock and acquire it again. @desc cannot go away, because the
+        * hotplug code holds the sparse irq lock.
+        */
+       raw_spin_lock(&vector_lock);
+       /* Clean out all offline cpus (including ourself) first. */
+       cpumask_and(data->old_domain, data->old_domain, cpu_online_mask);
+       while (!cpumask_empty(data->old_domain)) {
+               raw_spin_unlock(&vector_lock);
+               raw_spin_unlock(&desc->lock);
+               cpu_relax();
+               raw_spin_lock(&desc->lock);
+               /*
+                * Reevaluate apic_chip_data. It might have been cleared after
+                * we dropped @desc->lock.
+                */
+               data = apic_chip_data(irqdata);
+               if (!data)
+                       return;
+               raw_spin_lock(&vector_lock);
+       }
+       raw_spin_unlock(&vector_lock);
 }
 #endif
 
index d760c6bb37b53c25931bef3bc47c903def86c8ee..624db00583f44a76283c4d8d5e5377a8342f6517 100644 (file)
@@ -889,7 +889,10 @@ void __init uv_system_init(void)
                return;
        }
        pr_info("UV: Found %s hub\n", hub);
-       map_low_mmrs();
+
+       /* We now only need to map the MMRs on UV1 */
+       if (is_uv1_hub())
+               map_low_mmrs();
 
        m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR );
        m_val = m_n_config.s.m_skt;
index a667078a51807bb5bd1ca2e5d5c4ff205e9dd42b..fed2ab1f1065dfdd5a286822102857a013298dfc 100644 (file)
@@ -1960,7 +1960,8 @@ intel_bts_constraints(struct perf_event *event)
 
 static int intel_alt_er(int idx, u64 config)
 {
-       int alt_idx;
+       int alt_idx = idx;
+
        if (!(x86_pmu.flags & PMU_FL_HAS_RSP_1))
                return idx;
 
@@ -2897,14 +2898,12 @@ static void intel_pmu_cpu_starting(int cpu)
                return;
 
        if (!(x86_pmu.flags & PMU_FL_NO_HT_SHARING)) {
-               void **onln = &cpuc->kfree_on_online[X86_PERF_KFREE_SHARED];
-
                for_each_cpu(i, topology_sibling_cpumask(cpu)) {
                        struct intel_shared_regs *pc;
 
                        pc = per_cpu(cpu_hw_events, i).shared_regs;
                        if (pc && pc->core_id == core_id) {
-                               *onln = cpuc->shared_regs;
+                               cpuc->kfree_on_online[0] = cpuc->shared_regs;
                                cpuc->shared_regs = pc;
                                break;
                        }
index f97f8075bf04c9d6efed980370feca46227afc5a..3bf41d41377505ef28d90ed3363f08e83df70db3 100644 (file)
@@ -995,6 +995,9 @@ static int __init uncore_pci_init(void)
        case 87: /* Knights Landing */
                ret = knl_uncore_pci_init();
                break;
+       case 94: /* SkyLake */
+               ret = skl_uncore_pci_init();
+               break;
        default:
                return 0;
        }
index 07aa2d6bd71094e411a9f98535fc4c65208a66d4..a7086b862156627ebe7ca22996aed6126e61e770 100644 (file)
@@ -336,6 +336,7 @@ int snb_uncore_pci_init(void);
 int ivb_uncore_pci_init(void);
 int hsw_uncore_pci_init(void);
 int bdw_uncore_pci_init(void);
+int skl_uncore_pci_init(void);
 void snb_uncore_cpu_init(void);
 void nhm_uncore_cpu_init(void);
 int snb_pci2phy_map_init(int devid);
index 0b934820fafd1f64747640e09f1544b862eb8c01..2bd030ddd0db3bf6f5ada42830bd9be9ccc4c1b4 100644 (file)
@@ -8,6 +8,7 @@
 #define PCI_DEVICE_ID_INTEL_HSW_IMC    0x0c00
 #define PCI_DEVICE_ID_INTEL_HSW_U_IMC  0x0a04
 #define PCI_DEVICE_ID_INTEL_BDW_IMC    0x1604
+#define PCI_DEVICE_ID_INTEL_SKL_IMC    0x191f
 
 /* SNB event control */
 #define SNB_UNC_CTL_EV_SEL_MASK                        0x000000ff
@@ -524,6 +525,14 @@ static const struct pci_device_id bdw_uncore_pci_ids[] = {
        { /* end: all zeroes */ },
 };
 
+static const struct pci_device_id skl_uncore_pci_ids[] = {
+       { /* IMC */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_IMC),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+       },
+       { /* end: all zeroes */ },
+};
+
 static struct pci_driver snb_uncore_pci_driver = {
        .name           = "snb_uncore",
        .id_table       = snb_uncore_pci_ids,
@@ -544,6 +553,11 @@ static struct pci_driver bdw_uncore_pci_driver = {
        .id_table       = bdw_uncore_pci_ids,
 };
 
+static struct pci_driver skl_uncore_pci_driver = {
+       .name           = "skl_uncore",
+       .id_table       = skl_uncore_pci_ids,
+};
+
 struct imc_uncore_pci_dev {
        __u32 pci_id;
        struct pci_driver *driver;
@@ -558,6 +572,7 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = {
        IMC_DEV(HSW_IMC, &hsw_uncore_pci_driver),    /* 4th Gen Core Processor */
        IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver),  /* 4th Gen Core ULT Mobile Processor */
        IMC_DEV(BDW_IMC, &bdw_uncore_pci_driver),    /* 5th Gen Core U */
+       IMC_DEV(SKL_IMC, &skl_uncore_pci_driver),    /* 6th Gen Core */
        {  /* end marker */ }
 };
 
@@ -610,6 +625,11 @@ int bdw_uncore_pci_init(void)
        return imc_uncore_pci_init();
 }
 
+int skl_uncore_pci_init(void)
+{
+       return imc_uncore_pci_init();
+}
+
 /* end of Sandy Bridge uncore support */
 
 /* Nehalem uncore support */
index f129a9af635747ce616e354a43f30297f3fecabf..2c0f3407bd1f1ae8adcf43cacdb479c91c096669 100644 (file)
@@ -192,5 +192,13 @@ void __init x86_64_start_reservations(char *real_mode_data)
 
        reserve_ebda_region();
 
+       switch (boot_params.hdr.hardware_subarch) {
+       case X86_SUBARCH_INTEL_MID:
+               x86_intel_mid_early_setup();
+               break;
+       default:
+               break;
+       }
+
        start_kernel();
 }
index f8062aaf5df9c830ec287f6774ad270e13d34b32..61521dc19c102114e177cbc21a4f5da9d94c20cd 100644 (file)
@@ -462,7 +462,7 @@ void fixup_irqs(void)
                 * non intr-remapping case, we can't wait till this interrupt
                 * arrives at this cpu before completing the irq move.
                 */
-               irq_force_complete_move(irq);
+               irq_force_complete_move(desc);
 
                if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
                        break_affinity = 1;
@@ -470,6 +470,15 @@ void fixup_irqs(void)
                }
 
                chip = irq_data_get_irq_chip(data);
+               /*
+                * The interrupt descriptor might have been cleaned up
+                * already, but it is not yet removed from the radix tree
+                */
+               if (!chip) {
+                       raw_spin_unlock(&desc->lock);
+                       continue;
+               }
+
                if (!irqd_can_move_in_process_context(data) && chip->irq_mask)
                        chip->irq_mask(data);
 
index 42982b26e32be693713d90e8fa4e18b8f771eb4a..740d7ac03a552bc4937edfc8ff7e8b9d044a61b6 100644 (file)
@@ -173,10 +173,10 @@ static __init int setup_hugepagesz(char *opt)
 }
 __setup("hugepagesz=", setup_hugepagesz);
 
-#ifdef CONFIG_CMA
+#if (defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || defined(CONFIG_CMA)
 static __init int gigantic_pages_init(void)
 {
-       /* With CMA we can allocate gigantic pages at runtime */
+       /* With compaction or CMA we can allocate gigantic pages at runtime */
        if (cpu_has_gbpages && !size_to_hstate(1UL << PUD_SHIFT))
                hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
        return 0;
index fc6a4c8f6e2abc79c29803264ad16a48067904e8..2440814b00699e33809ce061d6f86a48437f7a10 100644 (file)
@@ -33,7 +33,7 @@ struct cpa_data {
        pgd_t           *pgd;
        pgprot_t        mask_set;
        pgprot_t        mask_clr;
-       int             numpages;
+       unsigned long   numpages;
        int             flags;
        unsigned long   pfn;
        unsigned        force_split : 1;
@@ -1350,7 +1350,7 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
                 * CPA operation. Either a large page has been
                 * preserved or a single page update happened.
                 */
-               BUG_ON(cpa->numpages > numpages);
+               BUG_ON(cpa->numpages > numpages || !cpa->numpages);
                numpages -= cpa->numpages;
                if (cpa->flags & (CPA_PAGES_ARRAY | CPA_ARRAY))
                        cpa->curpage++;
index 1c7380da65ffab641f0cb0aedc88cb7ef7e8f6cc..2d66db8f80f992d3b0609373502031aacf91f161 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/memblock.h>
 #include <linux/bootmem.h>
 #include <linux/acpi.h>
+#include <linux/dmi.h>
 #include <asm/efi.h>
 #include <asm/uv/uv.h>
 
@@ -248,6 +249,16 @@ out:
        return ret;
 }
 
+static const struct dmi_system_id sgi_uv1_dmi[] = {
+       { NULL, "SGI UV1",
+               {       DMI_MATCH(DMI_PRODUCT_NAME,     "Stoutland Platform"),
+                       DMI_MATCH(DMI_PRODUCT_VERSION,  "1.0"),
+                       DMI_MATCH(DMI_BIOS_VENDOR,      "SGI.COM"),
+               }
+       },
+       { } /* NULL entry stops DMI scanning */
+};
+
 void __init efi_apply_memmap_quirks(void)
 {
        /*
@@ -260,10 +271,8 @@ void __init efi_apply_memmap_quirks(void)
                efi_unmap_memmap();
        }
 
-       /*
-        * UV doesn't support the new EFI pagetable mapping yet.
-        */
-       if (is_uv_system())
+       /* UV2+ BIOS has a fix for this issue.  UV1 still needs the quirk. */
+       if (dmi_check_system(sgi_uv1_dmi))
                set_bit(EFI_OLD_MEMMAP, &efi.flags);
 }
 
index 1bbc21e2e4aeab9fadaaa1f21856ab0ab9a4cea1..90bb997ed0a21889fd95482bfe298862b46eaacf 100644 (file)
@@ -138,7 +138,7 @@ static void intel_mid_arch_setup(void)
                intel_mid_ops = get_intel_mid_ops[__intel_mid_cpu_chip]();
        else {
                intel_mid_ops = get_intel_mid_ops[INTEL_MID_CPU_CHIP_PENWELL]();
-               pr_info("ARCH: Unknown SoC, assuming PENWELL!\n");
+               pr_info("ARCH: Unknown SoC, assuming Penwell!\n");
        }
 
 out:
@@ -214,12 +214,10 @@ static inline int __init setup_x86_intel_mid_timer(char *arg)
        else if (strcmp("lapic_and_apbt", arg) == 0)
                intel_mid_timer_options = INTEL_MID_TIMER_LAPIC_APBT;
        else {
-               pr_warn("X86 INTEL_MID timer option %s not recognised"
-                          " use x86_intel_mid_timer=apbt_only or lapic_and_apbt\n",
-                          arg);
+               pr_warn("X86 INTEL_MID timer option %s not recognised use x86_intel_mid_timer=apbt_only or lapic_and_apbt\n",
+                       arg);
                return -EINVAL;
        }
        return 0;
 }
 __setup("x86_intel_mid_timer=", setup_x86_intel_mid_timer);
-
index c1bdafaac3ca828cec43af9e43d12933e4de7ea9..c61b6c332e971929fd4d639b9754e8707f513930 100644 (file)
@@ -220,11 +220,12 @@ static int imr_dbgfs_state_show(struct seq_file *s, void *unused)
                if (imr_is_enabled(&imr)) {
                        base = imr_to_phys(imr.addr_lo);
                        end = imr_to_phys(imr.addr_hi) + IMR_MASK;
+                       size = end - base + 1;
                } else {
                        base = 0;
                        end = 0;
+                       size = 0;
                }
-               size = end - base;
                seq_printf(s, "imr%02i: base=%pa, end=%pa, size=0x%08zx "
                           "rmask=0x%08x, wmask=0x%08x, %s, %s\n", i,
                           &base, &end, size, imr.rmask, imr.wmask,
@@ -579,6 +580,7 @@ static void __init imr_fixup_memmap(struct imr_device *idev)
 {
        phys_addr_t base = virt_to_phys(&_text);
        size_t size = virt_to_phys(&__end_rodata) - base;
+       unsigned long start, end;
        int i;
        int ret;
 
@@ -586,18 +588,24 @@ static void __init imr_fixup_memmap(struct imr_device *idev)
        for (i = 0; i < idev->max_imr; i++)
                imr_clear(i);
 
+       start = (unsigned long)_text;
+       end = (unsigned long)__end_rodata - 1;
+
        /*
         * Setup a locked IMR around the physical extent of the kernel
         * from the beginning of the .text secton to the end of the
         * .rodata section as one physically contiguous block.
+        *
+        * We don't round up @size since it is already PAGE_SIZE aligned.
+        * See vmlinux.lds.S for details.
         */
        ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, true);
        if (ret < 0) {
-               pr_err("unable to setup IMR for kernel: (%p - %p)\n",
-                       &_text, &__end_rodata);
+               pr_err("unable to setup IMR for kernel: %zu KiB (%lx - %lx)\n",
+                       size / 1024, start, end);
        } else {
-               pr_info("protecting kernel .text - .rodata: %zu KiB (%p - %p)\n",
-                       size / 1024, &_text, &__end_rodata);
+               pr_info("protecting kernel .text - .rodata: %zu KiB (%lx - %lx)\n",
+                       size / 1024, start, end);
        }
 
 }
index db5f622c9d67e05a4e2a15778c922b367e9cd82b..9eda2322b2d40acfb308a8afa245de04c9b84989 100644 (file)
@@ -5,7 +5,7 @@
 obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \
                        blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
                        blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
-                       blk-iopoll.o blk-lib.o blk-mq.o blk-mq-tag.o \
+                       blk-lib.o blk-mq.o blk-mq-tag.o \
                        blk-mq-sysfs.o blk-mq-cpu.o blk-mq-cpumap.o ioctl.o \
                        genhd.o scsi_ioctl.o partition-generic.o ioprio.o \
                        badblocks.o partitions/
diff --git a/block/blk-iopoll.c b/block/blk-iopoll.c
deleted file mode 100644 (file)
index 0736729..0000000
+++ /dev/null
@@ -1,224 +0,0 @@
-/*
- * Functions related to interrupt-poll handling in the block layer. This
- * is similar to NAPI for network devices.
- */
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/bio.h>
-#include <linux/blkdev.h>
-#include <linux/interrupt.h>
-#include <linux/cpu.h>
-#include <linux/blk-iopoll.h>
-#include <linux/delay.h>
-
-#include "blk.h"
-
-static unsigned int blk_iopoll_budget __read_mostly = 256;
-
-static DEFINE_PER_CPU(struct list_head, blk_cpu_iopoll);
-
-/**
- * blk_iopoll_sched - Schedule a run of the iopoll handler
- * @iop:      The parent iopoll structure
- *
- * Description:
- *     Add this blk_iopoll structure to the pending poll list and trigger the
- *     raise of the blk iopoll softirq. The driver must already have gotten a
- *     successful return from blk_iopoll_sched_prep() before calling this.
- **/
-void blk_iopoll_sched(struct blk_iopoll *iop)
-{
-       unsigned long flags;
-
-       local_irq_save(flags);
-       list_add_tail(&iop->list, this_cpu_ptr(&blk_cpu_iopoll));
-       __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
-       local_irq_restore(flags);
-}
-EXPORT_SYMBOL(blk_iopoll_sched);
-
-/**
- * __blk_iopoll_complete - Mark this @iop as un-polled again
- * @iop:      The parent iopoll structure
- *
- * Description:
- *     See blk_iopoll_complete(). This function must be called with interrupts
- *     disabled.
- **/
-void __blk_iopoll_complete(struct blk_iopoll *iop)
-{
-       list_del(&iop->list);
-       smp_mb__before_atomic();
-       clear_bit_unlock(IOPOLL_F_SCHED, &iop->state);
-}
-EXPORT_SYMBOL(__blk_iopoll_complete);
-
-/**
- * blk_iopoll_complete - Mark this @iop as un-polled again
- * @iop:      The parent iopoll structure
- *
- * Description:
- *     If a driver consumes less than the assigned budget in its run of the
- *     iopoll handler, it'll end the polled mode by calling this function. The
- *     iopoll handler will not be invoked again before blk_iopoll_sched_prep()
- *     is called.
- **/
-void blk_iopoll_complete(struct blk_iopoll *iop)
-{
-       unsigned long flags;
-
-       local_irq_save(flags);
-       __blk_iopoll_complete(iop);
-       local_irq_restore(flags);
-}
-EXPORT_SYMBOL(blk_iopoll_complete);
-
-static void blk_iopoll_softirq(struct softirq_action *h)
-{
-       struct list_head *list = this_cpu_ptr(&blk_cpu_iopoll);
-       int rearm = 0, budget = blk_iopoll_budget;
-       unsigned long start_time = jiffies;
-
-       local_irq_disable();
-
-       while (!list_empty(list)) {
-               struct blk_iopoll *iop;
-               int work, weight;
-
-               /*
-                * If softirq window is exhausted then punt.
-                */
-               if (budget <= 0 || time_after(jiffies, start_time)) {
-                       rearm = 1;
-                       break;
-               }
-
-               local_irq_enable();
-
-               /* Even though interrupts have been re-enabled, this
-                * access is safe because interrupts can only add new
-                * entries to the tail of this list, and only ->poll()
-                * calls can remove this head entry from the list.
-                */
-               iop = list_entry(list->next, struct blk_iopoll, list);
-
-               weight = iop->weight;
-               work = 0;
-               if (test_bit(IOPOLL_F_SCHED, &iop->state))
-                       work = iop->poll(iop, weight);
-
-               budget -= work;
-
-               local_irq_disable();
-
-               /*
-                * Drivers must not modify the iopoll state, if they
-                * consume their assigned weight (or more, some drivers can't
-                * easily just stop processing, they have to complete an
-                * entire mask of commands).In such cases this code
-                * still "owns" the iopoll instance and therefore can
-                * move the instance around on the list at-will.
-                */
-               if (work >= weight) {
-                       if (blk_iopoll_disable_pending(iop))
-                               __blk_iopoll_complete(iop);
-                       else
-                               list_move_tail(&iop->list, list);
-               }
-       }
-
-       if (rearm)
-               __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
-
-       local_irq_enable();
-}
-
-/**
- * blk_iopoll_disable - Disable iopoll on this @iop
- * @iop:      The parent iopoll structure
- *
- * Description:
- *     Disable io polling and wait for any pending callbacks to have completed.
- **/
-void blk_iopoll_disable(struct blk_iopoll *iop)
-{
-       set_bit(IOPOLL_F_DISABLE, &iop->state);
-       while (test_and_set_bit(IOPOLL_F_SCHED, &iop->state))
-               msleep(1);
-       clear_bit(IOPOLL_F_DISABLE, &iop->state);
-}
-EXPORT_SYMBOL(blk_iopoll_disable);
-
-/**
- * blk_iopoll_enable - Enable iopoll on this @iop
- * @iop:      The parent iopoll structure
- *
- * Description:
- *     Enable iopoll on this @iop. Note that the handler run will not be
- *     scheduled, it will only mark it as active.
- **/
-void blk_iopoll_enable(struct blk_iopoll *iop)
-{
-       BUG_ON(!test_bit(IOPOLL_F_SCHED, &iop->state));
-       smp_mb__before_atomic();
-       clear_bit_unlock(IOPOLL_F_SCHED, &iop->state);
-}
-EXPORT_SYMBOL(blk_iopoll_enable);
-
-/**
- * blk_iopoll_init - Initialize this @iop
- * @iop:      The parent iopoll structure
- * @weight:   The default weight (or command completion budget)
- * @poll_fn:  The handler to invoke
- *
- * Description:
- *     Initialize this blk_iopoll structure. Before being actively used, the
- *     driver must call blk_iopoll_enable().
- **/
-void blk_iopoll_init(struct blk_iopoll *iop, int weight, blk_iopoll_fn *poll_fn)
-{
-       memset(iop, 0, sizeof(*iop));
-       INIT_LIST_HEAD(&iop->list);
-       iop->weight = weight;
-       iop->poll = poll_fn;
-       set_bit(IOPOLL_F_SCHED, &iop->state);
-}
-EXPORT_SYMBOL(blk_iopoll_init);
-
-static int blk_iopoll_cpu_notify(struct notifier_block *self,
-                                unsigned long action, void *hcpu)
-{
-       /*
-        * If a CPU goes away, splice its entries to the current CPU
-        * and trigger a run of the softirq
-        */
-       if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
-               int cpu = (unsigned long) hcpu;
-
-               local_irq_disable();
-               list_splice_init(&per_cpu(blk_cpu_iopoll, cpu),
-                                this_cpu_ptr(&blk_cpu_iopoll));
-               __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
-               local_irq_enable();
-       }
-
-       return NOTIFY_OK;
-}
-
-static struct notifier_block blk_iopoll_cpu_notifier = {
-       .notifier_call  = blk_iopoll_cpu_notify,
-};
-
-static __init int blk_iopoll_setup(void)
-{
-       int i;
-
-       for_each_possible_cpu(i)
-               INIT_LIST_HEAD(&per_cpu(blk_cpu_iopoll, i));
-
-       open_softirq(BLOCK_IOPOLL_SOFTIRQ, blk_iopoll_softirq);
-       register_hotcpu_notifier(&blk_iopoll_cpu_notifier);
-       return 0;
-}
-subsys_initcall(blk_iopoll_setup);
index 1699df5b0493a721a5ee1945a07c8ec516856164..888a7fec81f715199c3050576516e55b692f2b9b 100644 (file)
@@ -70,6 +70,18 @@ static struct bio *blk_bio_write_same_split(struct request_queue *q,
        return bio_split(bio, q->limits.max_write_same_sectors, GFP_NOIO, bs);
 }
 
+static inline unsigned get_max_io_size(struct request_queue *q,
+                                      struct bio *bio)
+{
+       unsigned sectors = blk_max_size_offset(q, bio->bi_iter.bi_sector);
+       unsigned mask = queue_logical_block_size(q) - 1;
+
+       /* aligned to logical block size */
+       sectors &= ~(mask >> 9);
+
+       return sectors;
+}
+
 static struct bio *blk_bio_segment_split(struct request_queue *q,
                                         struct bio *bio,
                                         struct bio_set *bs,
@@ -81,6 +93,7 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
        unsigned front_seg_size = bio->bi_seg_front_size;
        bool do_split = true;
        struct bio *new = NULL;
+       const unsigned max_sectors = get_max_io_size(q, bio);
 
        bio_for_each_segment(bv, bio, iter) {
                /*
@@ -90,20 +103,19 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
                if (bvprvp && bvec_gap_to_prev(q, bvprvp, bv.bv_offset))
                        goto split;
 
-               if (sectors + (bv.bv_len >> 9) >
-                               blk_max_size_offset(q, bio->bi_iter.bi_sector)) {
+               if (sectors + (bv.bv_len >> 9) > max_sectors) {
                        /*
                         * Consider this a new segment if we're splitting in
                         * the middle of this vector.
                         */
                        if (nsegs < queue_max_segments(q) &&
-                           sectors < blk_max_size_offset(q,
-                                               bio->bi_iter.bi_sector)) {
+                           sectors < max_sectors) {
                                nsegs++;
-                               sectors = blk_max_size_offset(q,
-                                               bio->bi_iter.bi_sector);
+                               sectors = max_sectors;
                        }
-                       goto split;
+                       if (sectors)
+                               goto split;
+                       /* Make this single bvec as the 1st segment */
                }
 
                if (bvprvp && blk_queue_cluster(q)) {
index 2c84683aada56e4b52c9080b88ce99eb1d7b68c8..d8996bbd7f12805b9c74429813f5b3ee3d744bdc 100644 (file)
@@ -434,42 +434,6 @@ bool blkdev_dax_capable(struct block_device *bdev)
 
        return true;
 }
-
-static int blkdev_daxset(struct block_device *bdev, unsigned long argp)
-{
-       unsigned long arg;
-       int rc = 0;
-
-       if (!capable(CAP_SYS_ADMIN))
-               return -EACCES;
-
-       if (get_user(arg, (int __user *)(argp)))
-               return -EFAULT;
-       arg = !!arg;
-       if (arg == !!(bdev->bd_inode->i_flags & S_DAX))
-               return 0;
-
-       if (arg)
-               arg = S_DAX;
-
-       if (arg && !blkdev_dax_capable(bdev))
-               return -ENOTTY;
-
-       mutex_lock(&bdev->bd_inode->i_mutex);
-       if (bdev->bd_map_count == 0)
-               inode_set_flags(bdev->bd_inode, arg, S_DAX);
-       else
-               rc = -EBUSY;
-       mutex_unlock(&bdev->bd_inode->i_mutex);
-       return rc;
-}
-#else
-static int blkdev_daxset(struct block_device *bdev, int arg)
-{
-       if (arg)
-               return -ENOTTY;
-       return 0;
-}
 #endif
 
 static int blkdev_flushbuf(struct block_device *bdev, fmode_t mode,
@@ -634,8 +598,6 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
        case BLKTRACESETUP:
        case BLKTRACETEARDOWN:
                return blk_trace_ioctl(bdev, cmd, argp);
-       case BLKDAXSET:
-               return blkdev_daxset(bdev, arg);
        case BLKDAXGET:
                return put_int(arg, !!(bdev->bd_inode->i_flags & S_DAX));
                break;
index 746935a5973ca6c76b8f66cfff8d58ce50566994..fefd01b496a0852754d9a586c9b0dd6d940ff746 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/kmod.h>
 #include <linux/ctype.h>
 #include <linux/genhd.h>
+#include <linux/dax.h>
 #include <linux/blktrace_api.h>
 
 #include "partitions/check.h"
@@ -550,13 +551,24 @@ int invalidate_partitions(struct gendisk *disk, struct block_device *bdev)
        return 0;
 }
 
-unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
+static struct page *read_pagecache_sector(struct block_device *bdev, sector_t n)
 {
        struct address_space *mapping = bdev->bd_inode->i_mapping;
+
+       return read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_CACHE_SHIFT-9)),
+                       NULL);
+}
+
+unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
+{
        struct page *page;
 
-       page = read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_CACHE_SHIFT-9)),
-                                NULL);
+       /* don't populate page cache for dax capable devices */
+       if (IS_DAX(bdev->bd_inode))
+               page = read_dax_sector(bdev, n);
+       else
+               page = read_pagecache_sector(bdev, n);
+
        if (!IS_ERR(page)) {
                if (PageError(page))
                        goto fail;
index 7240821137fde371f0e32d9ae8ac25b64ab58862..3be07ad1d80dc835789c92ba020b58809010473a 100644 (file)
@@ -472,11 +472,13 @@ config CRYPTO_CRCT10DIF_PCLMUL
 config CRYPTO_GHASH
        tristate "GHASH digest algorithm"
        select CRYPTO_GF128MUL
+       select CRYPTO_HASH
        help
          GHASH is message digest algorithm for GCM (Galois/Counter Mode).
 
 config CRYPTO_POLY1305
        tristate "Poly1305 authenticator algorithm"
+       select CRYPTO_HASH
        help
          Poly1305 authenticator algorithm, RFC7539.
 
index a8e7aa3e257bbc3c6254b82d2966b9fde3031184..f5e18c2a48527bb3f5bbdc5202b37577689710b3 100644 (file)
@@ -76,6 +76,8 @@ int af_alg_register_type(const struct af_alg_type *type)
                goto unlock;
 
        type->ops->owner = THIS_MODULE;
+       if (type->ops_nokey)
+               type->ops_nokey->owner = THIS_MODULE;
        node->type = type;
        list_add(&node->list, &alg_types);
        err = 0;
@@ -125,6 +127,26 @@ int af_alg_release(struct socket *sock)
 }
 EXPORT_SYMBOL_GPL(af_alg_release);
 
+void af_alg_release_parent(struct sock *sk)
+{
+       struct alg_sock *ask = alg_sk(sk);
+       unsigned int nokey = ask->nokey_refcnt;
+       bool last = nokey && !ask->refcnt;
+
+       sk = ask->parent;
+       ask = alg_sk(sk);
+
+       lock_sock(sk);
+       ask->nokey_refcnt -= nokey;
+       if (!last)
+               last = !--ask->refcnt;
+       release_sock(sk);
+
+       if (last)
+               sock_put(sk);
+}
+EXPORT_SYMBOL_GPL(af_alg_release_parent);
+
 static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 {
        const u32 forbidden = CRYPTO_ALG_INTERNAL;
@@ -133,6 +155,7 @@ static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
        struct sockaddr_alg *sa = (void *)uaddr;
        const struct af_alg_type *type;
        void *private;
+       int err;
 
        if (sock->state == SS_CONNECTED)
                return -EINVAL;
@@ -160,16 +183,22 @@ static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
                return PTR_ERR(private);
        }
 
+       err = -EBUSY;
        lock_sock(sk);
+       if (ask->refcnt | ask->nokey_refcnt)
+               goto unlock;
 
        swap(ask->type, type);
        swap(ask->private, private);
 
+       err = 0;
+
+unlock:
        release_sock(sk);
 
        alg_do_release(type, private);
 
-       return 0;
+       return err;
 }
 
 static int alg_setkey(struct sock *sk, char __user *ukey,
@@ -202,11 +231,15 @@ static int alg_setsockopt(struct socket *sock, int level, int optname,
        struct sock *sk = sock->sk;
        struct alg_sock *ask = alg_sk(sk);
        const struct af_alg_type *type;
-       int err = -ENOPROTOOPT;
+       int err = -EBUSY;
 
        lock_sock(sk);
+       if (ask->refcnt)
+               goto unlock;
+
        type = ask->type;
 
+       err = -ENOPROTOOPT;
        if (level != SOL_ALG || !type)
                goto unlock;
 
@@ -238,6 +271,7 @@ int af_alg_accept(struct sock *sk, struct socket *newsock)
        struct alg_sock *ask = alg_sk(sk);
        const struct af_alg_type *type;
        struct sock *sk2;
+       unsigned int nokey;
        int err;
 
        lock_sock(sk);
@@ -257,20 +291,29 @@ int af_alg_accept(struct sock *sk, struct socket *newsock)
        security_sk_clone(sk, sk2);
 
        err = type->accept(ask->private, sk2);
-       if (err) {
-               sk_free(sk2);
+
+       nokey = err == -ENOKEY;
+       if (nokey && type->accept_nokey)
+               err = type->accept_nokey(ask->private, sk2);
+
+       if (err)
                goto unlock;
-       }
 
        sk2->sk_family = PF_ALG;
 
-       sock_hold(sk);
+       if (nokey || !ask->refcnt++)
+               sock_hold(sk);
+       ask->nokey_refcnt += nokey;
        alg_sk(sk2)->parent = sk;
        alg_sk(sk2)->type = type;
+       alg_sk(sk2)->nokey_refcnt = nokey;
 
        newsock->ops = type->ops;
        newsock->state = SS_CONNECTED;
 
+       if (nokey)
+               newsock->ops = type->ops_nokey;
+
        err = 0;
 
 unlock:
index 9c1dc8d6106a89a0f853271c1dfc49cd301ec983..d19b52324cf520ee777743ee895efb2f537f5e62 100644 (file)
@@ -451,6 +451,7 @@ static int crypto_ahash_init_tfm(struct crypto_tfm *tfm)
        struct ahash_alg *alg = crypto_ahash_alg(hash);
 
        hash->setkey = ahash_nosetkey;
+       hash->has_setkey = false;
        hash->export = ahash_no_export;
        hash->import = ahash_no_import;
 
@@ -463,8 +464,10 @@ static int crypto_ahash_init_tfm(struct crypto_tfm *tfm)
        hash->finup = alg->finup ?: ahash_def_finup;
        hash->digest = alg->digest;
 
-       if (alg->setkey)
+       if (alg->setkey) {
                hash->setkey = alg->setkey;
+               hash->has_setkey = true;
+       }
        if (alg->export)
                hash->export = alg->export;
        if (alg->import)
index b4c24fe3dcfb5ed575350406c9dd7ece3c006aef..68a5ceaa04c81072f453a7b2ca2605bed39bd5a1 100644 (file)
@@ -34,6 +34,11 @@ struct hash_ctx {
        struct ahash_request req;
 };
 
+struct algif_hash_tfm {
+       struct crypto_ahash *hash;
+       bool has_key;
+};
+
 static int hash_sendmsg(struct socket *sock, struct msghdr *msg,
                        size_t ignored)
 {
@@ -49,7 +54,8 @@ static int hash_sendmsg(struct socket *sock, struct msghdr *msg,
 
        lock_sock(sk);
        if (!ctx->more) {
-               err = crypto_ahash_init(&ctx->req);
+               err = af_alg_wait_for_completion(crypto_ahash_init(&ctx->req),
+                                               &ctx->completion);
                if (err)
                        goto unlock;
        }
@@ -120,6 +126,7 @@ static ssize_t hash_sendpage(struct socket *sock, struct page *page,
        } else {
                if (!ctx->more) {
                        err = crypto_ahash_init(&ctx->req);
+                       err = af_alg_wait_for_completion(err, &ctx->completion);
                        if (err)
                                goto unlock;
                }
@@ -235,19 +242,151 @@ static struct proto_ops algif_hash_ops = {
        .accept         =       hash_accept,
 };
 
+static int hash_check_key(struct socket *sock)
+{
+       int err = 0;
+       struct sock *psk;
+       struct alg_sock *pask;
+       struct algif_hash_tfm *tfm;
+       struct sock *sk = sock->sk;
+       struct alg_sock *ask = alg_sk(sk);
+
+       lock_sock(sk);
+       if (ask->refcnt)
+               goto unlock_child;
+
+       psk = ask->parent;
+       pask = alg_sk(ask->parent);
+       tfm = pask->private;
+
+       err = -ENOKEY;
+       lock_sock_nested(psk, SINGLE_DEPTH_NESTING);
+       if (!tfm->has_key)
+               goto unlock;
+
+       if (!pask->refcnt++)
+               sock_hold(psk);
+
+       ask->refcnt = 1;
+       sock_put(psk);
+
+       err = 0;
+
+unlock:
+       release_sock(psk);
+unlock_child:
+       release_sock(sk);
+
+       return err;
+}
+
+static int hash_sendmsg_nokey(struct socket *sock, struct msghdr *msg,
+                             size_t size)
+{
+       int err;
+
+       err = hash_check_key(sock);
+       if (err)
+               return err;
+
+       return hash_sendmsg(sock, msg, size);
+}
+
+static ssize_t hash_sendpage_nokey(struct socket *sock, struct page *page,
+                                  int offset, size_t size, int flags)
+{
+       int err;
+
+       err = hash_check_key(sock);
+       if (err)
+               return err;
+
+       return hash_sendpage(sock, page, offset, size, flags);
+}
+
+static int hash_recvmsg_nokey(struct socket *sock, struct msghdr *msg,
+                             size_t ignored, int flags)
+{
+       int err;
+
+       err = hash_check_key(sock);
+       if (err)
+               return err;
+
+       return hash_recvmsg(sock, msg, ignored, flags);
+}
+
+static int hash_accept_nokey(struct socket *sock, struct socket *newsock,
+                            int flags)
+{
+       int err;
+
+       err = hash_check_key(sock);
+       if (err)
+               return err;
+
+       return hash_accept(sock, newsock, flags);
+}
+
+static struct proto_ops algif_hash_ops_nokey = {
+       .family         =       PF_ALG,
+
+       .connect        =       sock_no_connect,
+       .socketpair     =       sock_no_socketpair,
+       .getname        =       sock_no_getname,
+       .ioctl          =       sock_no_ioctl,
+       .listen         =       sock_no_listen,
+       .shutdown       =       sock_no_shutdown,
+       .getsockopt     =       sock_no_getsockopt,
+       .mmap           =       sock_no_mmap,
+       .bind           =       sock_no_bind,
+       .setsockopt     =       sock_no_setsockopt,
+       .poll           =       sock_no_poll,
+
+       .release        =       af_alg_release,
+       .sendmsg        =       hash_sendmsg_nokey,
+       .sendpage       =       hash_sendpage_nokey,
+       .recvmsg        =       hash_recvmsg_nokey,
+       .accept         =       hash_accept_nokey,
+};
+
 static void *hash_bind(const char *name, u32 type, u32 mask)
 {
-       return crypto_alloc_ahash(name, type, mask);
+       struct algif_hash_tfm *tfm;
+       struct crypto_ahash *hash;
+
+       tfm = kzalloc(sizeof(*tfm), GFP_KERNEL);
+       if (!tfm)
+               return ERR_PTR(-ENOMEM);
+
+       hash = crypto_alloc_ahash(name, type, mask);
+       if (IS_ERR(hash)) {
+               kfree(tfm);
+               return ERR_CAST(hash);
+       }
+
+       tfm->hash = hash;
+
+       return tfm;
 }
 
 static void hash_release(void *private)
 {
-       crypto_free_ahash(private);
+       struct algif_hash_tfm *tfm = private;
+
+       crypto_free_ahash(tfm->hash);
+       kfree(tfm);
 }
 
 static int hash_setkey(void *private, const u8 *key, unsigned int keylen)
 {
-       return crypto_ahash_setkey(private, key, keylen);
+       struct algif_hash_tfm *tfm = private;
+       int err;
+
+       err = crypto_ahash_setkey(tfm->hash, key, keylen);
+       tfm->has_key = !err;
+
+       return err;
 }
 
 static void hash_sock_destruct(struct sock *sk)
@@ -261,12 +400,14 @@ static void hash_sock_destruct(struct sock *sk)
        af_alg_release_parent(sk);
 }
 
-static int hash_accept_parent(void *private, struct sock *sk)
+static int hash_accept_parent_nokey(void *private, struct sock *sk)
 {
        struct hash_ctx *ctx;
        struct alg_sock *ask = alg_sk(sk);
-       unsigned len = sizeof(*ctx) + crypto_ahash_reqsize(private);
-       unsigned ds = crypto_ahash_digestsize(private);
+       struct algif_hash_tfm *tfm = private;
+       struct crypto_ahash *hash = tfm->hash;
+       unsigned len = sizeof(*ctx) + crypto_ahash_reqsize(hash);
+       unsigned ds = crypto_ahash_digestsize(hash);
 
        ctx = sock_kmalloc(sk, len, GFP_KERNEL);
        if (!ctx)
@@ -286,7 +427,7 @@ static int hash_accept_parent(void *private, struct sock *sk)
 
        ask->private = ctx;
 
-       ahash_request_set_tfm(&ctx->req, private);
+       ahash_request_set_tfm(&ctx->req, hash);
        ahash_request_set_callback(&ctx->req, CRYPTO_TFM_REQ_MAY_BACKLOG,
                                   af_alg_complete, &ctx->completion);
 
@@ -295,12 +436,24 @@ static int hash_accept_parent(void *private, struct sock *sk)
        return 0;
 }
 
+static int hash_accept_parent(void *private, struct sock *sk)
+{
+       struct algif_hash_tfm *tfm = private;
+
+       if (!tfm->has_key && crypto_ahash_has_setkey(tfm->hash))
+               return -ENOKEY;
+
+       return hash_accept_parent_nokey(private, sk);
+}
+
 static const struct af_alg_type algif_type_hash = {
        .bind           =       hash_bind,
        .release        =       hash_release,
        .setkey         =       hash_setkey,
        .accept         =       hash_accept_parent,
+       .accept_nokey   =       hash_accept_parent_nokey,
        .ops            =       &algif_hash_ops,
+       .ops_nokey      =       &algif_hash_ops_nokey,
        .name           =       "hash",
        .owner          =       THIS_MODULE
 };
index eaa9f9be5b87824446c20f07d6bef14114e866b3..28556fce42671e2f182d5239d3dc6468e5b1d970 100644 (file)
@@ -31,6 +31,11 @@ struct skcipher_sg_list {
        struct scatterlist sg[0];
 };
 
+struct skcipher_tfm {
+       struct crypto_skcipher *skcipher;
+       bool has_key;
+};
+
 struct skcipher_ctx {
        struct list_head tsgl;
        struct af_alg_sgl rsgl;
@@ -60,18 +65,10 @@ struct skcipher_async_req {
        struct skcipher_async_rsgl first_sgl;
        struct list_head list;
        struct scatterlist *tsg;
-       char iv[];
+       atomic_t *inflight;
+       struct skcipher_request req;
 };
 
-#define GET_SREQ(areq, ctx) (struct skcipher_async_req *)((char *)areq + \
-       crypto_skcipher_reqsize(crypto_skcipher_reqtfm(&ctx->req)))
-
-#define GET_REQ_SIZE(ctx) \
-       crypto_skcipher_reqsize(crypto_skcipher_reqtfm(&ctx->req))
-
-#define GET_IV_SIZE(ctx) \
-       crypto_skcipher_ivsize(crypto_skcipher_reqtfm(&ctx->req))
-
 #define MAX_SGL_ENTS ((4096 - sizeof(struct skcipher_sg_list)) / \
                      sizeof(struct scatterlist) - 1)
 
@@ -97,15 +94,12 @@ static void skcipher_free_async_sgls(struct skcipher_async_req *sreq)
 
 static void skcipher_async_cb(struct crypto_async_request *req, int err)
 {
-       struct sock *sk = req->data;
-       struct alg_sock *ask = alg_sk(sk);
-       struct skcipher_ctx *ctx = ask->private;
-       struct skcipher_async_req *sreq = GET_SREQ(req, ctx);
+       struct skcipher_async_req *sreq = req->data;
        struct kiocb *iocb = sreq->iocb;
 
-       atomic_dec(&ctx->inflight);
+       atomic_dec(sreq->inflight);
        skcipher_free_async_sgls(sreq);
-       kfree(req);
+       kzfree(sreq);
        iocb->ki_complete(iocb, err, err);
 }
 
@@ -301,8 +295,11 @@ static int skcipher_sendmsg(struct socket *sock, struct msghdr *msg,
 {
        struct sock *sk = sock->sk;
        struct alg_sock *ask = alg_sk(sk);
+       struct sock *psk = ask->parent;
+       struct alg_sock *pask = alg_sk(psk);
        struct skcipher_ctx *ctx = ask->private;
-       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(&ctx->req);
+       struct skcipher_tfm *skc = pask->private;
+       struct crypto_skcipher *tfm = skc->skcipher;
        unsigned ivsize = crypto_skcipher_ivsize(tfm);
        struct skcipher_sg_list *sgl;
        struct af_alg_control con = {};
@@ -387,7 +384,8 @@ static int skcipher_sendmsg(struct socket *sock, struct msghdr *msg,
 
                sgl = list_entry(ctx->tsgl.prev, struct skcipher_sg_list, list);
                sg = sgl->sg;
-               sg_unmark_end(sg + sgl->cur);
+               if (sgl->cur)
+                       sg_unmark_end(sg + sgl->cur - 1);
                do {
                        i = sgl->cur;
                        plen = min_t(size_t, len, PAGE_SIZE);
@@ -503,37 +501,43 @@ static int skcipher_recvmsg_async(struct socket *sock, struct msghdr *msg,
 {
        struct sock *sk = sock->sk;
        struct alg_sock *ask = alg_sk(sk);
+       struct sock *psk = ask->parent;
+       struct alg_sock *pask = alg_sk(psk);
        struct skcipher_ctx *ctx = ask->private;
+       struct skcipher_tfm *skc = pask->private;
+       struct crypto_skcipher *tfm = skc->skcipher;
        struct skcipher_sg_list *sgl;
        struct scatterlist *sg;
        struct skcipher_async_req *sreq;
        struct skcipher_request *req;
        struct skcipher_async_rsgl *last_rsgl = NULL;
-       unsigned int txbufs = 0, len = 0, tx_nents = skcipher_all_sg_nents(ctx);
-       unsigned int reqlen = sizeof(struct skcipher_async_req) +
-                               GET_REQ_SIZE(ctx) + GET_IV_SIZE(ctx);
+       unsigned int txbufs = 0, len = 0, tx_nents;
+       unsigned int reqsize = crypto_skcipher_reqsize(tfm);
+       unsigned int ivsize = crypto_skcipher_ivsize(tfm);
        int err = -ENOMEM;
        bool mark = false;
+       char *iv;
 
-       lock_sock(sk);
-       req = kmalloc(reqlen, GFP_KERNEL);
-       if (unlikely(!req))
-               goto unlock;
+       sreq = kzalloc(sizeof(*sreq) + reqsize + ivsize, GFP_KERNEL);
+       if (unlikely(!sreq))
+               goto out;
 
-       sreq = GET_SREQ(req, ctx);
+       req = &sreq->req;
+       iv = (char *)(req + 1) + reqsize;
        sreq->iocb = msg->msg_iocb;
-       memset(&sreq->first_sgl, '\0', sizeof(struct skcipher_async_rsgl));
        INIT_LIST_HEAD(&sreq->list);
+       sreq->inflight = &ctx->inflight;
+
+       lock_sock(sk);
+       tx_nents = skcipher_all_sg_nents(ctx);
        sreq->tsg = kcalloc(tx_nents, sizeof(*sg), GFP_KERNEL);
-       if (unlikely(!sreq->tsg)) {
-               kfree(req);
+       if (unlikely(!sreq->tsg))
                goto unlock;
-       }
        sg_init_table(sreq->tsg, tx_nents);
-       memcpy(sreq->iv, ctx->iv, GET_IV_SIZE(ctx));
-       skcipher_request_set_tfm(req, crypto_skcipher_reqtfm(&ctx->req));
-       skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
-                                     skcipher_async_cb, sk);
+       memcpy(iv, ctx->iv, ivsize);
+       skcipher_request_set_tfm(req, tfm);
+       skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP,
+                                     skcipher_async_cb, sreq);
 
        while (iov_iter_count(&msg->msg_iter)) {
                struct skcipher_async_rsgl *rsgl;
@@ -609,20 +613,22 @@ static int skcipher_recvmsg_async(struct socket *sock, struct msghdr *msg,
                sg_mark_end(sreq->tsg + txbufs - 1);
 
        skcipher_request_set_crypt(req, sreq->tsg, sreq->first_sgl.sgl.sg,
-                                  len, sreq->iv);
+                                  len, iv);
        err = ctx->enc ? crypto_skcipher_encrypt(req) :
                         crypto_skcipher_decrypt(req);
        if (err == -EINPROGRESS) {
                atomic_inc(&ctx->inflight);
                err = -EIOCBQUEUED;
+               sreq = NULL;
                goto unlock;
        }
 free:
        skcipher_free_async_sgls(sreq);
-       kfree(req);
 unlock:
        skcipher_wmem_wakeup(sk);
        release_sock(sk);
+       kzfree(sreq);
+out:
        return err;
 }
 
@@ -631,9 +637,12 @@ static int skcipher_recvmsg_sync(struct socket *sock, struct msghdr *msg,
 {
        struct sock *sk = sock->sk;
        struct alg_sock *ask = alg_sk(sk);
+       struct sock *psk = ask->parent;
+       struct alg_sock *pask = alg_sk(psk);
        struct skcipher_ctx *ctx = ask->private;
-       unsigned bs = crypto_skcipher_blocksize(crypto_skcipher_reqtfm(
-               &ctx->req));
+       struct skcipher_tfm *skc = pask->private;
+       struct crypto_skcipher *tfm = skc->skcipher;
+       unsigned bs = crypto_skcipher_blocksize(tfm);
        struct skcipher_sg_list *sgl;
        struct scatterlist *sg;
        int err = -EAGAIN;
@@ -642,13 +651,6 @@ static int skcipher_recvmsg_sync(struct socket *sock, struct msghdr *msg,
 
        lock_sock(sk);
        while (msg_data_left(msg)) {
-               sgl = list_first_entry(&ctx->tsgl,
-                                      struct skcipher_sg_list, list);
-               sg = sgl->sg;
-
-               while (!sg->length)
-                       sg++;
-
                if (!ctx->used) {
                        err = skcipher_wait_for_data(sk, flags);
                        if (err)
@@ -669,6 +671,13 @@ static int skcipher_recvmsg_sync(struct socket *sock, struct msghdr *msg,
                if (!used)
                        goto free;
 
+               sgl = list_first_entry(&ctx->tsgl,
+                                      struct skcipher_sg_list, list);
+               sg = sgl->sg;
+
+               while (!sg->length)
+                       sg++;
+
                skcipher_request_set_crypt(&ctx->req, sg, ctx->rsgl.sg, used,
                                           ctx->iv);
 
@@ -748,19 +757,139 @@ static struct proto_ops algif_skcipher_ops = {
        .poll           =       skcipher_poll,
 };
 
+static int skcipher_check_key(struct socket *sock)
+{
+       int err = 0;
+       struct sock *psk;
+       struct alg_sock *pask;
+       struct skcipher_tfm *tfm;
+       struct sock *sk = sock->sk;
+       struct alg_sock *ask = alg_sk(sk);
+
+       lock_sock(sk);
+       if (ask->refcnt)
+               goto unlock_child;
+
+       psk = ask->parent;
+       pask = alg_sk(ask->parent);
+       tfm = pask->private;
+
+       err = -ENOKEY;
+       lock_sock_nested(psk, SINGLE_DEPTH_NESTING);
+       if (!tfm->has_key)
+               goto unlock;
+
+       if (!pask->refcnt++)
+               sock_hold(psk);
+
+       ask->refcnt = 1;
+       sock_put(psk);
+
+       err = 0;
+
+unlock:
+       release_sock(psk);
+unlock_child:
+       release_sock(sk);
+
+       return err;
+}
+
+static int skcipher_sendmsg_nokey(struct socket *sock, struct msghdr *msg,
+                                 size_t size)
+{
+       int err;
+
+       err = skcipher_check_key(sock);
+       if (err)
+               return err;
+
+       return skcipher_sendmsg(sock, msg, size);
+}
+
+static ssize_t skcipher_sendpage_nokey(struct socket *sock, struct page *page,
+                                      int offset, size_t size, int flags)
+{
+       int err;
+
+       err = skcipher_check_key(sock);
+       if (err)
+               return err;
+
+       return skcipher_sendpage(sock, page, offset, size, flags);
+}
+
+static int skcipher_recvmsg_nokey(struct socket *sock, struct msghdr *msg,
+                                 size_t ignored, int flags)
+{
+       int err;
+
+       err = skcipher_check_key(sock);
+       if (err)
+               return err;
+
+       return skcipher_recvmsg(sock, msg, ignored, flags);
+}
+
+static struct proto_ops algif_skcipher_ops_nokey = {
+       .family         =       PF_ALG,
+
+       .connect        =       sock_no_connect,
+       .socketpair     =       sock_no_socketpair,
+       .getname        =       sock_no_getname,
+       .ioctl          =       sock_no_ioctl,
+       .listen         =       sock_no_listen,
+       .shutdown       =       sock_no_shutdown,
+       .getsockopt     =       sock_no_getsockopt,
+       .mmap           =       sock_no_mmap,
+       .bind           =       sock_no_bind,
+       .accept         =       sock_no_accept,
+       .setsockopt     =       sock_no_setsockopt,
+
+       .release        =       af_alg_release,
+       .sendmsg        =       skcipher_sendmsg_nokey,
+       .sendpage       =       skcipher_sendpage_nokey,
+       .recvmsg        =       skcipher_recvmsg_nokey,
+       .poll           =       skcipher_poll,
+};
+
 static void *skcipher_bind(const char *name, u32 type, u32 mask)
 {
-       return crypto_alloc_skcipher(name, type, mask);
+       struct skcipher_tfm *tfm;
+       struct crypto_skcipher *skcipher;
+
+       tfm = kzalloc(sizeof(*tfm), GFP_KERNEL);
+       if (!tfm)
+               return ERR_PTR(-ENOMEM);
+
+       skcipher = crypto_alloc_skcipher(name, type, mask);
+       if (IS_ERR(skcipher)) {
+               kfree(tfm);
+               return ERR_CAST(skcipher);
+       }
+
+       tfm->skcipher = skcipher;
+
+       return tfm;
 }
 
 static void skcipher_release(void *private)
 {
-       crypto_free_skcipher(private);
+       struct skcipher_tfm *tfm = private;
+
+       crypto_free_skcipher(tfm->skcipher);
+       kfree(tfm);
 }
 
 static int skcipher_setkey(void *private, const u8 *key, unsigned int keylen)
 {
-       return crypto_skcipher_setkey(private, key, keylen);
+       struct skcipher_tfm *tfm = private;
+       int err;
+
+       err = crypto_skcipher_setkey(tfm->skcipher, key, keylen);
+       tfm->has_key = !err;
+
+       return err;
 }
 
 static void skcipher_wait(struct sock *sk)
@@ -788,24 +917,26 @@ static void skcipher_sock_destruct(struct sock *sk)
        af_alg_release_parent(sk);
 }
 
-static int skcipher_accept_parent(void *private, struct sock *sk)
+static int skcipher_accept_parent_nokey(void *private, struct sock *sk)
 {
        struct skcipher_ctx *ctx;
        struct alg_sock *ask = alg_sk(sk);
-       unsigned int len = sizeof(*ctx) + crypto_skcipher_reqsize(private);
+       struct skcipher_tfm *tfm = private;
+       struct crypto_skcipher *skcipher = tfm->skcipher;
+       unsigned int len = sizeof(*ctx) + crypto_skcipher_reqsize(skcipher);
 
        ctx = sock_kmalloc(sk, len, GFP_KERNEL);
        if (!ctx)
                return -ENOMEM;
 
-       ctx->iv = sock_kmalloc(sk, crypto_skcipher_ivsize(private),
+       ctx->iv = sock_kmalloc(sk, crypto_skcipher_ivsize(skcipher),
                               GFP_KERNEL);
        if (!ctx->iv) {
                sock_kfree_s(sk, ctx, len);
                return -ENOMEM;
        }
 
-       memset(ctx->iv, 0, crypto_skcipher_ivsize(private));
+       memset(ctx->iv, 0, crypto_skcipher_ivsize(skcipher));
 
        INIT_LIST_HEAD(&ctx->tsgl);
        ctx->len = len;
@@ -818,8 +949,9 @@ static int skcipher_accept_parent(void *private, struct sock *sk)
 
        ask->private = ctx;
 
-       skcipher_request_set_tfm(&ctx->req, private);
-       skcipher_request_set_callback(&ctx->req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+       skcipher_request_set_tfm(&ctx->req, skcipher);
+       skcipher_request_set_callback(&ctx->req, CRYPTO_TFM_REQ_MAY_SLEEP |
+                                                CRYPTO_TFM_REQ_MAY_BACKLOG,
                                      af_alg_complete, &ctx->completion);
 
        sk->sk_destruct = skcipher_sock_destruct;
@@ -827,12 +959,24 @@ static int skcipher_accept_parent(void *private, struct sock *sk)
        return 0;
 }
 
+static int skcipher_accept_parent(void *private, struct sock *sk)
+{
+       struct skcipher_tfm *tfm = private;
+
+       if (!tfm->has_key && crypto_skcipher_has_setkey(tfm->skcipher))
+               return -ENOKEY;
+
+       return skcipher_accept_parent_nokey(private, sk);
+}
+
 static const struct af_alg_type algif_type_skcipher = {
        .bind           =       skcipher_bind,
        .release        =       skcipher_release,
        .setkey         =       skcipher_setkey,
        .accept         =       skcipher_accept_parent,
+       .accept_nokey   =       skcipher_accept_parent_nokey,
        .ops            =       &algif_skcipher_ops,
+       .ops_nokey      =       &algif_skcipher_ops_nokey,
        .name           =       "skcipher",
        .owner          =       THIS_MODULE
 };
index 758acabf2d819727ecd7f4cf0c6aa21c1c40649f..8f3056cd03991ddf820c3d8442d35daf3db7ee84 100644 (file)
@@ -547,9 +547,7 @@ int pkcs7_sig_note_set_of_authattrs(void *context, size_t hdrlen,
        struct pkcs7_signed_info *sinfo = ctx->sinfo;
 
        if (!test_bit(sinfo_has_content_type, &sinfo->aa_set) ||
-           !test_bit(sinfo_has_message_digest, &sinfo->aa_set) ||
-           (ctx->msg->data_type == OID_msIndirectData &&
-            !test_bit(sinfo_has_ms_opus_info, &sinfo->aa_set))) {
+           !test_bit(sinfo_has_message_digest, &sinfo->aa_set)) {
                pr_warn("Missing required AuthAttr\n");
                return -EBADMSG;
        }
index 06f1b60f02b223eeea70c000ec4055c470d4eeae..4c0a0e27187694c6d0d7281c1d61b2f25557f269 100644 (file)
@@ -172,4 +172,3 @@ MODULE_DESCRIPTION("CRC32c (Castagnoli) calculations wrapper for lib/crc32c");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS_CRYPTO("crc32c");
 MODULE_ALIAS_CRYPTO("crc32c-generic");
-MODULE_SOFTDEP("pre: crc32c");
index 237f3795cfaaa1f988fadf5b07eefe3c44609091..43fe85f20d577b4f3d1bbd6576b6d752bc578531 100644 (file)
@@ -499,6 +499,7 @@ static int crypto_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
                if (link->dump == NULL)
                        return -EINVAL;
 
+               down_read(&crypto_alg_sem);
                list_for_each_entry(alg, &crypto_alg_list, cra_list)
                        dump_alloc += CRYPTO_REPORT_MAXSIZE;
 
@@ -508,8 +509,11 @@ static int crypto_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
                                .done = link->done,
                                .min_dump_alloc = dump_alloc,
                        };
-                       return netlink_dump_start(crypto_nlsk, skb, nlh, &c);
+                       err = netlink_dump_start(crypto_nlsk, skb, nlh, &c);
                }
+               up_read(&crypto_alg_sem);
+
+               return err;
        }
 
        err = nlmsg_parse(nlh, crypto_msg_min[type], attrs, CRYPTOCFGA_MAX,
index ecb1e3d39bf0776a0d8805a9f8fadd3ec288bcb0..359754591653c7b265a5e6b89d70e9a9d16d33e3 100644 (file)
@@ -354,9 +354,10 @@ int crypto_init_shash_ops_async(struct crypto_tfm *tfm)
        crt->final = shash_async_final;
        crt->finup = shash_async_finup;
        crt->digest = shash_async_digest;
+       crt->setkey = shash_async_setkey;
+
+       crt->has_setkey = alg->setkey != shash_no_setkey;
 
-       if (alg->setkey)
-               crt->setkey = shash_async_setkey;
        if (alg->export)
                crt->export = shash_async_export;
        if (alg->import)
index 7591928be7ca789cfc909b3ea300fb6f2fd2cb6f..d199c0b1751c91cbcc5a978aadb97cdf609e33ab 100644 (file)
@@ -118,6 +118,7 @@ static int crypto_init_skcipher_ops_blkcipher(struct crypto_tfm *tfm)
        skcipher->decrypt = skcipher_decrypt_blkcipher;
 
        skcipher->ivsize = crypto_blkcipher_ivsize(blkcipher);
+       skcipher->has_setkey = calg->cra_blkcipher.max_keysize;
 
        return 0;
 }
@@ -210,6 +211,7 @@ static int crypto_init_skcipher_ops_ablkcipher(struct crypto_tfm *tfm)
        skcipher->ivsize = crypto_ablkcipher_ivsize(ablkcipher);
        skcipher->reqsize = crypto_ablkcipher_reqsize(ablkcipher) +
                            sizeof(struct ablkcipher_request);
+       skcipher->has_setkey = calg->cra_ablkcipher.max_keysize;
 
        return 0;
 }
index c570b1d9f09480ecec5513d3cfe61d33c26c81b5..0872d5fecb82f2dee893c7feb0ac6a408f903384 100644 (file)
@@ -880,7 +880,7 @@ static int acpi_lpss_platform_notify(struct notifier_block *nb,
                break;
        case BUS_NOTIFY_DRIVER_NOT_BOUND:
        case BUS_NOTIFY_UNBOUND_DRIVER:
-               pdev->dev.pm_domain = NULL;
+               dev_pm_domain_set(&pdev->dev, NULL);
                break;
        case BUS_NOTIFY_ADD_DEVICE:
                dev_pm_domain_set(&pdev->dev, &acpi_lpss_pm_domain);
index 6682c5daf742637024c1f72a19f7de750cb1bbe1..6e6bc1059301745fc1d5fd6ebf943d048082e821 100644 (file)
@@ -32,6 +32,7 @@
 #include <linux/hardirq.h>
 #include <linux/pstore.h>
 #include <linux/vmalloc.h>
+#include <linux/mm.h> /* kvfree() */
 #include <acpi/apei.h>
 
 #include "apei-internal.h"
@@ -532,10 +533,7 @@ retry:
                        return -ENOMEM;
                memcpy(new_entries, entries,
                       erst_record_id_cache.len * sizeof(entries[0]));
-               if (erst_record_id_cache.size < PAGE_SIZE)
-                       kfree(entries);
-               else
-                       vfree(entries);
+               kvfree(entries);
                erst_record_id_cache.entries = entries = new_entries;
                erst_record_id_cache.size = new_size;
        }
index 90e2d54be526bcc0df8ec41bf292a8016d99ba3d..1316ddd92fac0f6c59f4be85759947a2d1643334 100644 (file)
@@ -135,14 +135,6 @@ static const struct dmi_system_id video_detect_dmi_table[] = {
                DMI_MATCH(DMI_PRODUCT_NAME, "UL30A"),
                },
        },
-       {
-       .callback = video_detect_force_vendor,
-       .ident = "Dell Inspiron 5737",
-       .matches = {
-               DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
-               DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 5737"),
-               },
-       },
 
        /*
         * These models have a working acpi_video backlight control, and using
index 4a5c9d27905900f7bdc1d978f3ef89116e1d83c9..294ba6f363960617c3d1ac2e6191c81e4b384372 100644 (file)
@@ -4,7 +4,7 @@ config ARM_AMBA
 if ARM_AMBA
 
 config TEGRA_AHB
-       bool "Enable AHB driver for NVIDIA Tegra SoCs"
+       bool
        default y if ARCH_TEGRA
        help
          Adds AHB configuration functionality for NVIDIA Tegra SoCs,
index 68f03141e432a2c5a03358d59ed541ac9ae79529..44a74cf1372c6e710d2b102ac51dd34ee44ffd0e 100644 (file)
@@ -215,9 +215,9 @@ static int handle_create(const char *nodename, umode_t mode, kuid_t uid,
                newattrs.ia_uid = uid;
                newattrs.ia_gid = gid;
                newattrs.ia_valid = ATTR_MODE|ATTR_UID|ATTR_GID;
-               mutex_lock(&d_inode(dentry)->i_mutex);
+               inode_lock(d_inode(dentry));
                notify_change(dentry, &newattrs, NULL);
-               mutex_unlock(&d_inode(dentry)->i_mutex);
+               inode_unlock(d_inode(dentry));
 
                /* mark as kernel-created inode */
                d_inode(dentry)->i_private = &thread;
@@ -244,7 +244,7 @@ static int dev_rmdir(const char *name)
                err = -ENOENT;
        }
        dput(dentry);
-       mutex_unlock(&d_inode(parent.dentry)->i_mutex);
+       inode_unlock(d_inode(parent.dentry));
        path_put(&parent);
        return err;
 }
@@ -321,9 +321,9 @@ static int handle_remove(const char *nodename, struct device *dev)
                        newattrs.ia_mode = stat.mode & ~0777;
                        newattrs.ia_valid =
                                ATTR_UID|ATTR_GID|ATTR_MODE;
-                       mutex_lock(&d_inode(dentry)->i_mutex);
+                       inode_lock(d_inode(dentry));
                        notify_change(dentry, &newattrs, NULL);
-                       mutex_unlock(&d_inode(dentry)->i_mutex);
+                       inode_unlock(d_inode(dentry));
                        err = vfs_unlink(d_inode(parent.dentry), dentry, NULL);
                        if (!err || err == -ENOENT)
                                deleted = 1;
@@ -332,7 +332,7 @@ static int handle_remove(const char *nodename, struct device *dev)
                err = -ENOENT;
        }
        dput(dentry);
-       mutex_unlock(&d_inode(parent.dentry)->i_mutex);
+       inode_unlock(d_inode(parent.dentry));
 
        path_put(&parent);
        if (deleted && strchr(nodename, '/'))
index 47c43386786b13229f4fbeb5cbdbfe03a2009804..279e53989374f065d01039caf110a14b95f887e0 100644 (file)
@@ -284,6 +284,7 @@ out_free_priv_data:
 
        return err;
 }
+EXPORT_SYMBOL_GPL(platform_msi_domain_alloc_irqs);
 
 /**
  * platform_msi_domain_free_irqs - Free MSI interrupts for @dev
@@ -301,6 +302,7 @@ void platform_msi_domain_free_irqs(struct device *dev)
        msi_domain_free_irqs(dev->msi_domain, dev);
        platform_msi_free_descs(dev, 0, MAX_DEV_MSIS);
 }
+EXPORT_SYMBOL_GPL(platform_msi_domain_free_irqs);
 
 /**
  * platform_msi_get_host_data - Query the private data associated with
index 73d6e5d39e33e1edef9033d5e97b99ecc50a80cf..f437afa17f2b1d2933ed0882d7e7f291c6d86309 100644 (file)
@@ -558,10 +558,15 @@ static int platform_drv_probe(struct device *_dev)
                return ret;
 
        ret = dev_pm_domain_attach(_dev, true);
-       if (ret != -EPROBE_DEFER && drv->probe) {
-               ret = drv->probe(dev);
-               if (ret)
-                       dev_pm_domain_detach(_dev, true);
+       if (ret != -EPROBE_DEFER) {
+               if (drv->probe) {
+                       ret = drv->probe(dev);
+                       if (ret)
+                               dev_pm_domain_detach(_dev, true);
+               } else {
+                       /* don't fail if just dev_pm_domain_attach failed */
+                       ret = 0;
+               }
        }
 
        if (drv->prevent_deferred_probe && ret == -EPROBE_DEFER) {
index 93ed14cc22524ccdd04301dbe460e0958bb69acf..f6a9ad52cbbff0663e58540f33bb29e08abf0e96 100644 (file)
@@ -146,7 +146,7 @@ void dev_pm_domain_set(struct device *dev, struct dev_pm_domain *pd)
        if (dev->pm_domain == pd)
                return;
 
-       WARN(device_is_bound(dev),
+       WARN(pd && device_is_bound(dev),
             "PM domains can only be changed for unbound devices\n");
        dev->pm_domain = pd;
        device_pm_check_callbacks(dev);
index 6ac9a7f33b640ae1105a72f76974e8939a30ae45..301b785f9f56f5d00b7f83798b6f7cce53bf0e03 100644 (file)
@@ -162,7 +162,7 @@ static int genpd_power_off(struct generic_pm_domain *genpd, bool timed)
 
 /**
  * genpd_queue_power_off_work - Queue up the execution of genpd_poweroff().
- * @genpd: PM domait to power off.
+ * @genpd: PM domain to power off.
  *
  * Queue up the execution of genpd_poweroff() unless it's already been done
  * before.
@@ -172,16 +172,15 @@ static void genpd_queue_power_off_work(struct generic_pm_domain *genpd)
        queue_work(pm_wq, &genpd->power_off_work);
 }
 
-static int genpd_poweron(struct generic_pm_domain *genpd);
-
 /**
- * __genpd_poweron - Restore power to a given PM domain and its masters.
+ * genpd_poweron - Restore power to a given PM domain and its masters.
  * @genpd: PM domain to power up.
+ * @depth: nesting count for lockdep.
  *
  * Restore power to @genpd and all of its masters so that it is possible to
  * resume a device belonging to it.
  */
-static int __genpd_poweron(struct generic_pm_domain *genpd)
+static int genpd_poweron(struct generic_pm_domain *genpd, unsigned int depth)
 {
        struct gpd_link *link;
        int ret = 0;
@@ -196,11 +195,16 @@ static int __genpd_poweron(struct generic_pm_domain *genpd)
         * with it.
         */
        list_for_each_entry(link, &genpd->slave_links, slave_node) {
-               genpd_sd_counter_inc(link->master);
+               struct generic_pm_domain *master = link->master;
+
+               genpd_sd_counter_inc(master);
+
+               mutex_lock_nested(&master->lock, depth + 1);
+               ret = genpd_poweron(master, depth + 1);
+               mutex_unlock(&master->lock);
 
-               ret = genpd_poweron(link->master);
                if (ret) {
-                       genpd_sd_counter_dec(link->master);
+                       genpd_sd_counter_dec(master);
                        goto err;
                }
        }
@@ -223,20 +227,6 @@ static int __genpd_poweron(struct generic_pm_domain *genpd)
        return ret;
 }
 
-/**
- * genpd_poweron - Restore power to a given PM domain and its masters.
- * @genpd: PM domain to power up.
- */
-static int genpd_poweron(struct generic_pm_domain *genpd)
-{
-       int ret;
-
-       mutex_lock(&genpd->lock);
-       ret = __genpd_poweron(genpd);
-       mutex_unlock(&genpd->lock);
-       return ret;
-}
-
 static int genpd_save_dev(struct generic_pm_domain *genpd, struct device *dev)
 {
        return GENPD_DEV_CALLBACK(genpd, int, save_state, dev);
@@ -484,7 +474,7 @@ static int pm_genpd_runtime_resume(struct device *dev)
        }
 
        mutex_lock(&genpd->lock);
-       ret = __genpd_poweron(genpd);
+       ret = genpd_poweron(genpd, 0);
        mutex_unlock(&genpd->lock);
 
        if (ret)
@@ -1339,8 +1329,8 @@ int pm_genpd_add_subdomain(struct generic_pm_domain *genpd,
        if (!link)
                return -ENOMEM;
 
-       mutex_lock(&genpd->lock);
-       mutex_lock_nested(&subdomain->lock, SINGLE_DEPTH_NESTING);
+       mutex_lock(&subdomain->lock);
+       mutex_lock_nested(&genpd->lock, SINGLE_DEPTH_NESTING);
 
        if (genpd->status == GPD_STATE_POWER_OFF
            &&  subdomain->status != GPD_STATE_POWER_OFF) {
@@ -1363,8 +1353,8 @@ int pm_genpd_add_subdomain(struct generic_pm_domain *genpd,
                genpd_sd_counter_inc(genpd);
 
  out:
-       mutex_unlock(&subdomain->lock);
        mutex_unlock(&genpd->lock);
+       mutex_unlock(&subdomain->lock);
        if (ret)
                kfree(link);
        return ret;
@@ -1385,7 +1375,8 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd,
        if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(subdomain))
                return -EINVAL;
 
-       mutex_lock(&genpd->lock);
+       mutex_lock(&subdomain->lock);
+       mutex_lock_nested(&genpd->lock, SINGLE_DEPTH_NESTING);
 
        if (!list_empty(&subdomain->slave_links) || subdomain->device_count) {
                pr_warn("%s: unable to remove subdomain %s\n", genpd->name,
@@ -1398,22 +1389,19 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd,
                if (link->slave != subdomain)
                        continue;
 
-               mutex_lock_nested(&subdomain->lock, SINGLE_DEPTH_NESTING);
-
                list_del(&link->master_node);
                list_del(&link->slave_node);
                kfree(link);
                if (subdomain->status != GPD_STATE_POWER_OFF)
                        genpd_sd_counter_dec(genpd);
 
-               mutex_unlock(&subdomain->lock);
-
                ret = 0;
                break;
        }
 
 out:
        mutex_unlock(&genpd->lock);
+       mutex_unlock(&subdomain->lock);
 
        return ret;
 }
@@ -1818,8 +1806,10 @@ int genpd_dev_pm_attach(struct device *dev)
 
        dev->pm_domain->detach = genpd_dev_pm_detach;
        dev->pm_domain->sync = genpd_dev_pm_sync;
-       ret = genpd_poweron(pd);
 
+       mutex_lock(&pd->lock);
+       ret = genpd_poweron(pd, 0);
+       mutex_unlock(&pd->lock);
 out:
        return ret ? -EPROBE_DEFER : 0;
 }
index 8812bfb9e3b89256f4a5d1468cf45484b18e4cd1..eea51569f0eb9c9e36f5a4bca90de61c8f833444 100644 (file)
@@ -133,17 +133,17 @@ static int regmap_mmio_gather_write(void *context,
        while (val_size) {
                switch (ctx->val_bytes) {
                case 1:
-                       __raw_writeb(*(u8 *)val, ctx->regs + offset);
+                       writeb(*(u8 *)val, ctx->regs + offset);
                        break;
                case 2:
-                       __raw_writew(*(u16 *)val, ctx->regs + offset);
+                       writew(*(u16 *)val, ctx->regs + offset);
                        break;
                case 4:
-                       __raw_writel(*(u32 *)val, ctx->regs + offset);
+                       writel(*(u32 *)val, ctx->regs + offset);
                        break;
 #ifdef CONFIG_64BIT
                case 8:
-                       __raw_writeq(*(u64 *)val, ctx->regs + offset);
+                       writeq(*(u64 *)val, ctx->regs + offset);
                        break;
 #endif
                default:
@@ -193,17 +193,17 @@ static int regmap_mmio_read(void *context,
        while (val_size) {
                switch (ctx->val_bytes) {
                case 1:
-                       *(u8 *)val = __raw_readb(ctx->regs + offset);
+                       *(u8 *)val = readb(ctx->regs + offset);
                        break;
                case 2:
-                       *(u16 *)val = __raw_readw(ctx->regs + offset);
+                       *(u16 *)val = readw(ctx->regs + offset);
                        break;
                case 4:
-                       *(u32 *)val = __raw_readl(ctx->regs + offset);
+                       *(u32 *)val = readl(ctx->regs + offset);
                        break;
 #ifdef CONFIG_64BIT
                case 8:
-                       *(u64 *)val = __raw_readq(ctx->regs + offset);
+                       *(u64 *)val = readq(ctx->regs + offset);
                        break;
 #endif
                default:
index ad80c85e0857064c1ed3cd6420a1ef582ad76c4a..d048d2009e897a7e76d523ec704b1fd1f77dd76b 100644 (file)
@@ -964,9 +964,9 @@ aoecmd_sleepwork(struct work_struct *work)
                ssize = get_capacity(d->gd);
                bd = bdget_disk(d->gd, 0);
                if (bd) {
-                       mutex_lock(&bd->bd_inode->i_mutex);
+                       inode_lock(bd->bd_inode);
                        i_size_write(bd->bd_inode, (loff_t)ssize<<9);
-                       mutex_unlock(&bd->bd_inode->i_mutex);
+                       inode_unlock(bd->bd_inode);
                        bdput(bd);
                }
                spin_lock_irq(&d->lock);
index 0dabc9b93725b3f1c5de274128ea7aeaa832c79d..92d6fc020a657c0694cc02c99d134fdf47c85c12 100644 (file)
@@ -364,12 +364,9 @@ static void bm_free_pages(struct page **pages, unsigned long number)
        }
 }
 
-static void bm_vk_free(void *ptr, int v)
+static inline void bm_vk_free(void *ptr)
 {
-       if (v)
-               vfree(ptr);
-       else
-               kfree(ptr);
+       kvfree(ptr);
 }
 
 /*
@@ -379,7 +376,7 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
 {
        struct page **old_pages = b->bm_pages;
        struct page **new_pages, *page;
-       unsigned int i, bytes, vmalloced = 0;
+       unsigned int i, bytes;
        unsigned long have = b->bm_number_of_pages;
 
        BUG_ON(have == 0 && old_pages != NULL);
@@ -401,7 +398,6 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
                                PAGE_KERNEL);
                if (!new_pages)
                        return NULL;
-               vmalloced = 1;
        }
 
        if (want >= have) {
@@ -411,7 +407,7 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
                        page = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
                        if (!page) {
                                bm_free_pages(new_pages + have, i - have);
-                               bm_vk_free(new_pages, vmalloced);
+                               bm_vk_free(new_pages);
                                return NULL;
                        }
                        /* we want to know which page it is
@@ -427,11 +423,6 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
                */
        }
 
-       if (vmalloced)
-               b->bm_flags |= BM_P_VMALLOCED;
-       else
-               b->bm_flags &= ~BM_P_VMALLOCED;
-
        return new_pages;
 }
 
@@ -469,7 +460,7 @@ void drbd_bm_cleanup(struct drbd_device *device)
        if (!expect(device->bitmap))
                return;
        bm_free_pages(device->bitmap->bm_pages, device->bitmap->bm_number_of_pages);
-       bm_vk_free(device->bitmap->bm_pages, (BM_P_VMALLOCED & device->bitmap->bm_flags));
+       bm_vk_free(device->bitmap->bm_pages);
        kfree(device->bitmap);
        device->bitmap = NULL;
 }
@@ -643,7 +634,6 @@ int drbd_bm_resize(struct drbd_device *device, sector_t capacity, int set_new_bi
        unsigned long want, have, onpages; /* number of pages */
        struct page **npages, **opages = NULL;
        int err = 0, growing;
-       int opages_vmalloced;
 
        if (!expect(b))
                return -ENOMEM;
@@ -656,8 +646,6 @@ int drbd_bm_resize(struct drbd_device *device, sector_t capacity, int set_new_bi
        if (capacity == b->bm_dev_capacity)
                goto out;
 
-       opages_vmalloced = (BM_P_VMALLOCED & b->bm_flags);
-
        if (capacity == 0) {
                spin_lock_irq(&b->bm_lock);
                opages = b->bm_pages;
@@ -671,7 +659,7 @@ int drbd_bm_resize(struct drbd_device *device, sector_t capacity, int set_new_bi
                b->bm_dev_capacity = 0;
                spin_unlock_irq(&b->bm_lock);
                bm_free_pages(opages, onpages);
-               bm_vk_free(opages, opages_vmalloced);
+               bm_vk_free(opages);
                goto out;
        }
        bits  = BM_SECT_TO_BIT(ALIGN(capacity, BM_SECT_PER_BIT));
@@ -744,7 +732,7 @@ int drbd_bm_resize(struct drbd_device *device, sector_t capacity, int set_new_bi
 
        spin_unlock_irq(&b->bm_lock);
        if (opages != npages)
-               bm_vk_free(opages, opages_vmalloced);
+               bm_vk_free(opages);
        if (!growing)
                b->bm_set = bm_count_bits(b);
        drbd_info(device, "resync bitmap: bits=%lu words=%lu pages=%lu\n", bits, words, want);
index 96a0107a72eac1a058f931d9ec59d5df517fde3e..4de95bbff4860b2d39ad6fc40cbf95ab849929b1 100644 (file)
@@ -434,12 +434,12 @@ static int drbd_single_open(struct file *file, int (*show)(struct seq_file *, vo
        if (!parent || d_really_is_negative(parent))
                goto out;
        /* serialize with d_delete() */
-       mutex_lock(&d_inode(parent)->i_mutex);
+       inode_lock(d_inode(parent));
        /* Make sure the object is still alive */
        if (simple_positive(file->f_path.dentry)
        && kref_get_unless_zero(kref))
                ret = 0;
-       mutex_unlock(&d_inode(parent)->i_mutex);
+       inode_unlock(d_inode(parent));
        if (!ret) {
                ret = single_open(file, show, data);
                if (ret)
index b6844feb9f9bbd9484abe0f95f7734b3394b530d..34bc84efc29e99085d9ccdeb6a4fa49f5b079446 100644 (file)
@@ -536,9 +536,6 @@ struct drbd_bitmap; /* opaque for drbd_device */
 /* definition of bits in bm_flags to be used in drbd_bm_lock
  * and drbd_bitmap_io and friends. */
 enum bm_flag {
-       /* do we need to kfree, or vfree bm_pages? */
-       BM_P_VMALLOCED = 0x10000, /* internal use only, will be masked out */
-
        /* currently locked for bulk operation */
        BM_LOCKED_MASK = 0xf,
 
index 81ea69fee7ca183313b8e8322833062262279187..4a876785b68cd5c550dbbb1d2b63071446454081 100644 (file)
@@ -5185,8 +5185,7 @@ static int rbd_dev_probe_parent(struct rbd_device *rbd_dev, int depth)
 
 out_err:
        rbd_dev_unparent(rbd_dev);
-       if (parent)
-               rbd_dev_destroy(parent);
+       rbd_dev_destroy(parent);
        return ret;
 }
 
index 129d47bcc5fc8d1b9d0ef0e087bbf36cee1336be..9a92c072a485def0b2730340503d1984d5a6dc1d 100644 (file)
@@ -132,7 +132,7 @@ config SUNXI_RSB
          and AC100/AC200 ICs.
 
 config UNIPHIER_SYSTEM_BUS
-       bool "UniPhier System Bus driver"
+       tristate "UniPhier System Bus driver"
        depends on ARCH_UNIPHIER && OF
        default y
        help
index 6575c0fe6a4ea3a1e3bc1bae99010eefd9d7ff94..c3cb76b363c63c54d67343dacf4c9ec20032a95f 100644 (file)
@@ -192,8 +192,10 @@ static int __init vexpress_config_init(void)
        /* Need the config devices early, before the "normal" devices... */
        for_each_compatible_node(node, NULL, "arm,vexpress,config-bus") {
                err = vexpress_config_populate(node);
-               if (err)
+               if (err) {
+                       of_node_put(node);
                        break;
+               }
        }
 
        return err;
index dbf22719462f9481006b5c874ca99200342b4bee..ff00331bff49bdb1890333b73790ded540dc8697 100644 (file)
@@ -372,6 +372,7 @@ config HW_RANDOM_XGENE
 config HW_RANDOM_STM32
        tristate "STMicroelectronics STM32 random number generator"
        depends on HW_RANDOM && (ARCH_STM32 || COMPILE_TEST)
+       depends on HAS_IOMEM
        help
          This driver provides kernel-side support for the Random Number
          Generator hardware found on STM32 microcontrollers.
index 9fda22e3387e59030b4a16f4ab1712bb1555b0ce..7fddd8696211f0320011c964a88a37a16133c4ba 100644 (file)
@@ -68,6 +68,7 @@
 #include <linux/of_platform.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
+#include <linux/acpi.h>
 
 #ifdef CONFIG_PARISC
 #include <asm/hardware.h>      /* for register_parisc_driver() stuff */
@@ -2054,8 +2055,6 @@ static int hardcode_find_bmc(void)
 
 #ifdef CONFIG_ACPI
 
-#include <linux/acpi.h>
-
 /*
  * Once we get an ACPI failure, we don't try any more, because we go
  * through the tables sequentially.  Once we don't find a table, there
index 6b1721f978c2945f4af716636c8373fe8807dca0..4f6f94c43412c0773e176cf8d67acd73fb6ca274 100644 (file)
@@ -689,7 +689,7 @@ static loff_t memory_lseek(struct file *file, loff_t offset, int orig)
 {
        loff_t ret;
 
-       mutex_lock(&file_inode(file)->i_mutex);
+       inode_lock(file_inode(file));
        switch (orig) {
        case SEEK_CUR:
                offset += file->f_pos;
@@ -706,7 +706,7 @@ static loff_t memory_lseek(struct file *file, loff_t offset, int orig)
        default:
                ret = -EINVAL;
        }
-       mutex_unlock(&file_inode(file)->i_mutex);
+       inode_unlock(file_inode(file));
        return ret;
 }
 
index f1d7fa45c2759b0ed97d9a54669d705f2547040c..f3f92d5fcda05b2a3692d9f100c7caa48445e5e1 100644 (file)
@@ -93,14 +93,11 @@ struct vma_data {
        spinlock_t lock;        /* Serialize access to this structure. */
        int count;              /* Number of pages allocated. */
        enum mspec_page_type type; /* Type of pages allocated. */
-       int flags;              /* See VMD_xxx below. */
        unsigned long vm_start; /* Original (unsplit) base. */
        unsigned long vm_end;   /* Original (unsplit) end. */
        unsigned long maddr[0]; /* Array of MSPEC addresses. */
 };
 
-#define VMD_VMALLOCED 0x1      /* vmalloc'd rather than kmalloc'd */
-
 /* used on shub2 to clear FOP cache in the HUB */
 static unsigned long scratch_page[MAX_NUMNODES];
 #define SH2_AMO_CACHE_ENTRIES  4
@@ -185,10 +182,7 @@ mspec_close(struct vm_area_struct *vma)
                               "failed to zero page %ld\n", my_page);
        }
 
-       if (vdata->flags & VMD_VMALLOCED)
-               vfree(vdata);
-       else
-               kfree(vdata);
+       kvfree(vdata);
 }
 
 /*
@@ -256,7 +250,7 @@ mspec_mmap(struct file *file, struct vm_area_struct *vma,
                                        enum mspec_page_type type)
 {
        struct vma_data *vdata;
-       int pages, vdata_size, flags = 0;
+       int pages, vdata_size;
 
        if (vma->vm_pgoff != 0)
                return -EINVAL;
@@ -271,16 +265,13 @@ mspec_mmap(struct file *file, struct vm_area_struct *vma,
        vdata_size = sizeof(struct vma_data) + pages * sizeof(long);
        if (vdata_size <= PAGE_SIZE)
                vdata = kzalloc(vdata_size, GFP_KERNEL);
-       else {
+       else
                vdata = vzalloc(vdata_size);
-               flags = VMD_VMALLOCED;
-       }
        if (!vdata)
                return -ENOMEM;
 
        vdata->vm_start = vma->vm_start;
        vdata->vm_end = vma->vm_end;
-       vdata->flags = flags;
        vdata->type = type;
        spin_lock_init(&vdata->lock);
        atomic_set(&vdata->refcnt, 1);
index 0b311fa277ef097f485d9aa92da18c262a7f10dd..b526dc15c27113d70d67bf23464c4a2e1c8c4418 100644 (file)
@@ -290,9 +290,9 @@ static int ps3flash_fsync(struct file *file, loff_t start, loff_t end, int datas
 {
        struct inode *inode = file_inode(file);
        int err;
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        err = ps3flash_writeback(ps3flash_dev);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return err;
 }
 
index 56777f04d2d96381593b4344ac3306bb5dcafbb4..33db7406c0e274e8c39c7b0d71509a117c324022 100644 (file)
@@ -30,6 +30,8 @@ config CLKSRC_MMIO
 config DIGICOLOR_TIMER
        bool "Digicolor timer driver" if COMPILE_TEST
        depends on GENERIC_CLOCKEVENTS
+       select CLKSRC_MMIO
+       depends on HAS_IOMEM
        help
          Enables the support for the digicolor timer driver.
 
@@ -55,6 +57,7 @@ config ARMADA_370_XP_TIMER
        bool "Armada 370 and XP timer driver" if COMPILE_TEST
        depends on ARM
        select CLKSRC_OF
+       select CLKSRC_MMIO
        help
          Enables the support for the Armada 370 and XP timer driver.
 
@@ -76,6 +79,7 @@ config ORION_TIMER
 config SUN4I_TIMER
        bool "Sun4i timer driver" if COMPILE_TEST
        depends on GENERIC_CLOCKEVENTS
+       depends on HAS_IOMEM
        select CLKSRC_MMIO
        help
          Enables support for the Sun4i timer.
@@ -89,6 +93,7 @@ config SUN5I_HSTIMER
 
 config TEGRA_TIMER
        bool "Tegra timer driver" if COMPILE_TEST
+       select CLKSRC_MMIO
        depends on ARM
        help
          Enables support for the Tegra driver.
@@ -96,6 +101,7 @@ config TEGRA_TIMER
 config VT8500_TIMER
        bool "VT8500 timer driver" if COMPILE_TEST
        depends on GENERIC_CLOCKEVENTS
+       depends on HAS_IOMEM
        help
          Enables support for the VT8500 driver.
 
@@ -131,6 +137,7 @@ config CLKSRC_NOMADIK_MTU_SCHED_CLOCK
 config CLKSRC_DBX500_PRCMU
        bool "Clocksource PRCMU Timer" if COMPILE_TEST
        depends on GENERIC_CLOCKEVENTS
+       depends on HAS_IOMEM
        help
          Use the always on PRCMU Timer as clocksource
 
@@ -248,6 +255,7 @@ config CLKSRC_EXYNOS_MCT
 config CLKSRC_SAMSUNG_PWM
        bool "PWM timer drvier for Samsung S3C, S5P" if COMPILE_TEST
        depends on GENERIC_CLOCKEVENTS
+       depends on HAS_IOMEM
        help
          This is a new clocksource driver for the PWM timer found in
          Samsung S3C, S5P and Exynos SoCs, replacing an earlier driver
@@ -257,12 +265,14 @@ config CLKSRC_SAMSUNG_PWM
 config FSL_FTM_TIMER
        bool "Freescale FlexTimer Module driver" if COMPILE_TEST
        depends on GENERIC_CLOCKEVENTS
+       depends on HAS_IOMEM
        select CLKSRC_MMIO
        help
          Support for Freescale FlexTimer Module (FTM) timer.
 
 config VF_PIT_TIMER
        bool
+       select CLKSRC_MMIO
        help
          Support for Period Interrupt Timer on Freescale Vybrid Family SoCs.
 
@@ -360,6 +370,7 @@ config CLKSRC_TANGO_XTAL
 config CLKSRC_PXA
        bool "Clocksource for PXA or SA-11x0 platform" if COMPILE_TEST
        depends on GENERIC_CLOCKEVENTS
+       depends on HAS_IOMEM
        select CLKSRC_MMIO
        help
          This enables OST0 support available on PXA and SA-11x0
@@ -394,6 +405,7 @@ config CLKSRC_ST_LPC
        bool "Low power clocksource found in the LPC" if COMPILE_TEST
        select CLKSRC_OF if OF
        depends on HAS_IOMEM
+       select CLKSRC_MMIO
        help
          Enable this option to use the Low Power controller timer
          as clocksource.
index 6ee91401918eba99a33c67b554da853747a0c5fa..4da2af9694a23b267cebdfa52da624e9f43b61fe 100644 (file)
@@ -98,7 +98,8 @@ static int tc_shutdown(struct clock_event_device *d)
 
        __raw_writel(0xff, regs + ATMEL_TC_REG(2, IDR));
        __raw_writel(ATMEL_TC_CLKDIS, regs + ATMEL_TC_REG(2, CCR));
-       clk_disable(tcd->clk);
+       if (!clockevent_state_detached(d))
+               clk_disable(tcd->clk);
 
        return 0;
 }
index 9bc37c437874a6ba185799680a093598cb213948..0ca74d07005830cec5abaf92a84c59af17325616 100644 (file)
@@ -142,15 +142,16 @@ static int allocate_resources(int cpu, struct device **cdev,
 
 try_again:
        cpu_reg = regulator_get_optional(cpu_dev, reg);
-       if (IS_ERR(cpu_reg)) {
+       ret = PTR_ERR_OR_ZERO(cpu_reg);
+       if (ret) {
                /*
                 * If cpu's regulator supply node is present, but regulator is
                 * not yet registered, we should try defering probe.
                 */
-               if (PTR_ERR(cpu_reg) == -EPROBE_DEFER) {
+               if (ret == -EPROBE_DEFER) {
                        dev_dbg(cpu_dev, "cpu%d regulator not ready, retry\n",
                                cpu);
-                       return -EPROBE_DEFER;
+                       return ret;
                }
 
                /* Try with "cpu-supply" */
@@ -159,18 +160,16 @@ try_again:
                        goto try_again;
                }
 
-               dev_dbg(cpu_dev, "no regulator for cpu%d: %ld\n",
-                       cpu, PTR_ERR(cpu_reg));
+               dev_dbg(cpu_dev, "no regulator for cpu%d: %d\n", cpu, ret);
        }
 
        cpu_clk = clk_get(cpu_dev, NULL);
-       if (IS_ERR(cpu_clk)) {
+       ret = PTR_ERR_OR_ZERO(cpu_clk);
+       if (ret) {
                /* put regulator */
                if (!IS_ERR(cpu_reg))
                        regulator_put(cpu_reg);
 
-               ret = PTR_ERR(cpu_clk);
-
                /*
                 * If cpu's clk node is present, but clock is not yet
                 * registered, we should try defering probe.
index c35e7da1ed7a185fd95d0f0ae7f7b2961d235a0f..e979ec78b69522edefb2a49b6497c2bbb83e1e62 100644 (file)
@@ -48,11 +48,11 @@ static struct cpufreq_policy *next_policy(struct cpufreq_policy *policy,
                                          bool active)
 {
        do {
-               policy = list_next_entry(policy, policy_list);
-
                /* No more policies in the list */
-               if (&policy->policy_list == &cpufreq_policy_list)
+               if (list_is_last(&policy->policy_list, &cpufreq_policy_list))
                        return NULL;
+
+               policy = list_next_entry(policy, policy_list);
        } while (!suitable_policy(policy, active));
 
        return policy;
index bab3a514ec128254d8cff0ccdeaf02f427625d06..e0d111024d4840e8078fc0553c9ef7dabed71447 100644 (file)
@@ -387,16 +387,18 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy,
        if (!have_governor_per_policy())
                cdata->gdbs_data = dbs_data;
 
+       policy->governor_data = dbs_data;
+
        ret = sysfs_create_group(get_governor_parent_kobj(policy),
                                 get_sysfs_attr(dbs_data));
        if (ret)
                goto reset_gdbs_data;
 
-       policy->governor_data = dbs_data;
-
        return 0;
 
 reset_gdbs_data:
+       policy->governor_data = NULL;
+
        if (!have_governor_per_policy())
                cdata->gdbs_data = NULL;
        cdata->exit(dbs_data, !policy->governor->initialized);
@@ -417,16 +419,19 @@ static int cpufreq_governor_exit(struct cpufreq_policy *policy,
        if (!cdbs->shared || cdbs->shared->policy)
                return -EBUSY;
 
-       policy->governor_data = NULL;
        if (!--dbs_data->usage_count) {
                sysfs_remove_group(get_governor_parent_kobj(policy),
                                   get_sysfs_attr(dbs_data));
 
+               policy->governor_data = NULL;
+
                if (!have_governor_per_policy())
                        cdata->gdbs_data = NULL;
 
                cdata->exit(dbs_data, policy->governor->initialized == 1);
                kfree(dbs_data);
+       } else {
+               policy->governor_data = NULL;
        }
 
        free_common_dbs_info(policy, cdata);
index 1d99c97defa9204f52ad8605fd5f25c1d2ad0540..096377232747652f99d8a89e0681da7134b716fe 100644 (file)
@@ -202,7 +202,7 @@ static void __init pxa_cpufreq_init_voltages(void)
        }
 }
 #else
-static int pxa_cpufreq_change_voltage(struct pxa_freqs *pxa_freq)
+static int pxa_cpufreq_change_voltage(const struct pxa_freqs *pxa_freq)
 {
        return 0;
 }
index 344058f8501a2c2ee888189950b79f615e815a02..d5657d50ac4044d5da886050e822517fe1134b6f 100644 (file)
@@ -119,7 +119,6 @@ struct cpuidle_coupled {
 
 #define CPUIDLE_COUPLED_NOT_IDLE       (-1)
 
-static DEFINE_MUTEX(cpuidle_coupled_lock);
 static DEFINE_PER_CPU(struct call_single_data, cpuidle_coupled_poke_cb);
 
 /*
index 046423b0c5ca22aad3455101d3cafae7fc4dda87..f996efc56605a3d4739ae0bcac83099fd65b0751 100644 (file)
@@ -153,7 +153,7 @@ int cpuidle_enter_freeze(struct cpuidle_driver *drv, struct cpuidle_device *dev)
         * be frozen safely.
         */
        index = find_deepest_state(drv, dev, UINT_MAX, 0, true);
-       if (index >= 0)
+       if (index > 0)
                enter_freeze_proper(drv, dev, index);
 
        return index;
index 3dd69df9c970fa0e86417d6f4e0a147fe8f65885..07d494276aad04195c9e7247089649b302d3082c 100644 (file)
@@ -381,6 +381,7 @@ config CRYPTO_DEV_BFIN_CRC
 
 config CRYPTO_DEV_ATMEL_AES
        tristate "Support for Atmel AES hw accelerator"
+       depends on HAS_DMA
        depends on AT_XDMAC || AT_HDMAC || COMPILE_TEST
        select CRYPTO_AES
        select CRYPTO_AEAD
index 5621612ee92169da0e7f934a6c45a4ee29690f97..3eb3f1279fb7e93306f903c24ce7719ee0672b70 100644 (file)
@@ -280,6 +280,7 @@ static const char *atmel_aes_reg_name(u32 offset, char *tmp, size_t sz)
        case AES_GCMHR(2):
        case AES_GCMHR(3):
                snprintf(tmp, sz, "GCMHR[%u]", (offset - AES_GCMHR(0)) >> 2);
+               break;
 
        default:
                snprintf(tmp, sz, "0x%02x", offset);
@@ -399,7 +400,7 @@ static int atmel_aes_hw_init(struct atmel_aes_dev *dd)
 {
        int err;
 
-       err = clk_prepare_enable(dd->iclk);
+       err = clk_enable(dd->iclk);
        if (err)
                return err;
 
@@ -429,7 +430,7 @@ static int atmel_aes_hw_version_init(struct atmel_aes_dev *dd)
 
        dev_info(dd->dev, "version: 0x%x\n", dd->hw_version);
 
-       clk_disable_unprepare(dd->iclk);
+       clk_disable(dd->iclk);
        return 0;
 }
 
@@ -447,7 +448,7 @@ static inline bool atmel_aes_is_encrypt(const struct atmel_aes_dev *dd)
 
 static inline int atmel_aes_complete(struct atmel_aes_dev *dd, int err)
 {
-       clk_disable_unprepare(dd->iclk);
+       clk_disable(dd->iclk);
        dd->flags &= ~AES_FLAGS_BUSY;
 
        if (dd->is_async)
@@ -2090,10 +2091,14 @@ static int atmel_aes_probe(struct platform_device *pdev)
                goto res_err;
        }
 
-       err = atmel_aes_hw_version_init(aes_dd);
+       err = clk_prepare(aes_dd->iclk);
        if (err)
                goto res_err;
 
+       err = atmel_aes_hw_version_init(aes_dd);
+       if (err)
+               goto iclk_unprepare;
+
        atmel_aes_get_cap(aes_dd);
 
        err = atmel_aes_buff_init(aes_dd);
@@ -2126,6 +2131,8 @@ err_algs:
 err_aes_dma:
        atmel_aes_buff_cleanup(aes_dd);
 err_aes_buff:
+iclk_unprepare:
+       clk_unprepare(aes_dd->iclk);
 res_err:
        tasklet_kill(&aes_dd->done_task);
        tasklet_kill(&aes_dd->queue_task);
@@ -2154,6 +2161,8 @@ static int atmel_aes_remove(struct platform_device *pdev)
        atmel_aes_dma_cleanup(aes_dd);
        atmel_aes_buff_cleanup(aes_dd);
 
+       clk_unprepare(aes_dd->iclk);
+
        return 0;
 }
 
index 20de861aa0ea6c275edb746e14fcb372344fcd48..8bf9914d4d150b439e087688b51c884fdd1d022c 100644 (file)
@@ -782,7 +782,7 @@ static void atmel_sha_finish_req(struct ahash_request *req, int err)
        dd->flags &= ~(SHA_FLAGS_BUSY | SHA_FLAGS_FINAL | SHA_FLAGS_CPU |
                        SHA_FLAGS_DMA_READY | SHA_FLAGS_OUTPUT_READY);
 
-       clk_disable_unprepare(dd->iclk);
+       clk_disable(dd->iclk);
 
        if (req->base.complete)
                req->base.complete(&req->base, err);
@@ -795,7 +795,7 @@ static int atmel_sha_hw_init(struct atmel_sha_dev *dd)
 {
        int err;
 
-       err = clk_prepare_enable(dd->iclk);
+       err = clk_enable(dd->iclk);
        if (err)
                return err;
 
@@ -822,7 +822,7 @@ static void atmel_sha_hw_version_init(struct atmel_sha_dev *dd)
        dev_info(dd->dev,
                        "version: 0x%x\n", dd->hw_version);
 
-       clk_disable_unprepare(dd->iclk);
+       clk_disable(dd->iclk);
 }
 
 static int atmel_sha_handle_queue(struct atmel_sha_dev *dd,
@@ -1410,6 +1410,10 @@ static int atmel_sha_probe(struct platform_device *pdev)
                goto res_err;
        }
 
+       err = clk_prepare(sha_dd->iclk);
+       if (err)
+               goto res_err;
+
        atmel_sha_hw_version_init(sha_dd);
 
        atmel_sha_get_cap(sha_dd);
@@ -1421,12 +1425,12 @@ static int atmel_sha_probe(struct platform_device *pdev)
                        if (IS_ERR(pdata)) {
                                dev_err(&pdev->dev, "platform data not available\n");
                                err = PTR_ERR(pdata);
-                               goto res_err;
+                               goto iclk_unprepare;
                        }
                }
                if (!pdata->dma_slave) {
                        err = -ENXIO;
-                       goto res_err;
+                       goto iclk_unprepare;
                }
                err = atmel_sha_dma_init(sha_dd, pdata);
                if (err)
@@ -1457,6 +1461,8 @@ err_algs:
        if (sha_dd->caps.has_dma)
                atmel_sha_dma_cleanup(sha_dd);
 err_sha_dma:
+iclk_unprepare:
+       clk_unprepare(sha_dd->iclk);
 res_err:
        tasklet_kill(&sha_dd->done_task);
 sha_dd_err:
@@ -1483,12 +1489,7 @@ static int atmel_sha_remove(struct platform_device *pdev)
        if (sha_dd->caps.has_dma)
                atmel_sha_dma_cleanup(sha_dd);
 
-       iounmap(sha_dd->io_base);
-
-       clk_put(sha_dd->iclk);
-
-       if (sha_dd->irq >= 0)
-               free_irq(sha_dd->irq, sha_dd);
+       clk_unprepare(sha_dd->iclk);
 
        return 0;
 }
index 8abb4bc548cc06a7a8ae97779701d308255ff050..69d4a1326feefa6ff4c404d7e093a658728f6f4a 100644 (file)
@@ -534,8 +534,8 @@ static int caam_probe(struct platform_device *pdev)
         * long pointers in master configuration register
         */
        clrsetbits_32(&ctrl->mcr, MCFGR_AWCACHE_MASK, MCFGR_AWCACHE_CACH |
-                     MCFGR_WDENABLE | (sizeof(dma_addr_t) == sizeof(u64) ?
-                                       MCFGR_LONG_PTR : 0));
+                     MCFGR_AWCACHE_BUFF | MCFGR_WDENABLE |
+                     (sizeof(dma_addr_t) == sizeof(u64) ? MCFGR_LONG_PTR : 0));
 
        /*
         *  Read the Compile Time paramters and SCFGR to determine
index 0643e3366e3309de88a03e687a2d5353f5715a22..c0656e7f37b5993672002a8a192dc1c9dcf0c4bd 100644 (file)
@@ -306,7 +306,7 @@ static int mv_cesa_dev_dma_init(struct mv_cesa_dev *cesa)
                return -ENOMEM;
 
        dma->padding_pool = dmam_pool_create("cesa_padding", dev, 72, 1, 0);
-       if (!dma->cache_pool)
+       if (!dma->padding_pool)
                return -ENOMEM;
 
        cesa->dma = dma;
index 0ac0ba86761110db09b9ec68c6c213d6529784eb..1e480f140663530a699d8bf51b402bce394a935e 100644 (file)
@@ -389,7 +389,7 @@ static int qat_hal_check_ae_alive(struct icp_qat_fw_loader_handle *handle)
 {
        unsigned int base_cnt, cur_cnt;
        unsigned char ae;
-       unsigned int times = MAX_RETRY_TIMES;
+       int times = MAX_RETRY_TIMES;
 
        for (ae = 0; ae < handle->hal_handle->ae_max_num; ae++) {
                qat_hal_rd_ae_csr(handle, ae, PROFILE_COUNT,
@@ -402,7 +402,7 @@ static int qat_hal_check_ae_alive(struct icp_qat_fw_loader_handle *handle)
                        cur_cnt &= 0xffff;
                } while (times-- && (cur_cnt == base_cnt));
 
-               if (!times) {
+               if (times < 0) {
                        pr_err("QAT: AE%d is inactive!!\n", ae);
                        return -EFAULT;
                }
@@ -453,7 +453,11 @@ static int qat_hal_init_esram(struct icp_qat_fw_loader_handle *handle)
        void __iomem *csr_addr =
                        (void __iomem *)((uintptr_t)handle->hal_ep_csr_addr_v +
                        ESRAM_AUTO_INIT_CSR_OFFSET);
-       unsigned int csr_val, times = 30;
+       unsigned int csr_val;
+       int times = 30;
+
+       if (handle->pci_dev->device == ADF_C3XXX_PCI_DEVICE_ID)
+               return 0;
 
        csr_val = ADF_CSR_RD(csr_addr, 0);
        if ((csr_val & ESRAM_AUTO_TINIT) && (csr_val & ESRAM_AUTO_TINIT_DONE))
@@ -467,7 +471,7 @@ static int qat_hal_init_esram(struct icp_qat_fw_loader_handle *handle)
                qat_hal_wait_cycles(handle, 0, ESRAM_AUTO_INIT_USED_CYCLES, 0);
                csr_val = ADF_CSR_RD(csr_addr, 0);
        } while (!(csr_val & ESRAM_AUTO_TINIT_DONE) && times--);
-       if ((!times)) {
+       if ((times < 0)) {
                pr_err("QAT: Fail to init eSram!\n");
                return -EFAULT;
        }
@@ -658,7 +662,7 @@ static int qat_hal_clear_gpr(struct icp_qat_fw_loader_handle *handle)
                        ret = qat_hal_wait_cycles(handle, ae, 20, 1);
                } while (ret && times--);
 
-               if (!times) {
+               if (times < 0) {
                        pr_err("QAT: clear GPR of AE %d failed", ae);
                        return -EINVAL;
                }
@@ -693,14 +697,12 @@ int qat_hal_init(struct adf_accel_dev *accel_dev)
        struct adf_hw_device_data *hw_data = accel_dev->hw_device;
        struct adf_bar *misc_bar =
                        &pci_info->pci_bars[hw_data->get_misc_bar_id(hw_data)];
-       struct adf_bar *sram_bar =
-                       &pci_info->pci_bars[hw_data->get_sram_bar_id(hw_data)];
+       struct adf_bar *sram_bar;
 
        handle = kzalloc(sizeof(*handle), GFP_KERNEL);
        if (!handle)
                return -ENOMEM;
 
-       handle->hal_sram_addr_v = sram_bar->virt_addr;
        handle->hal_cap_g_ctl_csr_addr_v =
                (void __iomem *)((uintptr_t)misc_bar->virt_addr +
                                 ICP_QAT_CAP_OFFSET);
@@ -714,6 +716,11 @@ int qat_hal_init(struct adf_accel_dev *accel_dev)
                (void __iomem *)((uintptr_t)handle->hal_cap_ae_xfer_csr_addr_v +
                                 LOCAL_TO_XFER_REG_OFFSET);
        handle->pci_dev = pci_info->pci_dev;
+       if (handle->pci_dev->device != ADF_C3XXX_PCI_DEVICE_ID) {
+               sram_bar =
+                       &pci_info->pci_bars[hw_data->get_sram_bar_id(hw_data)];
+               handle->hal_sram_addr_v = sram_bar->virt_addr;
+       }
        handle->fw_auth = (handle->pci_dev->device ==
                           ADF_DH895XCC_PCI_DEVICE_ID) ? false : true;
        handle->hal_handle = kzalloc(sizeof(*handle->hal_handle), GFP_KERNEL);
index 66f729eaf00bb99f09ef5960daf8c7630ceccb96..20c9539abc36e887dea47d00982e308be412d93e 100644 (file)
@@ -25,7 +25,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
        amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o
 
 # add asic specific block
-amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o gmc_v7_0.o cik_ih.o kv_smc.o kv_dpm.o \
+amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
        ci_smc.o ci_dpm.o dce_v8_0.o gfx_v7_0.o cik_sdma.o uvd_v4_2.o vce_v2_0.o \
        amdgpu_amdkfd_gfx_v7.o
 
@@ -34,6 +34,7 @@ amdgpu-y += \
 
 # add GMC block
 amdgpu-y += \
+       gmc_v7_0.o \
        gmc_v8_0.o
 
 # add IH block
index 313b0cc8d676d90d1f4073c28579f096afea1a74..82edf95b7740d7fe9070cba62009374c08c34562 100644 (file)
@@ -2278,60 +2278,60 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_dpm_enable_bapm(adev, e) (adev)->pm.funcs->enable_bapm((adev), (e))
 
 #define amdgpu_dpm_get_temperature(adev) \
-       (adev)->pp_enabled ?                                            \
+       ((adev)->pp_enabled ?                                           \
              (adev)->powerplay.pp_funcs->get_temperature((adev)->powerplay.pp_handle) : \
-             (adev)->pm.funcs->get_temperature((adev))
+             (adev)->pm.funcs->get_temperature((adev)))
 
 #define amdgpu_dpm_set_fan_control_mode(adev, m) \
-       (adev)->pp_enabled ?                                            \
+       ((adev)->pp_enabled ?                                           \
              (adev)->powerplay.pp_funcs->set_fan_control_mode((adev)->powerplay.pp_handle, (m)) : \
-             (adev)->pm.funcs->set_fan_control_mode((adev), (m))
+             (adev)->pm.funcs->set_fan_control_mode((adev), (m)))
 
 #define amdgpu_dpm_get_fan_control_mode(adev) \
-       (adev)->pp_enabled ?                                            \
+       ((adev)->pp_enabled ?                                           \
              (adev)->powerplay.pp_funcs->get_fan_control_mode((adev)->powerplay.pp_handle) : \
-             (adev)->pm.funcs->get_fan_control_mode((adev))
+             (adev)->pm.funcs->get_fan_control_mode((adev)))
 
 #define amdgpu_dpm_set_fan_speed_percent(adev, s) \
-       (adev)->pp_enabled ?                                            \
+       ((adev)->pp_enabled ?                                           \
              (adev)->powerplay.pp_funcs->set_fan_speed_percent((adev)->powerplay.pp_handle, (s)) : \
-             (adev)->pm.funcs->set_fan_speed_percent((adev), (s))
+             (adev)->pm.funcs->set_fan_speed_percent((adev), (s)))
 
 #define amdgpu_dpm_get_fan_speed_percent(adev, s) \
-       (adev)->pp_enabled ?                                            \
+       ((adev)->pp_enabled ?                                           \
              (adev)->powerplay.pp_funcs->get_fan_speed_percent((adev)->powerplay.pp_handle, (s)) : \
-             (adev)->pm.funcs->get_fan_speed_percent((adev), (s))
+             (adev)->pm.funcs->get_fan_speed_percent((adev), (s)))
 
 #define amdgpu_dpm_get_sclk(adev, l) \
-       (adev)->pp_enabled ?                                            \
+       ((adev)->pp_enabled ?                                           \
              (adev)->powerplay.pp_funcs->get_sclk((adev)->powerplay.pp_handle, (l)) : \
-               (adev)->pm.funcs->get_sclk((adev), (l))
+               (adev)->pm.funcs->get_sclk((adev), (l)))
 
 #define amdgpu_dpm_get_mclk(adev, l)  \
-       (adev)->pp_enabled ?                                            \
+       ((adev)->pp_enabled ?                                           \
              (adev)->powerplay.pp_funcs->get_mclk((adev)->powerplay.pp_handle, (l)) : \
-             (adev)->pm.funcs->get_mclk((adev), (l))
+             (adev)->pm.funcs->get_mclk((adev), (l)))
 
 
 #define amdgpu_dpm_force_performance_level(adev, l) \
-       (adev)->pp_enabled ?                                            \
+       ((adev)->pp_enabled ?                                           \
              (adev)->powerplay.pp_funcs->force_performance_level((adev)->powerplay.pp_handle, (l)) : \
-             (adev)->pm.funcs->force_performance_level((adev), (l))
+             (adev)->pm.funcs->force_performance_level((adev), (l)))
 
 #define amdgpu_dpm_powergate_uvd(adev, g) \
-       (adev)->pp_enabled ?                                            \
+       ((adev)->pp_enabled ?                                           \
              (adev)->powerplay.pp_funcs->powergate_uvd((adev)->powerplay.pp_handle, (g)) : \
-             (adev)->pm.funcs->powergate_uvd((adev), (g))
+             (adev)->pm.funcs->powergate_uvd((adev), (g)))
 
 #define amdgpu_dpm_powergate_vce(adev, g) \
-       (adev)->pp_enabled ?                                            \
+       ((adev)->pp_enabled ?                                           \
              (adev)->powerplay.pp_funcs->powergate_vce((adev)->powerplay.pp_handle, (g)) : \
-             (adev)->pm.funcs->powergate_vce((adev), (g))
+             (adev)->pm.funcs->powergate_vce((adev), (g)))
 
 #define amdgpu_dpm_debugfs_print_current_performance_level(adev, m) \
-       (adev)->pp_enabled ?                                            \
+       ((adev)->pp_enabled ?                                           \
              (adev)->powerplay.pp_funcs->print_current_performance_level((adev)->powerplay.pp_handle, (m)) : \
-             (adev)->pm.funcs->debugfs_print_current_performance_level((adev), (m))
+             (adev)->pm.funcs->debugfs_print_current_performance_level((adev), (m)))
 
 #define amdgpu_dpm_get_current_power_state(adev) \
        (adev)->powerplay.pp_funcs->get_current_power_state((adev)->powerplay.pp_handle)
index 0e1376317683e4de30803a987450dba3e01b9621..362bedc9e50791ee6a9b1e60f4885a87ba158677 100644 (file)
@@ -154,7 +154,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
        .get_fw_version = get_fw_version
 };
 
-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions()
+struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
 {
        return (struct kfd2kgd_calls *)&kfd2kgd;
 }
index 79fa5c7de856eab635ea9010ab0b8bfc446c37c2..04b744d64b57a6355f38ff071f48836148880257 100644 (file)
@@ -115,7 +115,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
        .get_fw_version = get_fw_version
 };
 
-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions()
+struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
 {
        return (struct kfd2kgd_calls *)&kfd2kgd;
 }
index 6f89f8e034d0eb05b96378581f8dfa7dee0fb319..b882e8175615fba482ac9c8b8f29561e7549dc18 100644 (file)
@@ -478,9 +478,9 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo
        struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
        unsigned i;
 
-       amdgpu_vm_move_pt_bos_in_lru(parser->adev, &fpriv->vm);
-
        if (!error) {
+               amdgpu_vm_move_pt_bos_in_lru(parser->adev, &fpriv->vm);
+
                /* Sort the buffer list from the smallest to largest buffer,
                 * which affects the order of buffers in the LRU list.
                 * This assures that the smallest buffers are added first
index b5dbbb57349190ce4b28409547781c515283994f..9c1af8976befea20cf371cc4a15f226dbe434d75 100644 (file)
@@ -256,11 +256,11 @@ static struct pci_device_id pciidlist[] = {
        {0x1002, 0x985F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
 #endif
        /* topaz */
-       {0x1002, 0x6900, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ|AMD_EXP_HW_SUPPORT},
-       {0x1002, 0x6901, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ|AMD_EXP_HW_SUPPORT},
-       {0x1002, 0x6902, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ|AMD_EXP_HW_SUPPORT},
-       {0x1002, 0x6903, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ|AMD_EXP_HW_SUPPORT},
-       {0x1002, 0x6907, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ|AMD_EXP_HW_SUPPORT},
+       {0x1002, 0x6900, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ},
+       {0x1002, 0x6901, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ},
+       {0x1002, 0x6902, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ},
+       {0x1002, 0x6903, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ},
+       {0x1002, 0x6907, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ},
        /* tonga */
        {0x1002, 0x6920, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TONGA},
        {0x1002, 0x6921, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TONGA},
index cfb6caad2a73a4264ab9386f1c6616cf75ef0c77..919146780a15a91434f7383e666eedabdf402ca0 100644 (file)
@@ -333,6 +333,10 @@ int amdgpu_fbdev_init(struct amdgpu_device *adev)
        if (!adev->mode_info.mode_config_initialized)
                return 0;
 
+       /* don't init fbdev if there are no connectors */
+       if (list_empty(&adev->ddev->mode_config.connector_list))
+               return 0;
+
        /* select 8 bpp console on low vram cards */
        if (adev->mc.real_vram_size <= (32*1024*1024))
                bpp_sel = 8;
index c3ce103b6a33b2ef3a0a6f7f14877e18150ca405..b8fbbd7699e4586e13e57905b0cef818f6e43e6b 100644 (file)
@@ -33,6 +33,7 @@
 #include <linux/slab.h>
 #include <drm/drmP.h>
 #include <drm/amdgpu_drm.h>
+#include <drm/drm_cache.h>
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 
@@ -261,6 +262,13 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
                                       AMDGPU_GEM_DOMAIN_OA);
 
        bo->flags = flags;
+
+       /* For architectures that don't support WC memory,
+        * mask out the WC flag from the BO
+        */
+       if (!drm_arch_can_wc_memory())
+               bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
+
        amdgpu_fill_placement_to_bo(bo, placement);
        /* Kernel allocation are uninterruptible */
        r = ttm_bo_init(&adev->mman.bdev, &bo->tbo, size, type,
@@ -399,7 +407,8 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
                }
                if (fpfn > bo->placements[i].fpfn)
                        bo->placements[i].fpfn = fpfn;
-               if (lpfn && lpfn < bo->placements[i].lpfn)
+               if (!bo->placements[i].lpfn ||
+                   (lpfn && lpfn < bo->placements[i].lpfn))
                        bo->placements[i].lpfn = lpfn;
                bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
        }
index 5ee9a069027885989f8aad55e9340e1a9d3424ea..b9d0d55f6b47df33ab7ebf7024fe7ca6e821892a 100644 (file)
@@ -99,13 +99,24 @@ static int amdgpu_pp_early_init(void *handle)
 
 #ifdef CONFIG_DRM_AMD_POWERPLAY
        switch (adev->asic_type) {
-               case CHIP_TONGA:
-               case CHIP_FIJI:
-                       adev->pp_enabled = (amdgpu_powerplay > 0) ? true : false;
-                       break;
-               default:
-                       adev->pp_enabled = (amdgpu_powerplay > 0) ? true : false;
-                       break;
+       case CHIP_TONGA:
+       case CHIP_FIJI:
+               adev->pp_enabled = (amdgpu_powerplay == 0) ? false : true;
+               break;
+       case CHIP_CARRIZO:
+       case CHIP_STONEY:
+               adev->pp_enabled = (amdgpu_powerplay > 0) ? true : false;
+               break;
+       /* These chips don't have powerplay implemenations */
+       case CHIP_BONAIRE:
+       case CHIP_HAWAII:
+       case CHIP_KABINI:
+       case CHIP_MULLINS:
+       case CHIP_KAVERI:
+       case CHIP_TOPAZ:
+       default:
+               adev->pp_enabled = false;
+               break;
        }
 #else
        adev->pp_enabled = false;
index 78e9b0f14661a0d0f404f67df3789b052dcd9133..d1f234dd21261e06764b9e5b623d11ff08f20fe4 100644 (file)
@@ -487,7 +487,7 @@ static int amdgpu_debugfs_ring_info(struct seq_file *m, void *data)
        seq_printf(m, "rptr: 0x%08x [%5d]\n",
                   rptr, rptr);
 
-       rptr_next = ~0;
+       rptr_next = le32_to_cpu(*ring->next_rptr_cpu_addr);
 
        seq_printf(m, "driver's copy of the wptr: 0x%08x [%5d]\n",
                   ring->wptr, ring->wptr);
index 8a1752ff3d8e55a1d8b8ab3061f5c9c425058d0f..55cf05e1c81c398fc747f598d1ce0bc78a38a435 100644 (file)
@@ -808,7 +808,7 @@ uint32_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
                        flags |= AMDGPU_PTE_SNOOPED;
        }
 
-       if (adev->asic_type >= CHIP_TOPAZ)
+       if (adev->asic_type >= CHIP_TONGA)
                flags |= AMDGPU_PTE_EXECUTABLE;
 
        flags |= AMDGPU_PTE_READABLE;
index aefc668e6b5d79e65873e973275d5f0beb7b10d3..9599f7559b3dc86308d2c375739d0cd20ee60941 100644 (file)
@@ -1282,7 +1282,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 {
        const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
                AMDGPU_VM_PTE_COUNT * 8);
-       unsigned pd_size, pd_entries, pts_size;
+       unsigned pd_size, pd_entries;
        int i, r;
 
        for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
@@ -1300,8 +1300,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
        pd_entries = amdgpu_vm_num_pdes(adev);
 
        /* allocate page table array */
-       pts_size = pd_entries * sizeof(struct amdgpu_vm_pt);
-       vm->page_tables = kzalloc(pts_size, GFP_KERNEL);
+       vm->page_tables = drm_calloc_large(pd_entries, sizeof(struct amdgpu_vm_pt));
        if (vm->page_tables == NULL) {
                DRM_ERROR("Cannot allocate memory for page table array\n");
                return -ENOMEM;
@@ -1361,7 +1360,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 
        for (i = 0; i < amdgpu_vm_num_pdes(adev); i++)
                amdgpu_bo_unref(&vm->page_tables[i].entry.robj);
-       kfree(vm->page_tables);
+       drm_free_large(vm->page_tables);
 
        amdgpu_bo_unref(&vm->page_directory);
        fence_put(vm->page_directory_fence);
index 72793f93e2fcc9989e5f936ec1d43ba0593ac69e..6c76139de1c9c5992579a7eb44d1f6f4eff9da9a 100644 (file)
@@ -4738,6 +4738,22 @@ static int gfx_v7_0_early_init(void *handle)
        return 0;
 }
 
+static int gfx_v7_0_late_init(void *handle)
+{
+       struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+       int r;
+
+       r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
+       if (r)
+               return r;
+
+       r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
+       if (r)
+               return r;
+
+       return 0;
+}
+
 static int gfx_v7_0_sw_init(void *handle)
 {
        struct amdgpu_ring *ring;
@@ -4890,6 +4906,8 @@ static int gfx_v7_0_hw_fini(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+       amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
+       amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
        gfx_v7_0_cp_enable(adev, false);
        gfx_v7_0_rlc_stop(adev);
        gfx_v7_0_fini_pg(adev);
@@ -5527,7 +5545,7 @@ static int gfx_v7_0_set_powergating_state(void *handle,
 
 const struct amd_ip_funcs gfx_v7_0_ip_funcs = {
        .early_init = gfx_v7_0_early_init,
-       .late_init = NULL,
+       .late_init = gfx_v7_0_late_init,
        .sw_init = gfx_v7_0_sw_init,
        .sw_fini = gfx_v7_0_sw_fini,
        .hw_init = gfx_v7_0_hw_init,
index 13235d84e5a67d45ca52bf4de9536d1c22a26dc6..8f8ec37ecd883599b416773a0a6a579da6131515 100644 (file)
@@ -111,7 +111,6 @@ MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
-MODULE_FIRMWARE("amdgpu/topaz_mec2.bin");
 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
 
 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
@@ -828,7 +827,8 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
        adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
 
-       if (adev->asic_type != CHIP_STONEY) {
+       if ((adev->asic_type != CHIP_STONEY) &&
+           (adev->asic_type != CHIP_TOPAZ)) {
                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
                err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
                if (!err) {
@@ -3851,10 +3851,16 @@ static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
                        if (r)
                                return -EINVAL;
 
-                       r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
-                                                       AMDGPU_UCODE_ID_CP_MEC1);
-                       if (r)
-                               return -EINVAL;
+                       if (adev->asic_type == CHIP_TOPAZ) {
+                               r = gfx_v8_0_cp_compute_load_microcode(adev);
+                               if (r)
+                                       return r;
+                       } else {
+                               r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
+                                                                                AMDGPU_UCODE_ID_CP_MEC1);
+                               if (r)
+                                       return -EINVAL;
+                       }
                }
        }
 
@@ -3901,6 +3907,8 @@ static int gfx_v8_0_hw_fini(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+       amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
+       amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
        gfx_v8_0_cp_enable(adev, false);
        gfx_v8_0_rlc_stop(adev);
        gfx_v8_0_cp_compute_fini(adev);
@@ -4186,7 +4194,18 @@ static int gfx_v8_0_soft_reset(void *handle)
                gfx_v8_0_cp_gfx_enable(adev, false);
 
                /* Disable MEC parsing/prefetching */
-               /* XXX todo */
+               gfx_v8_0_cp_compute_enable(adev, false);
+
+               if (grbm_soft_reset || srbm_soft_reset) {
+                       tmp = RREG32(mmGMCON_DEBUG);
+                       tmp = REG_SET_FIELD(tmp,
+                                           GMCON_DEBUG, GFX_STALL, 1);
+                       tmp = REG_SET_FIELD(tmp,
+                                           GMCON_DEBUG, GFX_CLEAR, 1);
+                       WREG32(mmGMCON_DEBUG, tmp);
+
+                       udelay(50);
+               }
 
                if (grbm_soft_reset) {
                        tmp = RREG32(mmGRBM_SOFT_RESET);
@@ -4215,6 +4234,16 @@ static int gfx_v8_0_soft_reset(void *handle)
                        WREG32(mmSRBM_SOFT_RESET, tmp);
                        tmp = RREG32(mmSRBM_SOFT_RESET);
                }
+
+               if (grbm_soft_reset || srbm_soft_reset) {
+                       tmp = RREG32(mmGMCON_DEBUG);
+                       tmp = REG_SET_FIELD(tmp,
+                                           GMCON_DEBUG, GFX_STALL, 0);
+                       tmp = REG_SET_FIELD(tmp,
+                                           GMCON_DEBUG, GFX_CLEAR, 0);
+                       WREG32(mmGMCON_DEBUG, tmp);
+               }
+
                /* Wait a little for things to settle down */
                udelay(50);
                gfx_v8_0_print_status((void *)adev);
@@ -4308,6 +4337,14 @@ static int gfx_v8_0_late_init(void *handle)
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int r;
 
+       r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
+       if (r)
+               return r;
+
+       r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
+       if (r)
+               return r;
+
        /* requires IBs so do in late init after IB pool is initialized */
        r = gfx_v8_0_do_edc_gpr_workarounds(adev);
        if (r)
index 3f956065d069125df0200f78527437134bf7f6d1..8aa2991ab379af7c6d981c1e01734f6b83da3692 100644 (file)
@@ -42,9 +42,39 @@ static void gmc_v7_0_set_irq_funcs(struct amdgpu_device *adev);
 
 MODULE_FIRMWARE("radeon/bonaire_mc.bin");
 MODULE_FIRMWARE("radeon/hawaii_mc.bin");
+MODULE_FIRMWARE("amdgpu/topaz_mc.bin");
+
+static const u32 golden_settings_iceland_a11[] =
+{
+       mmVM_PRT_APERTURE0_LOW_ADDR, 0x0fffffff, 0x0fffffff,
+       mmVM_PRT_APERTURE1_LOW_ADDR, 0x0fffffff, 0x0fffffff,
+       mmVM_PRT_APERTURE2_LOW_ADDR, 0x0fffffff, 0x0fffffff,
+       mmVM_PRT_APERTURE3_LOW_ADDR, 0x0fffffff, 0x0fffffff
+};
+
+static const u32 iceland_mgcg_cgcg_init[] =
+{
+       mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104
+};
+
+static void gmc_v7_0_init_golden_registers(struct amdgpu_device *adev)
+{
+       switch (adev->asic_type) {
+       case CHIP_TOPAZ:
+               amdgpu_program_register_sequence(adev,
+                                                iceland_mgcg_cgcg_init,
+                                                (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
+               amdgpu_program_register_sequence(adev,
+                                                golden_settings_iceland_a11,
+                                                (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
+               break;
+       default:
+               break;
+       }
+}
 
 /**
- * gmc8_mc_wait_for_idle - wait for MC idle callback.
+ * gmc7_mc_wait_for_idle - wait for MC idle callback.
  *
  * @adev: amdgpu_device pointer
  *
@@ -132,13 +162,20 @@ static int gmc_v7_0_init_microcode(struct amdgpu_device *adev)
        case CHIP_HAWAII:
                chip_name = "hawaii";
                break;
+       case CHIP_TOPAZ:
+               chip_name = "topaz";
+               break;
        case CHIP_KAVERI:
        case CHIP_KABINI:
                return 0;
        default: BUG();
        }
 
-       snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
+       if (adev->asic_type == CHIP_TOPAZ)
+               snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mc.bin", chip_name);
+       else
+               snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
+
        err = request_firmware(&adev->mc.fw, fw_name, adev->dev);
        if (err)
                goto out;
@@ -984,6 +1021,8 @@ static int gmc_v7_0_hw_init(void *handle)
        int r;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+       gmc_v7_0_init_golden_registers(adev);
+
        gmc_v7_0_mc_program(adev);
 
        if (!(adev->flags & AMD_IS_APU)) {
index c0c9a0101eb453c3139b9796ac8449350f0fb4bd..3efd45546241afc65e5d7db1fbc541fda946857b 100644 (file)
@@ -42,9 +42,7 @@
 static void gmc_v8_0_set_gart_funcs(struct amdgpu_device *adev);
 static void gmc_v8_0_set_irq_funcs(struct amdgpu_device *adev);
 
-MODULE_FIRMWARE("amdgpu/topaz_mc.bin");
 MODULE_FIRMWARE("amdgpu/tonga_mc.bin");
-MODULE_FIRMWARE("amdgpu/fiji_mc.bin");
 
 static const u32 golden_settings_tonga_a11[] =
 {
@@ -75,19 +73,6 @@ static const u32 fiji_mgcg_cgcg_init[] =
        mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104
 };
 
-static const u32 golden_settings_iceland_a11[] =
-{
-       mmVM_PRT_APERTURE0_LOW_ADDR, 0x0fffffff, 0x0fffffff,
-       mmVM_PRT_APERTURE1_LOW_ADDR, 0x0fffffff, 0x0fffffff,
-       mmVM_PRT_APERTURE2_LOW_ADDR, 0x0fffffff, 0x0fffffff,
-       mmVM_PRT_APERTURE3_LOW_ADDR, 0x0fffffff, 0x0fffffff
-};
-
-static const u32 iceland_mgcg_cgcg_init[] =
-{
-       mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104
-};
-
 static const u32 cz_mgcg_cgcg_init[] =
 {
        mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104
@@ -102,14 +87,6 @@ static const u32 stoney_mgcg_cgcg_init[] =
 static void gmc_v8_0_init_golden_registers(struct amdgpu_device *adev)
 {
        switch (adev->asic_type) {
-       case CHIP_TOPAZ:
-               amdgpu_program_register_sequence(adev,
-                                                iceland_mgcg_cgcg_init,
-                                                (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
-               amdgpu_program_register_sequence(adev,
-                                                golden_settings_iceland_a11,
-                                                (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
-               break;
        case CHIP_FIJI:
                amdgpu_program_register_sequence(adev,
                                                 fiji_mgcg_cgcg_init,
@@ -229,15 +206,10 @@ static int gmc_v8_0_init_microcode(struct amdgpu_device *adev)
        DRM_DEBUG("\n");
 
        switch (adev->asic_type) {
-       case CHIP_TOPAZ:
-               chip_name = "topaz";
-               break;
        case CHIP_TONGA:
                chip_name = "tonga";
                break;
        case CHIP_FIJI:
-               chip_name = "fiji";
-               break;
        case CHIP_CARRIZO:
        case CHIP_STONEY:
                return 0;
@@ -1007,7 +979,7 @@ static int gmc_v8_0_hw_init(void *handle)
 
        gmc_v8_0_mc_program(adev);
 
-       if (!(adev->flags & AMD_IS_APU)) {
+       if (adev->asic_type == CHIP_TONGA) {
                r = gmc_v8_0_mc_load_microcode(adev);
                if (r) {
                        DRM_ERROR("Failed to load MC firmware!\n");
index 966d4b2ed9dad0527a2d0246b23731f1d802d9e2..090486c182497ba8de2aaa7840242be9cf4750aa 100644 (file)
@@ -432,7 +432,7 @@ static uint32_t iceland_smu_get_mask_for_fw_type(uint32_t fw_type)
                case AMDGPU_UCODE_ID_CP_ME:
                        return UCODE_ID_CP_ME_MASK;
                case AMDGPU_UCODE_ID_CP_MEC1:
-                       return UCODE_ID_CP_MEC_MASK | UCODE_ID_CP_MEC_JT1_MASK | UCODE_ID_CP_MEC_JT2_MASK;
+                       return UCODE_ID_CP_MEC_MASK | UCODE_ID_CP_MEC_JT1_MASK;
                case AMDGPU_UCODE_ID_CP_MEC2:
                        return UCODE_ID_CP_MEC_MASK;
                case AMDGPU_UCODE_ID_RLC_G:
@@ -522,12 +522,6 @@ static int iceland_smu_request_load_fw(struct amdgpu_device *adev)
                return -EINVAL;
        }
 
-       if (iceland_smu_populate_single_firmware_entry(adev, UCODE_ID_CP_MEC_JT2,
-                       &toc->entry[toc->num_entries++])) {
-               DRM_ERROR("Failed to get firmware entry for MEC_JT2\n");
-               return -EINVAL;
-       }
-
        if (iceland_smu_populate_single_firmware_entry(adev, UCODE_ID_SDMA0,
                        &toc->entry[toc->num_entries++])) {
                DRM_ERROR("Failed to get firmware entry for SDMA0\n");
@@ -550,8 +544,8 @@ static int iceland_smu_request_load_fw(struct amdgpu_device *adev)
                        UCODE_ID_CP_ME_MASK |
                        UCODE_ID_CP_PFP_MASK |
                        UCODE_ID_CP_MEC_MASK |
-                       UCODE_ID_CP_MEC_JT1_MASK |
-                       UCODE_ID_CP_MEC_JT2_MASK;
+                       UCODE_ID_CP_MEC_JT1_MASK;
+
 
        if (iceland_send_msg_to_smc_with_parameter_without_waiting(adev, PPSMC_MSG_LoadUcodes, fw_to_load)) {
                DRM_ERROR("Fail to request SMU load ucode\n");
index f4a1346525febf9c9a463e32525c635f70873a93..0497784b36523086a029bb5a08cd977c9a9f6372 100644 (file)
@@ -122,25 +122,12 @@ static int tonga_dpm_hw_fini(void *handle)
 
 static int tonga_dpm_suspend(void *handle)
 {
-       return 0;
+       return tonga_dpm_hw_fini(handle);
 }
 
 static int tonga_dpm_resume(void *handle)
 {
-       int ret;
-       struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-       mutex_lock(&adev->pm.mutex);
-
-       ret = tonga_smu_start(adev);
-       if (ret) {
-               DRM_ERROR("SMU start failed\n");
-               goto fail;
-       }
-
-fail:
-       mutex_unlock(&adev->pm.mutex);
-       return ret;
+       return tonga_dpm_hw_init(handle);
 }
 
 static int tonga_dpm_set_clockgating_state(void *handle,
index 652e76644c31cab37fee1964797d890749fbf0a7..89f5a1ff6f43aba335945d4f7a2643c0310623f8 100644 (file)
@@ -61,6 +61,7 @@
 #include "vi.h"
 #include "vi_dpm.h"
 #include "gmc_v8_0.h"
+#include "gmc_v7_0.h"
 #include "gfx_v8_0.h"
 #include "sdma_v2_4.h"
 #include "sdma_v3_0.h"
@@ -1109,10 +1110,10 @@ static const struct amdgpu_ip_block_version topaz_ip_blocks[] =
        },
        {
                .type = AMD_IP_BLOCK_TYPE_GMC,
-               .major = 8,
-               .minor = 0,
+               .major = 7,
+               .minor = 4,
                .rev = 0,
-               .funcs = &gmc_v8_0_ip_funcs,
+               .funcs = &gmc_v7_0_ip_funcs,
        },
        {
                .type = AMD_IP_BLOCK_TYPE_IH,
@@ -1442,8 +1443,7 @@ static int vi_common_early_init(void *handle)
                break;
        case CHIP_FIJI:
                adev->has_uvd = true;
-               adev->cg_flags = AMDGPU_CG_SUPPORT_UVD_MGCG |
-                               AMDGPU_CG_SUPPORT_VCE_MGCG;
+               adev->cg_flags = 0;
                adev->pg_flags = 0;
                adev->external_rev_id = adev->rev_id + 0x3c;
                break;
index 9be007081b72a8b5c75b4baf1fe01e92bfe792bc..a902ae037398389cf6e19c1ab7379ca64e6a46ed 100644 (file)
@@ -194,7 +194,7 @@ static void kfd_process_wq_release(struct work_struct *work)
 
        kfree(p);
 
-       kfree((void *)work);
+       kfree(work);
 }
 
 static void kfd_process_destroy_delayed(struct rcu_head *rcu)
index 8f5d5edcf193b67a67f6990824f8ef8260e1afa5..aa67244a77ae38cf69761bc964c5f1d4400874c5 100644 (file)
@@ -64,6 +64,11 @@ static int pp_sw_init(void *handle)
        if (ret == 0)
                ret = hwmgr->hwmgr_func->backend_init(hwmgr);
 
+       if (ret)
+               printk("amdgpu: powerplay initialization failed\n");
+       else
+               printk("amdgpu: powerplay initialized\n");
+
        return ret;
 }
 
index 873a8d264d5c6aa7af31322fd5818dc1f46c7317..ec222c665602df7113cb4ec75b3fc2633d86a8a4 100644 (file)
@@ -272,6 +272,9 @@ static int cz_start_smu(struct pp_smumgr *smumgr)
                                UCODE_ID_CP_MEC_JT1_MASK |
                                UCODE_ID_CP_MEC_JT2_MASK;
 
+       if (smumgr->chip_id == CHIP_STONEY)
+               fw_to_check &= ~(UCODE_ID_SDMA1_MASK | UCODE_ID_CP_MEC_JT2_MASK);
+
        cz_request_smu_load_fw(smumgr);
        cz_check_fw_load_finish(smumgr, fw_to_check);
 
@@ -282,7 +285,7 @@ static int cz_start_smu(struct pp_smumgr *smumgr)
        return ret;
 }
 
-static uint8_t cz_translate_firmware_enum_to_arg(
+static uint8_t cz_translate_firmware_enum_to_arg(struct pp_smumgr *smumgr,
                        enum cz_scratch_entry firmware_enum)
 {
        uint8_t ret = 0;
@@ -292,7 +295,10 @@ static uint8_t cz_translate_firmware_enum_to_arg(
                ret = UCODE_ID_SDMA0;
                break;
        case CZ_SCRATCH_ENTRY_UCODE_ID_SDMA1:
-               ret = UCODE_ID_SDMA1;
+               if (smumgr->chip_id == CHIP_STONEY)
+                       ret = UCODE_ID_SDMA0;
+               else
+                       ret = UCODE_ID_SDMA1;
                break;
        case CZ_SCRATCH_ENTRY_UCODE_ID_CP_CE:
                ret = UCODE_ID_CP_CE;
@@ -307,7 +313,10 @@ static uint8_t cz_translate_firmware_enum_to_arg(
                ret = UCODE_ID_CP_MEC_JT1;
                break;
        case CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT2:
-               ret = UCODE_ID_CP_MEC_JT2;
+               if (smumgr->chip_id == CHIP_STONEY)
+                       ret = UCODE_ID_CP_MEC_JT1;
+               else
+                       ret = UCODE_ID_CP_MEC_JT2;
                break;
        case CZ_SCRATCH_ENTRY_UCODE_ID_GMCON_RENG:
                ret = UCODE_ID_GMCON_RENG;
@@ -396,7 +405,7 @@ static int cz_smu_populate_single_scratch_task(
        struct SMU_Task *task = &toc->tasks[cz_smu->toc_entry_used_count++];
 
        task->type = type;
-       task->arg = cz_translate_firmware_enum_to_arg(fw_enum);
+       task->arg = cz_translate_firmware_enum_to_arg(smumgr, fw_enum);
        task->next = is_last ? END_OF_TASK_LIST : cz_smu->toc_entry_used_count;
 
        for (i = 0; i < cz_smu->scratch_buffer_length; i++)
@@ -433,7 +442,7 @@ static int cz_smu_populate_single_ucode_load_task(
        struct SMU_Task *task = &toc->tasks[cz_smu->toc_entry_used_count++];
 
        task->type = TASK_TYPE_UCODE_LOAD;
-       task->arg = cz_translate_firmware_enum_to_arg(fw_enum);
+       task->arg = cz_translate_firmware_enum_to_arg(smumgr, fw_enum);
        task->next = is_last ? END_OF_TASK_LIST : cz_smu->toc_entry_used_count;
 
        for (i = 0; i < cz_smu->driver_buffer_length; i++)
@@ -509,8 +518,14 @@ static int cz_smu_construct_toc_for_vddgfx_exit(struct pp_smumgr *smumgr)
                                CZ_SCRATCH_ENTRY_UCODE_ID_CP_ME, false);
        cz_smu_populate_single_ucode_load_task(smumgr,
                                CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT1, false);
-       cz_smu_populate_single_ucode_load_task(smumgr,
+
+       if (smumgr->chip_id == CHIP_STONEY)
+               cz_smu_populate_single_ucode_load_task(smumgr,
+                               CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT1, false);
+       else
+               cz_smu_populate_single_ucode_load_task(smumgr,
                                CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT2, false);
+
        cz_smu_populate_single_ucode_load_task(smumgr,
                                CZ_SCRATCH_ENTRY_UCODE_ID_RLC_G, false);
 
@@ -551,7 +566,11 @@ static int cz_smu_construct_toc_for_bootup(struct pp_smumgr *smumgr)
 
        cz_smu_populate_single_ucode_load_task(smumgr,
                                CZ_SCRATCH_ENTRY_UCODE_ID_SDMA0, false);
-       cz_smu_populate_single_ucode_load_task(smumgr,
+       if (smumgr->chip_id == CHIP_STONEY)
+               cz_smu_populate_single_ucode_load_task(smumgr,
+                               CZ_SCRATCH_ENTRY_UCODE_ID_SDMA0, false);
+       else
+               cz_smu_populate_single_ucode_load_task(smumgr,
                                CZ_SCRATCH_ENTRY_UCODE_ID_SDMA1, false);
        cz_smu_populate_single_ucode_load_task(smumgr,
                                CZ_SCRATCH_ENTRY_UCODE_ID_CP_CE, false);
@@ -561,7 +580,11 @@ static int cz_smu_construct_toc_for_bootup(struct pp_smumgr *smumgr)
                                CZ_SCRATCH_ENTRY_UCODE_ID_CP_ME, false);
        cz_smu_populate_single_ucode_load_task(smumgr,
                                CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT1, false);
-       cz_smu_populate_single_ucode_load_task(smumgr,
+       if (smumgr->chip_id == CHIP_STONEY)
+               cz_smu_populate_single_ucode_load_task(smumgr,
+                               CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT1, false);
+       else
+               cz_smu_populate_single_ucode_load_task(smumgr,
                                CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT2, false);
        cz_smu_populate_single_ucode_load_task(smumgr,
                                CZ_SCRATCH_ENTRY_UCODE_ID_RLC_G, true);
@@ -618,7 +641,7 @@ static int cz_smu_populate_firmware_entries(struct pp_smumgr *smumgr)
 
        for (i = 0; i < sizeof(firmware_list)/sizeof(*firmware_list); i++) {
 
-               firmware_type = cz_translate_firmware_enum_to_arg(
+               firmware_type = cz_translate_firmware_enum_to_arg(smumgr,
                                        firmware_list[i]);
 
                ucode_id = cz_convert_fw_type_to_cgs(firmware_type);
index 57cccd68ca522548c6c098ee72798a5b3369def0..7c523060a076f715746829004657b227a9308640 100644 (file)
@@ -946,9 +946,23 @@ static void wait_for_fences(struct drm_device *dev,
        }
 }
 
-static bool framebuffer_changed(struct drm_device *dev,
-                               struct drm_atomic_state *old_state,
-                               struct drm_crtc *crtc)
+/**
+ * drm_atomic_helper_framebuffer_changed - check if framebuffer has changed
+ * @dev: DRM device
+ * @old_state: atomic state object with old state structures
+ * @crtc: DRM crtc
+ *
+ * Checks whether the framebuffer used for this CRTC changes as a result of
+ * the atomic update.  This is useful for drivers which cannot use
+ * drm_atomic_helper_wait_for_vblanks() and need to reimplement its
+ * functionality.
+ *
+ * Returns:
+ * true if the framebuffer changed.
+ */
+bool drm_atomic_helper_framebuffer_changed(struct drm_device *dev,
+                                          struct drm_atomic_state *old_state,
+                                          struct drm_crtc *crtc)
 {
        struct drm_plane *plane;
        struct drm_plane_state *old_plane_state;
@@ -965,6 +979,7 @@ static bool framebuffer_changed(struct drm_device *dev,
 
        return false;
 }
+EXPORT_SYMBOL(drm_atomic_helper_framebuffer_changed);
 
 /**
  * drm_atomic_helper_wait_for_vblanks - wait for vblank on crtcs
@@ -999,7 +1014,8 @@ drm_atomic_helper_wait_for_vblanks(struct drm_device *dev,
                if (old_state->legacy_cursor_update)
                        continue;
 
-               if (!framebuffer_changed(dev, old_state, crtc))
+               if (!drm_atomic_helper_framebuffer_changed(dev,
+                               old_state, crtc))
                        continue;
 
                ret = drm_crtc_vblank_get(crtc);
index 6ed90a2437e50438c713fc028f28705e7954b806..8ae13de272c43a7f79822098318a5c5317b54483 100644 (file)
@@ -803,12 +803,33 @@ static struct drm_dp_mst_branch *drm_dp_add_mst_branch_device(u8 lct, u8 *rad)
        return mstb;
 }
 
+static void drm_dp_free_mst_port(struct kref *kref);
+
+static void drm_dp_free_mst_branch_device(struct kref *kref)
+{
+       struct drm_dp_mst_branch *mstb = container_of(kref, struct drm_dp_mst_branch, kref);
+       if (mstb->port_parent) {
+               if (list_empty(&mstb->port_parent->next))
+                       kref_put(&mstb->port_parent->kref, drm_dp_free_mst_port);
+       }
+       kfree(mstb);
+}
+
 static void drm_dp_destroy_mst_branch_device(struct kref *kref)
 {
        struct drm_dp_mst_branch *mstb = container_of(kref, struct drm_dp_mst_branch, kref);
        struct drm_dp_mst_port *port, *tmp;
        bool wake_tx = false;
 
+       /*
+        * init kref again to be used by ports to remove mst branch when it is
+        * not needed anymore
+        */
+       kref_init(kref);
+
+       if (mstb->port_parent && list_empty(&mstb->port_parent->next))
+               kref_get(&mstb->port_parent->kref);
+
        /*
         * destroy all ports - don't need lock
         * as there are no more references to the mst branch
@@ -835,7 +856,8 @@ static void drm_dp_destroy_mst_branch_device(struct kref *kref)
 
        if (wake_tx)
                wake_up(&mstb->mgr->tx_waitq);
-       kfree(mstb);
+
+       kref_put(kref, drm_dp_free_mst_branch_device);
 }
 
 static void drm_dp_put_mst_branch_device(struct drm_dp_mst_branch *mstb)
@@ -883,6 +905,7 @@ static void drm_dp_destroy_port(struct kref *kref)
                         * from an EDID retrieval */
 
                        mutex_lock(&mgr->destroy_connector_lock);
+                       kref_get(&port->parent->kref);
                        list_add(&port->next, &mgr->destroy_connector_list);
                        mutex_unlock(&mgr->destroy_connector_lock);
                        schedule_work(&mgr->destroy_connector_work);
@@ -1018,18 +1041,27 @@ static bool drm_dp_port_setup_pdt(struct drm_dp_mst_port *port)
        return send_link;
 }
 
-static void drm_dp_check_port_guid(struct drm_dp_mst_branch *mstb,
-                                  struct drm_dp_mst_port *port)
+static void drm_dp_check_mstb_guid(struct drm_dp_mst_branch *mstb, u8 *guid)
 {
        int ret;
-       if (port->dpcd_rev >= 0x12) {
-               port->guid_valid = drm_dp_validate_guid(mstb->mgr, port->guid);
-               if (!port->guid_valid) {
-                       ret = drm_dp_send_dpcd_write(mstb->mgr,
-                                                    port,
-                                                    DP_GUID,
-                                                    16, port->guid);
-                       port->guid_valid = true;
+
+       memcpy(mstb->guid, guid, 16);
+
+       if (!drm_dp_validate_guid(mstb->mgr, mstb->guid)) {
+               if (mstb->port_parent) {
+                       ret = drm_dp_send_dpcd_write(
+                                       mstb->mgr,
+                                       mstb->port_parent,
+                                       DP_GUID,
+                                       16,
+                                       mstb->guid);
+               } else {
+
+                       ret = drm_dp_dpcd_write(
+                                       mstb->mgr->aux,
+                                       DP_GUID,
+                                       mstb->guid,
+                                       16);
                }
        }
 }
@@ -1086,7 +1118,6 @@ static void drm_dp_add_port(struct drm_dp_mst_branch *mstb,
        port->dpcd_rev = port_msg->dpcd_revision;
        port->num_sdp_streams = port_msg->num_sdp_streams;
        port->num_sdp_stream_sinks = port_msg->num_sdp_stream_sinks;
-       memcpy(port->guid, port_msg->peer_guid, 16);
 
        /* manage mstb port lists with mgr lock - take a reference
           for this list */
@@ -1099,11 +1130,9 @@ static void drm_dp_add_port(struct drm_dp_mst_branch *mstb,
 
        if (old_ddps != port->ddps) {
                if (port->ddps) {
-                       drm_dp_check_port_guid(mstb, port);
                        if (!port->input)
                                drm_dp_send_enum_path_resources(mstb->mgr, mstb, port);
                } else {
-                       port->guid_valid = false;
                        port->available_pbn = 0;
                        }
        }
@@ -1130,13 +1159,11 @@ static void drm_dp_add_port(struct drm_dp_mst_branch *mstb,
                        drm_dp_put_port(port);
                        goto out;
                }
-               if (port->port_num >= DP_MST_LOGICAL_PORT_0) {
-                       port->cached_edid = drm_get_edid(port->connector, &port->aux.ddc);
-                       drm_mode_connector_set_tile_property(port->connector);
-               }
+
+               drm_mode_connector_set_tile_property(port->connector);
+
                (*mstb->mgr->cbs->register_connector)(port->connector);
        }
-
 out:
        /* put reference to this port */
        drm_dp_put_port(port);
@@ -1161,11 +1188,9 @@ static void drm_dp_update_port(struct drm_dp_mst_branch *mstb,
        port->ddps = conn_stat->displayport_device_plug_status;
 
        if (old_ddps != port->ddps) {
+               dowork = true;
                if (port->ddps) {
-                       drm_dp_check_port_guid(mstb, port);
-                       dowork = true;
                } else {
-                       port->guid_valid = false;
                        port->available_pbn = 0;
                }
        }
@@ -1222,13 +1247,14 @@ static struct drm_dp_mst_branch *get_mst_branch_device_by_guid_helper(
        struct drm_dp_mst_branch *found_mstb;
        struct drm_dp_mst_port *port;
 
+       if (memcmp(mstb->guid, guid, 16) == 0)
+               return mstb;
+
+
        list_for_each_entry(port, &mstb->ports, next) {
                if (!port->mstb)
                        continue;
 
-               if (port->guid_valid && memcmp(port->guid, guid, 16) == 0)
-                       return port->mstb;
-
                found_mstb = get_mst_branch_device_by_guid_helper(port->mstb, guid);
 
                if (found_mstb)
@@ -1247,10 +1273,7 @@ static struct drm_dp_mst_branch *drm_dp_get_mst_branch_device_by_guid(
        /* find the port by iterating down */
        mutex_lock(&mgr->lock);
 
-       if (mgr->guid_valid && memcmp(mgr->guid, guid, 16) == 0)
-               mstb = mgr->mst_primary;
-       else
-               mstb = get_mst_branch_device_by_guid_helper(mgr->mst_primary, guid);
+       mstb = get_mst_branch_device_by_guid_helper(mgr->mst_primary, guid);
 
        if (mstb)
                kref_get(&mstb->kref);
@@ -1271,8 +1294,13 @@ static void drm_dp_check_and_send_link_address(struct drm_dp_mst_topology_mgr *m
                if (port->input)
                        continue;
 
-               if (!port->ddps)
+               if (!port->ddps) {
+                       if (port->cached_edid) {
+                               kfree(port->cached_edid);
+                               port->cached_edid = NULL;
+                       }
                        continue;
+               }
 
                if (!port->available_pbn)
                        drm_dp_send_enum_path_resources(mgr, mstb, port);
@@ -1283,6 +1311,12 @@ static void drm_dp_check_and_send_link_address(struct drm_dp_mst_topology_mgr *m
                                drm_dp_check_and_send_link_address(mgr, mstb_child);
                                drm_dp_put_mst_branch_device(mstb_child);
                        }
+               } else if (port->pdt == DP_PEER_DEVICE_SST_SINK ||
+                       port->pdt == DP_PEER_DEVICE_DP_LEGACY_CONV) {
+                       if (!port->cached_edid) {
+                               port->cached_edid =
+                                       drm_get_edid(port->connector, &port->aux.ddc);
+                       }
                }
        }
 }
@@ -1302,6 +1336,8 @@ static void drm_dp_mst_link_probe_work(struct work_struct *work)
                drm_dp_check_and_send_link_address(mgr, mstb);
                drm_dp_put_mst_branch_device(mstb);
        }
+
+       (*mgr->cbs->hotplug)(mgr);
 }
 
 static bool drm_dp_validate_guid(struct drm_dp_mst_topology_mgr *mgr,
@@ -1555,10 +1591,12 @@ static void drm_dp_send_link_address(struct drm_dp_mst_topology_mgr *mgr,
                                       txmsg->reply.u.link_addr.ports[i].num_sdp_streams,
                                       txmsg->reply.u.link_addr.ports[i].num_sdp_stream_sinks);
                        }
+
+                       drm_dp_check_mstb_guid(mstb, txmsg->reply.u.link_addr.guid);
+
                        for (i = 0; i < txmsg->reply.u.link_addr.nports; i++) {
                                drm_dp_add_port(mstb, mgr->dev, &txmsg->reply.u.link_addr.ports[i]);
                        }
-                       (*mgr->cbs->hotplug)(mgr);
                }
        } else {
                mstb->link_address_sent = false;
@@ -1602,6 +1640,37 @@ static int drm_dp_send_enum_path_resources(struct drm_dp_mst_topology_mgr *mgr,
        return 0;
 }
 
+static struct drm_dp_mst_port *drm_dp_get_last_connected_port_to_mstb(struct drm_dp_mst_branch *mstb)
+{
+       if (!mstb->port_parent)
+               return NULL;
+
+       if (mstb->port_parent->mstb != mstb)
+               return mstb->port_parent;
+
+       return drm_dp_get_last_connected_port_to_mstb(mstb->port_parent->parent);
+}
+
+static struct drm_dp_mst_branch *drm_dp_get_last_connected_port_and_mstb(struct drm_dp_mst_topology_mgr *mgr,
+                                                                        struct drm_dp_mst_branch *mstb,
+                                                                        int *port_num)
+{
+       struct drm_dp_mst_branch *rmstb = NULL;
+       struct drm_dp_mst_port *found_port;
+       mutex_lock(&mgr->lock);
+       if (mgr->mst_primary) {
+               found_port = drm_dp_get_last_connected_port_to_mstb(mstb);
+
+               if (found_port) {
+                       rmstb = found_port->parent;
+                       kref_get(&rmstb->kref);
+                       *port_num = found_port->port_num;
+               }
+       }
+       mutex_unlock(&mgr->lock);
+       return rmstb;
+}
+
 static int drm_dp_payload_send_msg(struct drm_dp_mst_topology_mgr *mgr,
                                   struct drm_dp_mst_port *port,
                                   int id,
@@ -1609,13 +1678,18 @@ static int drm_dp_payload_send_msg(struct drm_dp_mst_topology_mgr *mgr,
 {
        struct drm_dp_sideband_msg_tx *txmsg;
        struct drm_dp_mst_branch *mstb;
-       int len, ret;
+       int len, ret, port_num;
        u8 sinks[DRM_DP_MAX_SDP_STREAMS];
        int i;
 
+       port_num = port->port_num;
        mstb = drm_dp_get_validated_mstb_ref(mgr, port->parent);
-       if (!mstb)
-               return -EINVAL;
+       if (!mstb) {
+               mstb = drm_dp_get_last_connected_port_and_mstb(mgr, port->parent, &port_num);
+
+               if (!mstb)
+                       return -EINVAL;
+       }
 
        txmsg = kzalloc(sizeof(*txmsg), GFP_KERNEL);
        if (!txmsg) {
@@ -1627,7 +1701,7 @@ static int drm_dp_payload_send_msg(struct drm_dp_mst_topology_mgr *mgr,
                sinks[i] = i;
 
        txmsg->dst = mstb;
-       len = build_allocate_payload(txmsg, port->port_num,
+       len = build_allocate_payload(txmsg, port_num,
                                     id,
                                     pbn, port->num_sdp_streams, sinks);
 
@@ -1983,31 +2057,17 @@ int drm_dp_mst_topology_mgr_set_mst(struct drm_dp_mst_topology_mgr *mgr, bool ms
                mgr->mst_primary = mstb;
                kref_get(&mgr->mst_primary->kref);
 
-               {
-                       struct drm_dp_payload reset_pay;
-                       reset_pay.start_slot = 0;
-                       reset_pay.num_slots = 0x3f;
-                       drm_dp_dpcd_write_payload(mgr, 0, &reset_pay);
-               }
-
                ret = drm_dp_dpcd_writeb(mgr->aux, DP_MSTM_CTRL,
-                                        DP_MST_EN | DP_UP_REQ_EN | DP_UPSTREAM_IS_SRC);
+                                                        DP_MST_EN | DP_UP_REQ_EN | DP_UPSTREAM_IS_SRC);
                if (ret < 0) {
                        goto out_unlock;
                }
 
-
-               /* sort out guid */
-               ret = drm_dp_dpcd_read(mgr->aux, DP_GUID, mgr->guid, 16);
-               if (ret != 16) {
-                       DRM_DEBUG_KMS("failed to read DP GUID %d\n", ret);
-                       goto out_unlock;
-               }
-
-               mgr->guid_valid = drm_dp_validate_guid(mgr, mgr->guid);
-               if (!mgr->guid_valid) {
-                       ret = drm_dp_dpcd_write(mgr->aux, DP_GUID, mgr->guid, 16);
-                       mgr->guid_valid = true;
+               {
+                       struct drm_dp_payload reset_pay;
+                       reset_pay.start_slot = 0;
+                       reset_pay.num_slots = 0x3f;
+                       drm_dp_dpcd_write_payload(mgr, 0, &reset_pay);
                }
 
                queue_work(system_long_wq, &mgr->work);
@@ -2231,9 +2291,8 @@ static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr)
                        }
 
                        drm_dp_update_port(mstb, &msg.u.conn_stat);
-                       DRM_DEBUG_KMS("Got CSN: pn: %d ldps:%d ddps: %d mcs: %d ip: %d pdt: %d\n", msg.u.conn_stat.port_number, msg.u.conn_stat.legacy_device_plug_status, msg.u.conn_stat.displayport_device_plug_status, msg.u.conn_stat.message_capability_status, msg.u.conn_stat.input_port, msg.u.conn_stat.peer_device_type);
-                       (*mgr->cbs->hotplug)(mgr);
 
+                       DRM_DEBUG_KMS("Got CSN: pn: %d ldps:%d ddps: %d mcs: %d ip: %d pdt: %d\n", msg.u.conn_stat.port_number, msg.u.conn_stat.legacy_device_plug_status, msg.u.conn_stat.displayport_device_plug_status, msg.u.conn_stat.message_capability_status, msg.u.conn_stat.input_port, msg.u.conn_stat.peer_device_type);
                } else if (msg.req_type == DP_RESOURCE_STATUS_NOTIFY) {
                        drm_dp_send_up_ack_reply(mgr, mgr->mst_primary, msg.req_type, seqno, false);
                        if (!mstb)
@@ -2320,10 +2379,6 @@ enum drm_connector_status drm_dp_mst_detect_port(struct drm_connector *connector
 
        case DP_PEER_DEVICE_SST_SINK:
                status = connector_status_connected;
-               /* for logical ports - cache the EDID */
-               if (port->port_num >= 8 && !port->cached_edid) {
-                       port->cached_edid = drm_get_edid(connector, &port->aux.ddc);
-               }
                break;
        case DP_PEER_DEVICE_DP_LEGACY_CONV:
                if (port->ldps)
@@ -2378,10 +2433,7 @@ struct edid *drm_dp_mst_get_edid(struct drm_connector *connector, struct drm_dp_
 
        if (port->cached_edid)
                edid = drm_edid_duplicate(port->cached_edid);
-       else {
-               edid = drm_get_edid(connector, &port->aux.ddc);
-               drm_mode_connector_set_tile_property(connector);
-       }
+
        port->has_audio = drm_detect_monitor_audio(edid);
        drm_dp_put_port(port);
        return edid;
@@ -2446,6 +2498,7 @@ bool drm_dp_mst_allocate_vcpi(struct drm_dp_mst_topology_mgr *mgr, struct drm_dp
                DRM_DEBUG_KMS("payload: vcpi %d already allocated for pbn %d - requested pbn %d\n", port->vcpi.vcpi, port->vcpi.pbn, pbn);
                if (pbn == port->vcpi.pbn) {
                        *slots = port->vcpi.num_slots;
+                       drm_dp_put_port(port);
                        return true;
                }
        }
@@ -2605,32 +2658,31 @@ EXPORT_SYMBOL(drm_dp_check_act_status);
  */
 int drm_dp_calc_pbn_mode(int clock, int bpp)
 {
-       fixed20_12 pix_bw;
-       fixed20_12 fbpp;
-       fixed20_12 result;
-       fixed20_12 margin, tmp;
-       u32 res;
-
-       pix_bw.full = dfixed_const(clock);
-       fbpp.full = dfixed_const(bpp);
-       tmp.full = dfixed_const(8);
-       fbpp.full = dfixed_div(fbpp, tmp);
-
-       result.full = dfixed_mul(pix_bw, fbpp);
-       margin.full = dfixed_const(54);
-       tmp.full = dfixed_const(64);
-       margin.full = dfixed_div(margin, tmp);
-       result.full = dfixed_div(result, margin);
-
-       margin.full = dfixed_const(1006);
-       tmp.full = dfixed_const(1000);
-       margin.full = dfixed_div(margin, tmp);
-       result.full = dfixed_mul(result, margin);
-
-       result.full = dfixed_div(result, tmp);
-       result.full = dfixed_ceil(result);
-       res = dfixed_trunc(result);
-       return res;
+       u64 kbps;
+       s64 peak_kbps;
+       u32 numerator;
+       u32 denominator;
+
+       kbps = clock * bpp;
+
+       /*
+        * margin 5300ppm + 300ppm ~ 0.6% as per spec, factor is 1.006
+        * The unit of 54/64Mbytes/sec is an arbitrary unit chosen based on
+        * common multiplier to render an integer PBN for all link rate/lane
+        * counts combinations
+        * calculate
+        * peak_kbps *= (1006/1000)
+        * peak_kbps *= (64/54)
+        * peak_kbps *= 8    convert to bytes
+        */
+
+       numerator = 64 * 1006;
+       denominator = 54 * 8 * 1000 * 1000;
+
+       kbps *= numerator;
+       peak_kbps = drm_fixp_from_fraction(kbps, denominator);
+
+       return drm_fixp2int_ceil(peak_kbps);
 }
 EXPORT_SYMBOL(drm_dp_calc_pbn_mode);
 
@@ -2638,11 +2690,23 @@ static int test_calc_pbn_mode(void)
 {
        int ret;
        ret = drm_dp_calc_pbn_mode(154000, 30);
-       if (ret != 689)
+       if (ret != 689) {
+               DRM_ERROR("PBN calculation test failed - clock %d, bpp %d, expected PBN %d, actual PBN %d.\n",
+                               154000, 30, 689, ret);
                return -EINVAL;
+       }
        ret = drm_dp_calc_pbn_mode(234000, 30);
-       if (ret != 1047)
+       if (ret != 1047) {
+               DRM_ERROR("PBN calculation test failed - clock %d, bpp %d, expected PBN %d, actual PBN %d.\n",
+                               234000, 30, 1047, ret);
                return -EINVAL;
+       }
+       ret = drm_dp_calc_pbn_mode(297000, 24);
+       if (ret != 1063) {
+               DRM_ERROR("PBN calculation test failed - clock %d, bpp %d, expected PBN %d, actual PBN %d.\n",
+                               297000, 24, 1063, ret);
+               return -EINVAL;
+       }
        return 0;
 }
 
@@ -2783,6 +2847,13 @@ static void drm_dp_tx_work(struct work_struct *work)
        mutex_unlock(&mgr->qlock);
 }
 
+static void drm_dp_free_mst_port(struct kref *kref)
+{
+       struct drm_dp_mst_port *port = container_of(kref, struct drm_dp_mst_port, kref);
+       kref_put(&port->parent->kref, drm_dp_free_mst_branch_device);
+       kfree(port);
+}
+
 static void drm_dp_destroy_connector_work(struct work_struct *work)
 {
        struct drm_dp_mst_topology_mgr *mgr = container_of(work, struct drm_dp_mst_topology_mgr, destroy_connector_work);
@@ -2803,13 +2874,22 @@ static void drm_dp_destroy_connector_work(struct work_struct *work)
                list_del(&port->next);
                mutex_unlock(&mgr->destroy_connector_lock);
 
+               kref_init(&port->kref);
+               INIT_LIST_HEAD(&port->next);
+
                mgr->cbs->destroy_connector(mgr, port->connector);
 
                drm_dp_port_teardown_pdt(port, port->pdt);
 
-               if (!port->input && port->vcpi.vcpi > 0)
-                       drm_dp_mst_put_payload_id(mgr, port->vcpi.vcpi);
-               kfree(port);
+               if (!port->input && port->vcpi.vcpi > 0) {
+                       if (mgr->mst_state) {
+                               drm_dp_mst_reset_vcpi_slots(mgr, port);
+                               drm_dp_update_payload_part1(mgr);
+                               drm_dp_mst_put_payload_id(mgr, port->vcpi.vcpi);
+                       }
+               }
+
+               kref_put(&port->kref, drm_dp_free_mst_port);
                send_hotplug = true;
        }
        if (send_hotplug)
@@ -2847,6 +2927,9 @@ int drm_dp_mst_topology_mgr_init(struct drm_dp_mst_topology_mgr *mgr,
        mgr->max_dpcd_transaction_bytes = max_dpcd_transaction_bytes;
        mgr->max_payloads = max_payloads;
        mgr->conn_base_id = conn_base_id;
+       if (max_payloads + 1 > sizeof(mgr->payload_mask) * 8 ||
+           max_payloads + 1 > sizeof(mgr->vcpi_mask) * 8)
+               return -EINVAL;
        mgr->payloads = kcalloc(max_payloads, sizeof(struct drm_dp_payload), GFP_KERNEL);
        if (!mgr->payloads)
                return -ENOMEM;
@@ -2854,7 +2937,9 @@ int drm_dp_mst_topology_mgr_init(struct drm_dp_mst_topology_mgr *mgr,
        if (!mgr->proposed_vcpis)
                return -ENOMEM;
        set_bit(0, &mgr->payload_mask);
-       test_calc_pbn_mode();
+       if (test_calc_pbn_mode() < 0)
+               DRM_ERROR("MST PBN self-test failed\n");
+
        return 0;
 }
 EXPORT_SYMBOL(drm_dp_mst_topology_mgr_init);
index c3b80fd65d6254e89caf3381529f673764286115..7b30b307674ba6d1b71065a6923758ea251d05b2 100644 (file)
@@ -198,10 +198,7 @@ EXPORT_SYMBOL(drm_ht_remove_item);
 void drm_ht_remove(struct drm_open_hash *ht)
 {
        if (ht->table) {
-               if ((PAGE_SIZE / sizeof(*ht->table)) >> ht->order)
-                       kfree(ht->table);
-               else
-                       vfree(ht->table);
+               kvfree(ht->table);
                ht->table = NULL;
        }
 }
index 9e585d51fb7807278aeef68221f186f8f36c4a3c..e881482b5971d16ee2539307f6bf41746fc4dbef 100644 (file)
@@ -8,8 +8,8 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng
 git clone git://0x04.net/rules-ng-ng
 
 The rules-ng-ng source files this header was generated from are:
-- state_vg.xml (   5973 bytes, from 2015-03-25 11:26:01)
-- common.xml   (  18437 bytes, from 2015-03-25 11:27:41)
+- state_hi.xml (  24309 bytes, from 2015-12-12 09:02:53)
+- common.xml   (  18379 bytes, from 2015-12-12 09:02:53)
 
 Copyright (C) 2015
 */
@@ -30,15 +30,19 @@ Copyright (C) 2015
 #define ENDIAN_MODE_NO_SWAP                                    0x00000000
 #define ENDIAN_MODE_SWAP_16                                    0x00000001
 #define ENDIAN_MODE_SWAP_32                                    0x00000002
+#define chipModel_GC200                                                0x00000200
 #define chipModel_GC300                                                0x00000300
 #define chipModel_GC320                                                0x00000320
+#define chipModel_GC328                                                0x00000328
 #define chipModel_GC350                                                0x00000350
 #define chipModel_GC355                                                0x00000355
 #define chipModel_GC400                                                0x00000400
 #define chipModel_GC410                                                0x00000410
 #define chipModel_GC420                                                0x00000420
+#define chipModel_GC428                                                0x00000428
 #define chipModel_GC450                                                0x00000450
 #define chipModel_GC500                                                0x00000500
+#define chipModel_GC520                                                0x00000520
 #define chipModel_GC530                                                0x00000530
 #define chipModel_GC600                                                0x00000600
 #define chipModel_GC700                                                0x00000700
@@ -46,9 +50,16 @@ Copyright (C) 2015
 #define chipModel_GC860                                                0x00000860
 #define chipModel_GC880                                                0x00000880
 #define chipModel_GC1000                                       0x00001000
+#define chipModel_GC1500                                       0x00001500
 #define chipModel_GC2000                                       0x00002000
 #define chipModel_GC2100                                       0x00002100
+#define chipModel_GC2200                                       0x00002200
+#define chipModel_GC2500                                       0x00002500
+#define chipModel_GC3000                                       0x00003000
 #define chipModel_GC4000                                       0x00004000
+#define chipModel_GC5000                                       0x00005000
+#define chipModel_GC5200                                       0x00005200
+#define chipModel_GC6400                                       0x00006400
 #define RGBA_BITS_R                                            0x00000001
 #define RGBA_BITS_G                                            0x00000002
 #define RGBA_BITS_B                                            0x00000004
@@ -160,7 +171,7 @@ Copyright (C) 2015
 #define chipMinorFeatures2_UNK8                                        0x00000100
 #define chipMinorFeatures2_UNK9                                        0x00000200
 #define chipMinorFeatures2_UNK10                               0x00000400
-#define chipMinorFeatures2_SAMPLERBASE_16                      0x00000800
+#define chipMinorFeatures2_HALTI1                              0x00000800
 #define chipMinorFeatures2_UNK12                               0x00001000
 #define chipMinorFeatures2_UNK13                               0x00002000
 #define chipMinorFeatures2_UNK14                               0x00004000
@@ -189,7 +200,7 @@ Copyright (C) 2015
 #define chipMinorFeatures3_UNK5                                        0x00000020
 #define chipMinorFeatures3_UNK6                                        0x00000040
 #define chipMinorFeatures3_UNK7                                        0x00000080
-#define chipMinorFeatures3_UNK8                                        0x00000100
+#define chipMinorFeatures3_FAST_MSAA                           0x00000100
 #define chipMinorFeatures3_UNK9                                        0x00000200
 #define chipMinorFeatures3_BUG_FIXES10                         0x00000400
 #define chipMinorFeatures3_UNK11                               0x00000800
@@ -199,7 +210,7 @@ Copyright (C) 2015
 #define chipMinorFeatures3_UNK15                               0x00008000
 #define chipMinorFeatures3_UNK16                               0x00010000
 #define chipMinorFeatures3_UNK17                               0x00020000
-#define chipMinorFeatures3_UNK18                               0x00040000
+#define chipMinorFeatures3_ACE                                 0x00040000
 #define chipMinorFeatures3_UNK19                               0x00080000
 #define chipMinorFeatures3_UNK20                               0x00100000
 #define chipMinorFeatures3_UNK21                               0x00200000
@@ -207,7 +218,7 @@ Copyright (C) 2015
 #define chipMinorFeatures3_UNK23                               0x00800000
 #define chipMinorFeatures3_UNK24                               0x01000000
 #define chipMinorFeatures3_UNK25                               0x02000000
-#define chipMinorFeatures3_UNK26                               0x04000000
+#define chipMinorFeatures3_NEW_HZ                              0x04000000
 #define chipMinorFeatures3_UNK27                               0x08000000
 #define chipMinorFeatures3_UNK28                               0x10000000
 #define chipMinorFeatures3_UNK29                               0x20000000
@@ -229,9 +240,9 @@ Copyright (C) 2015
 #define chipMinorFeatures4_UNK13                               0x00002000
 #define chipMinorFeatures4_UNK14                               0x00004000
 #define chipMinorFeatures4_UNK15                               0x00008000
-#define chipMinorFeatures4_UNK16                               0x00010000
+#define chipMinorFeatures4_HALTI2                              0x00010000
 #define chipMinorFeatures4_UNK17                               0x00020000
-#define chipMinorFeatures4_UNK18                               0x00040000
+#define chipMinorFeatures4_SMALL_MSAA                          0x00040000
 #define chipMinorFeatures4_UNK19                               0x00080000
 #define chipMinorFeatures4_UNK20                               0x00100000
 #define chipMinorFeatures4_UNK21                               0x00200000
@@ -245,5 +256,37 @@ Copyright (C) 2015
 #define chipMinorFeatures4_UNK29                               0x20000000
 #define chipMinorFeatures4_UNK30                               0x40000000
 #define chipMinorFeatures4_UNK31                               0x80000000
+#define chipMinorFeatures5_UNK0                                        0x00000001
+#define chipMinorFeatures5_UNK1                                        0x00000002
+#define chipMinorFeatures5_UNK2                                        0x00000004
+#define chipMinorFeatures5_UNK3                                        0x00000008
+#define chipMinorFeatures5_UNK4                                        0x00000010
+#define chipMinorFeatures5_UNK5                                        0x00000020
+#define chipMinorFeatures5_UNK6                                        0x00000040
+#define chipMinorFeatures5_UNK7                                        0x00000080
+#define chipMinorFeatures5_UNK8                                        0x00000100
+#define chipMinorFeatures5_HALTI3                              0x00000200
+#define chipMinorFeatures5_UNK10                               0x00000400
+#define chipMinorFeatures5_UNK11                               0x00000800
+#define chipMinorFeatures5_UNK12                               0x00001000
+#define chipMinorFeatures5_UNK13                               0x00002000
+#define chipMinorFeatures5_UNK14                               0x00004000
+#define chipMinorFeatures5_UNK15                               0x00008000
+#define chipMinorFeatures5_UNK16                               0x00010000
+#define chipMinorFeatures5_UNK17                               0x00020000
+#define chipMinorFeatures5_UNK18                               0x00040000
+#define chipMinorFeatures5_UNK19                               0x00080000
+#define chipMinorFeatures5_UNK20                               0x00100000
+#define chipMinorFeatures5_UNK21                               0x00200000
+#define chipMinorFeatures5_UNK22                               0x00400000
+#define chipMinorFeatures5_UNK23                               0x00800000
+#define chipMinorFeatures5_UNK24                               0x01000000
+#define chipMinorFeatures5_UNK25                               0x02000000
+#define chipMinorFeatures5_UNK26                               0x04000000
+#define chipMinorFeatures5_UNK27                               0x08000000
+#define chipMinorFeatures5_UNK28                               0x10000000
+#define chipMinorFeatures5_UNK29                               0x20000000
+#define chipMinorFeatures5_UNK30                               0x40000000
+#define chipMinorFeatures5_UNK31                               0x80000000
 
 #endif /* COMMON_XML */
index 5c89ebb52fd29b1357316fa000d4a61188f4d320..e8858985f01ea0fb5a17e39a3c7b246d6ae562d9 100644 (file)
@@ -668,7 +668,6 @@ static struct platform_driver etnaviv_platform_driver = {
        .probe      = etnaviv_pdev_probe,
        .remove     = etnaviv_pdev_remove,
        .driver     = {
-               .owner  = THIS_MODULE,
                .name   = "etnaviv",
                .of_match_table = dt_match,
        },
index d6bd438bd5bec73eb074aa15626fc8eb3d92d70b..1cd6046e76b11d824a06b24bd77a4cea3d1a1a93 100644 (file)
@@ -85,7 +85,7 @@ struct drm_gem_object *etnaviv_gem_prime_import_sg_table(struct drm_device *dev,
        struct dma_buf_attachment *attach, struct sg_table *sg);
 int etnaviv_gem_prime_pin(struct drm_gem_object *obj);
 void etnaviv_gem_prime_unpin(struct drm_gem_object *obj);
-void *etnaviv_gem_vaddr(struct drm_gem_object *obj);
+void *etnaviv_gem_vmap(struct drm_gem_object *obj);
 int etnaviv_gem_cpu_prep(struct drm_gem_object *obj, u32 op,
                struct timespec *timeout);
 int etnaviv_gem_cpu_fini(struct drm_gem_object *obj);
index bf8fa859e8bece4f037e1ef7eae1c6caf0a0c85d..4a29eeadbf1e738da23a6c29a2e00e8f68d81576 100644 (file)
@@ -201,7 +201,9 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu)
 
                obj = vram->object;
 
+               mutex_lock(&obj->lock);
                pages = etnaviv_gem_get_pages(obj);
+               mutex_unlock(&obj->lock);
                if (pages) {
                        int j;
 
@@ -213,8 +215,8 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu)
 
                iter.hdr->iova = cpu_to_le64(vram->iova);
 
-               vaddr = etnaviv_gem_vaddr(&obj->base);
-               if (vaddr && !IS_ERR(vaddr))
+               vaddr = etnaviv_gem_vmap(&obj->base);
+               if (vaddr)
                        memcpy(iter.data, vaddr, obj->base.size);
 
                etnaviv_core_dump_header(&iter, ETDUMP_BUF_BO, iter.data +
index 9f77c3b94cc696c4683d18da3e7848fb040874c6..4b519e4309b28edb1a1a9f56c40fb00153c87036 100644 (file)
@@ -353,25 +353,39 @@ void etnaviv_gem_put_iova(struct etnaviv_gpu *gpu, struct drm_gem_object *obj)
        drm_gem_object_unreference_unlocked(obj);
 }
 
-void *etnaviv_gem_vaddr(struct drm_gem_object *obj)
+void *etnaviv_gem_vmap(struct drm_gem_object *obj)
 {
        struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);
 
-       mutex_lock(&etnaviv_obj->lock);
-       if (!etnaviv_obj->vaddr) {
-               struct page **pages = etnaviv_gem_get_pages(etnaviv_obj);
-
-               if (IS_ERR(pages))
-                       return ERR_CAST(pages);
+       if (etnaviv_obj->vaddr)
+               return etnaviv_obj->vaddr;
 
-               etnaviv_obj->vaddr = vmap(pages, obj->size >> PAGE_SHIFT,
-                               VM_MAP, pgprot_writecombine(PAGE_KERNEL));
-       }
+       mutex_lock(&etnaviv_obj->lock);
+       /*
+        * Need to check again, as we might have raced with another thread
+        * while waiting for the mutex.
+        */
+       if (!etnaviv_obj->vaddr)
+               etnaviv_obj->vaddr = etnaviv_obj->ops->vmap(etnaviv_obj);
        mutex_unlock(&etnaviv_obj->lock);
 
        return etnaviv_obj->vaddr;
 }
 
+static void *etnaviv_gem_vmap_impl(struct etnaviv_gem_object *obj)
+{
+       struct page **pages;
+
+       lockdep_assert_held(&obj->lock);
+
+       pages = etnaviv_gem_get_pages(obj);
+       if (IS_ERR(pages))
+               return NULL;
+
+       return vmap(pages, obj->base.size >> PAGE_SHIFT,
+                       VM_MAP, pgprot_writecombine(PAGE_KERNEL));
+}
+
 static inline enum dma_data_direction etnaviv_op_to_dma_dir(u32 op)
 {
        if (op & ETNA_PREP_READ)
@@ -522,6 +536,7 @@ static void etnaviv_gem_shmem_release(struct etnaviv_gem_object *etnaviv_obj)
 static const struct etnaviv_gem_ops etnaviv_gem_shmem_ops = {
        .get_pages = etnaviv_gem_shmem_get_pages,
        .release = etnaviv_gem_shmem_release,
+       .vmap = etnaviv_gem_vmap_impl,
 };
 
 void etnaviv_gem_free_object(struct drm_gem_object *obj)
@@ -866,6 +881,7 @@ static void etnaviv_gem_userptr_release(struct etnaviv_gem_object *etnaviv_obj)
 static const struct etnaviv_gem_ops etnaviv_gem_userptr_ops = {
        .get_pages = etnaviv_gem_userptr_get_pages,
        .release = etnaviv_gem_userptr_release,
+       .vmap = etnaviv_gem_vmap_impl,
 };
 
 int etnaviv_gem_new_userptr(struct drm_device *dev, struct drm_file *file,
index a300b4b3d5458654fd1c0f200227976f64529e97..ab5df8147a5f6f1aaccad5c6ce14d99612e0ea27 100644 (file)
@@ -78,6 +78,7 @@ struct etnaviv_gem_object *to_etnaviv_bo(struct drm_gem_object *obj)
 struct etnaviv_gem_ops {
        int (*get_pages)(struct etnaviv_gem_object *);
        void (*release)(struct etnaviv_gem_object *);
+       void *(*vmap)(struct etnaviv_gem_object *);
 };
 
 static inline bool is_active(struct etnaviv_gem_object *etnaviv_obj)
index e94db4f95770e586837653b0a1b73d251bbe75d8..4e67395f5fa1c1572aa7051a86538aa16389f807 100644 (file)
@@ -31,7 +31,7 @@ struct sg_table *etnaviv_gem_prime_get_sg_table(struct drm_gem_object *obj)
 
 void *etnaviv_gem_prime_vmap(struct drm_gem_object *obj)
 {
-       return etnaviv_gem_vaddr(obj);
+       return etnaviv_gem_vmap(obj);
 }
 
 void etnaviv_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
@@ -77,9 +77,17 @@ static void etnaviv_gem_prime_release(struct etnaviv_gem_object *etnaviv_obj)
        drm_prime_gem_destroy(&etnaviv_obj->base, etnaviv_obj->sgt);
 }
 
+static void *etnaviv_gem_prime_vmap_impl(struct etnaviv_gem_object *etnaviv_obj)
+{
+       lockdep_assert_held(&etnaviv_obj->lock);
+
+       return dma_buf_vmap(etnaviv_obj->base.import_attach->dmabuf);
+}
+
 static const struct etnaviv_gem_ops etnaviv_gem_prime_ops = {
        /* .get_pages should never be called */
        .release = etnaviv_gem_prime_release,
+       .vmap = etnaviv_gem_prime_vmap_impl,
 };
 
 struct drm_gem_object *etnaviv_gem_prime_import_sg_table(struct drm_device *dev,
index 056a72e6ed26273146269ee0258784e8a714d36a..a33162cf4f4cc6c4f0aee66433680a15f64cfffc 100644 (file)
@@ -72,6 +72,14 @@ int etnaviv_gpu_get_param(struct etnaviv_gpu *gpu, u32 param, u64 *value)
                *value = gpu->identity.minor_features3;
                break;
 
+       case ETNAVIV_PARAM_GPU_FEATURES_5:
+               *value = gpu->identity.minor_features4;
+               break;
+
+       case ETNAVIV_PARAM_GPU_FEATURES_6:
+               *value = gpu->identity.minor_features5;
+               break;
+
        case ETNAVIV_PARAM_GPU_STREAM_COUNT:
                *value = gpu->identity.stream_count;
                break;
@@ -112,6 +120,10 @@ int etnaviv_gpu_get_param(struct etnaviv_gpu *gpu, u32 param, u64 *value)
                *value = gpu->identity.num_constants;
                break;
 
+       case ETNAVIV_PARAM_GPU_NUM_VARYINGS:
+               *value = gpu->identity.varyings_count;
+               break;
+
        default:
                DBG("%s: invalid param: %u", dev_name(gpu->dev), param);
                return -EINVAL;
@@ -120,46 +132,56 @@ int etnaviv_gpu_get_param(struct etnaviv_gpu *gpu, u32 param, u64 *value)
        return 0;
 }
 
+
+#define etnaviv_is_model_rev(gpu, mod, rev) \
+       ((gpu)->identity.model == chipModel_##mod && \
+        (gpu)->identity.revision == rev)
+#define etnaviv_field(val, field) \
+       (((val) & field##__MASK) >> field##__SHIFT)
+
 static void etnaviv_hw_specs(struct etnaviv_gpu *gpu)
 {
        if (gpu->identity.minor_features0 &
            chipMinorFeatures0_MORE_MINOR_FEATURES) {
-               u32 specs[2];
+               u32 specs[4];
+               unsigned int streams;
 
                specs[0] = gpu_read(gpu, VIVS_HI_CHIP_SPECS);
                specs[1] = gpu_read(gpu, VIVS_HI_CHIP_SPECS_2);
-
-               gpu->identity.stream_count =
-                       (specs[0] & VIVS_HI_CHIP_SPECS_STREAM_COUNT__MASK)
-                               >> VIVS_HI_CHIP_SPECS_STREAM_COUNT__SHIFT;
-               gpu->identity.register_max =
-                       (specs[0] & VIVS_HI_CHIP_SPECS_REGISTER_MAX__MASK)
-                               >> VIVS_HI_CHIP_SPECS_REGISTER_MAX__SHIFT;
-               gpu->identity.thread_count =
-                       (specs[0] & VIVS_HI_CHIP_SPECS_THREAD_COUNT__MASK)
-                               >> VIVS_HI_CHIP_SPECS_THREAD_COUNT__SHIFT;
-               gpu->identity.vertex_cache_size =
-                       (specs[0] & VIVS_HI_CHIP_SPECS_VERTEX_CACHE_SIZE__MASK)
-                               >> VIVS_HI_CHIP_SPECS_VERTEX_CACHE_SIZE__SHIFT;
-               gpu->identity.shader_core_count =
-                       (specs[0] & VIVS_HI_CHIP_SPECS_SHADER_CORE_COUNT__MASK)
-                               >> VIVS_HI_CHIP_SPECS_SHADER_CORE_COUNT__SHIFT;
-               gpu->identity.pixel_pipes =
-                       (specs[0] & VIVS_HI_CHIP_SPECS_PIXEL_PIPES__MASK)
-                               >> VIVS_HI_CHIP_SPECS_PIXEL_PIPES__SHIFT;
+               specs[2] = gpu_read(gpu, VIVS_HI_CHIP_SPECS_3);
+               specs[3] = gpu_read(gpu, VIVS_HI_CHIP_SPECS_4);
+
+               gpu->identity.stream_count = etnaviv_field(specs[0],
+                                       VIVS_HI_CHIP_SPECS_STREAM_COUNT);
+               gpu->identity.register_max = etnaviv_field(specs[0],
+                                       VIVS_HI_CHIP_SPECS_REGISTER_MAX);
+               gpu->identity.thread_count = etnaviv_field(specs[0],
+                                       VIVS_HI_CHIP_SPECS_THREAD_COUNT);
+               gpu->identity.vertex_cache_size = etnaviv_field(specs[0],
+                                       VIVS_HI_CHIP_SPECS_VERTEX_CACHE_SIZE);
+               gpu->identity.shader_core_count = etnaviv_field(specs[0],
+                                       VIVS_HI_CHIP_SPECS_SHADER_CORE_COUNT);
+               gpu->identity.pixel_pipes = etnaviv_field(specs[0],
+                                       VIVS_HI_CHIP_SPECS_PIXEL_PIPES);
                gpu->identity.vertex_output_buffer_size =
-                       (specs[0] & VIVS_HI_CHIP_SPECS_VERTEX_OUTPUT_BUFFER_SIZE__MASK)
-                               >> VIVS_HI_CHIP_SPECS_VERTEX_OUTPUT_BUFFER_SIZE__SHIFT;
-
-               gpu->identity.buffer_size =
-                       (specs[1] & VIVS_HI_CHIP_SPECS_2_BUFFER_SIZE__MASK)
-                               >> VIVS_HI_CHIP_SPECS_2_BUFFER_SIZE__SHIFT;
-               gpu->identity.instruction_count =
-                       (specs[1] & VIVS_HI_CHIP_SPECS_2_INSTRUCTION_COUNT__MASK)
-                               >> VIVS_HI_CHIP_SPECS_2_INSTRUCTION_COUNT__SHIFT;
-               gpu->identity.num_constants =
-                       (specs[1] & VIVS_HI_CHIP_SPECS_2_NUM_CONSTANTS__MASK)
-                               >> VIVS_HI_CHIP_SPECS_2_NUM_CONSTANTS__SHIFT;
+                       etnaviv_field(specs[0],
+                               VIVS_HI_CHIP_SPECS_VERTEX_OUTPUT_BUFFER_SIZE);
+
+               gpu->identity.buffer_size = etnaviv_field(specs[1],
+                                       VIVS_HI_CHIP_SPECS_2_BUFFER_SIZE);
+               gpu->identity.instruction_count = etnaviv_field(specs[1],
+                                       VIVS_HI_CHIP_SPECS_2_INSTRUCTION_COUNT);
+               gpu->identity.num_constants = etnaviv_field(specs[1],
+                                       VIVS_HI_CHIP_SPECS_2_NUM_CONSTANTS);
+
+               gpu->identity.varyings_count = etnaviv_field(specs[2],
+                                       VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT);
+
+               /* This overrides the value from older register if non-zero */
+               streams = etnaviv_field(specs[3],
+                                       VIVS_HI_CHIP_SPECS_4_STREAM_COUNT);
+               if (streams)
+                       gpu->identity.stream_count = streams;
        }
 
        /* Fill in the stream count if not specified */
@@ -173,7 +195,7 @@ static void etnaviv_hw_specs(struct etnaviv_gpu *gpu)
        /* Convert the register max value */
        if (gpu->identity.register_max)
                gpu->identity.register_max = 1 << gpu->identity.register_max;
-       else if (gpu->identity.model == 0x0400)
+       else if (gpu->identity.model == chipModel_GC400)
                gpu->identity.register_max = 32;
        else
                gpu->identity.register_max = 64;
@@ -181,10 +203,10 @@ static void etnaviv_hw_specs(struct etnaviv_gpu *gpu)
        /* Convert thread count */
        if (gpu->identity.thread_count)
                gpu->identity.thread_count = 1 << gpu->identity.thread_count;
-       else if (gpu->identity.model == 0x0400)
+       else if (gpu->identity.model == chipModel_GC400)
                gpu->identity.thread_count = 64;
-       else if (gpu->identity.model == 0x0500 ||
-                gpu->identity.model == 0x0530)
+       else if (gpu->identity.model == chipModel_GC500 ||
+                gpu->identity.model == chipModel_GC530)
                gpu->identity.thread_count = 128;
        else
                gpu->identity.thread_count = 256;
@@ -206,7 +228,7 @@ static void etnaviv_hw_specs(struct etnaviv_gpu *gpu)
        if (gpu->identity.vertex_output_buffer_size) {
                gpu->identity.vertex_output_buffer_size =
                        1 << gpu->identity.vertex_output_buffer_size;
-       } else if (gpu->identity.model == 0x0400) {
+       } else if (gpu->identity.model == chipModel_GC400) {
                if (gpu->identity.revision < 0x4000)
                        gpu->identity.vertex_output_buffer_size = 512;
                else if (gpu->identity.revision < 0x4200)
@@ -219,9 +241,8 @@ static void etnaviv_hw_specs(struct etnaviv_gpu *gpu)
 
        switch (gpu->identity.instruction_count) {
        case 0:
-               if ((gpu->identity.model == 0x2000 &&
-                    gpu->identity.revision == 0x5108) ||
-                   gpu->identity.model == 0x880)
+               if (etnaviv_is_model_rev(gpu, GC2000, 0x5108) ||
+                   gpu->identity.model == chipModel_GC880)
                        gpu->identity.instruction_count = 512;
                else
                        gpu->identity.instruction_count = 256;
@@ -242,6 +263,30 @@ static void etnaviv_hw_specs(struct etnaviv_gpu *gpu)
 
        if (gpu->identity.num_constants == 0)
                gpu->identity.num_constants = 168;
+
+       if (gpu->identity.varyings_count == 0) {
+               if (gpu->identity.minor_features1 & chipMinorFeatures1_HALTI0)
+                       gpu->identity.varyings_count = 12;
+               else
+                       gpu->identity.varyings_count = 8;
+       }
+
+       /*
+        * For some cores, two varyings are consumed for position, so the
+        * maximum varying count needs to be reduced by one.
+        */
+       if (etnaviv_is_model_rev(gpu, GC5000, 0x5434) ||
+           etnaviv_is_model_rev(gpu, GC4000, 0x5222) ||
+           etnaviv_is_model_rev(gpu, GC4000, 0x5245) ||
+           etnaviv_is_model_rev(gpu, GC4000, 0x5208) ||
+           etnaviv_is_model_rev(gpu, GC3000, 0x5435) ||
+           etnaviv_is_model_rev(gpu, GC2200, 0x5244) ||
+           etnaviv_is_model_rev(gpu, GC2100, 0x5108) ||
+           etnaviv_is_model_rev(gpu, GC2000, 0x5108) ||
+           etnaviv_is_model_rev(gpu, GC1500, 0x5246) ||
+           etnaviv_is_model_rev(gpu, GC880, 0x5107) ||
+           etnaviv_is_model_rev(gpu, GC880, 0x5106))
+               gpu->identity.varyings_count -= 1;
 }
 
 static void etnaviv_hw_identify(struct etnaviv_gpu *gpu)
@@ -251,12 +296,10 @@ static void etnaviv_hw_identify(struct etnaviv_gpu *gpu)
        chipIdentity = gpu_read(gpu, VIVS_HI_CHIP_IDENTITY);
 
        /* Special case for older graphic cores. */
-       if (((chipIdentity & VIVS_HI_CHIP_IDENTITY_FAMILY__MASK)
-            >> VIVS_HI_CHIP_IDENTITY_FAMILY__SHIFT) ==  0x01) {
-               gpu->identity.model    = 0x500; /* gc500 */
-               gpu->identity.revision =
-                       (chipIdentity & VIVS_HI_CHIP_IDENTITY_REVISION__MASK)
-                       >> VIVS_HI_CHIP_IDENTITY_REVISION__SHIFT;
+       if (etnaviv_field(chipIdentity, VIVS_HI_CHIP_IDENTITY_FAMILY) == 0x01) {
+               gpu->identity.model    = chipModel_GC500;
+               gpu->identity.revision = etnaviv_field(chipIdentity,
+                                        VIVS_HI_CHIP_IDENTITY_REVISION);
        } else {
 
                gpu->identity.model = gpu_read(gpu, VIVS_HI_CHIP_MODEL);
@@ -269,13 +312,12 @@ static void etnaviv_hw_identify(struct etnaviv_gpu *gpu)
                 * same.  Only for GC400 family.
                 */
                if ((gpu->identity.model & 0xff00) == 0x0400 &&
-                   gpu->identity.model != 0x0420) {
+                   gpu->identity.model != chipModel_GC420) {
                        gpu->identity.model = gpu->identity.model & 0x0400;
                }
 
                /* Another special case */
-               if (gpu->identity.model == 0x300 &&
-                   gpu->identity.revision == 0x2201) {
+               if (etnaviv_is_model_rev(gpu, GC300, 0x2201)) {
                        u32 chipDate = gpu_read(gpu, VIVS_HI_CHIP_DATE);
                        u32 chipTime = gpu_read(gpu, VIVS_HI_CHIP_TIME);
 
@@ -295,11 +337,13 @@ static void etnaviv_hw_identify(struct etnaviv_gpu *gpu)
        gpu->identity.features = gpu_read(gpu, VIVS_HI_CHIP_FEATURE);
 
        /* Disable fast clear on GC700. */
-       if (gpu->identity.model == 0x700)
+       if (gpu->identity.model == chipModel_GC700)
                gpu->identity.features &= ~chipFeatures_FAST_CLEAR;
 
-       if ((gpu->identity.model == 0x500 && gpu->identity.revision < 2) ||
-           (gpu->identity.model == 0x300 && gpu->identity.revision < 0x2000)) {
+       if ((gpu->identity.model == chipModel_GC500 &&
+            gpu->identity.revision < 2) ||
+           (gpu->identity.model == chipModel_GC300 &&
+            gpu->identity.revision < 0x2000)) {
 
                /*
                 * GC500 rev 1.x and GC300 rev < 2.0 doesn't have these
@@ -309,6 +353,8 @@ static void etnaviv_hw_identify(struct etnaviv_gpu *gpu)
                gpu->identity.minor_features1 = 0;
                gpu->identity.minor_features2 = 0;
                gpu->identity.minor_features3 = 0;
+               gpu->identity.minor_features4 = 0;
+               gpu->identity.minor_features5 = 0;
        } else
                gpu->identity.minor_features0 =
                                gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_0);
@@ -321,6 +367,10 @@ static void etnaviv_hw_identify(struct etnaviv_gpu *gpu)
                                gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_2);
                gpu->identity.minor_features3 =
                                gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_3);
+               gpu->identity.minor_features4 =
+                               gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_4);
+               gpu->identity.minor_features5 =
+                               gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_5);
        }
 
        /* GC600 idle register reports zero bits where modules aren't present */
@@ -441,10 +491,9 @@ static void etnaviv_gpu_hw_init(struct etnaviv_gpu *gpu)
 {
        u16 prefetch;
 
-       if (gpu->identity.model == chipModel_GC320 &&
-           gpu_read(gpu, VIVS_HI_CHIP_TIME) != 0x2062400 &&
-           (gpu->identity.revision == 0x5007 ||
-            gpu->identity.revision == 0x5220)) {
+       if ((etnaviv_is_model_rev(gpu, GC320, 0x5007) ||
+            etnaviv_is_model_rev(gpu, GC320, 0x5220)) &&
+           gpu_read(gpu, VIVS_HI_CHIP_TIME) != 0x2062400) {
                u32 mc_memory_debug;
 
                mc_memory_debug = gpu_read(gpu, VIVS_MC_DEBUG_MEMORY) & ~0xff;
@@ -466,7 +515,7 @@ static void etnaviv_gpu_hw_init(struct etnaviv_gpu *gpu)
                  VIVS_HI_AXI_CONFIG_ARCACHE(2));
 
        /* GC2000 rev 5108 needs a special bus config */
-       if (gpu->identity.model == 0x2000 && gpu->identity.revision == 0x5108) {
+       if (etnaviv_is_model_rev(gpu, GC2000, 0x5108)) {
                u32 bus_config = gpu_read(gpu, VIVS_MC_BUS_CONFIG);
                bus_config &= ~(VIVS_MC_BUS_CONFIG_FE_BUS_CONFIG__MASK |
                                VIVS_MC_BUS_CONFIG_TX_BUS_CONFIG__MASK);
@@ -511,8 +560,16 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
 
        if (gpu->identity.model == 0) {
                dev_err(gpu->dev, "Unknown GPU model\n");
-               pm_runtime_put_autosuspend(gpu->dev);
-               return -ENXIO;
+               ret = -ENXIO;
+               goto fail;
+       }
+
+       /* Exclude VG cores with FE2.0 */
+       if (gpu->identity.features & chipFeatures_PIPE_VG &&
+           gpu->identity.features & chipFeatures_FE20) {
+               dev_info(gpu->dev, "Ignoring GPU with VG and FE2.0\n");
+               ret = -ENXIO;
+               goto fail;
        }
 
        ret = etnaviv_hw_reset(gpu);
@@ -539,10 +596,9 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
                goto fail;
        }
 
-       /* TODO: we will leak here memory - fix it! */
-
        gpu->mmu = etnaviv_iommu_new(gpu, iommu, version);
        if (!gpu->mmu) {
+               iommu_domain_free(iommu);
                ret = -ENOMEM;
                goto fail;
        }
@@ -552,7 +608,7 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
        if (!gpu->buffer) {
                ret = -ENOMEM;
                dev_err(gpu->dev, "could not create command buffer\n");
-               goto fail;
+               goto destroy_iommu;
        }
        if (gpu->buffer->paddr - gpu->memory_base > 0x80000000) {
                ret = -EINVAL;
@@ -582,6 +638,9 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
 free_buffer:
        etnaviv_gpu_cmdbuf_free(gpu->buffer);
        gpu->buffer = NULL;
+destroy_iommu:
+       etnaviv_iommu_destroy(gpu->mmu);
+       gpu->mmu = NULL;
 fail:
        pm_runtime_mark_last_busy(gpu->dev);
        pm_runtime_put_autosuspend(gpu->dev);
@@ -642,6 +701,10 @@ int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, struct seq_file *m)
                   gpu->identity.minor_features2);
        seq_printf(m, "\t minor_features3: 0x%08x\n",
                   gpu->identity.minor_features3);
+       seq_printf(m, "\t minor_features4: 0x%08x\n",
+                  gpu->identity.minor_features4);
+       seq_printf(m, "\t minor_features5: 0x%08x\n",
+                  gpu->identity.minor_features5);
 
        seq_puts(m, "\tspecs\n");
        seq_printf(m, "\t stream_count:  %d\n",
@@ -664,6 +727,8 @@ int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, struct seq_file *m)
                        gpu->identity.instruction_count);
        seq_printf(m, "\t num_constants: %d\n",
                        gpu->identity.num_constants);
+       seq_printf(m, "\t varyings_count: %d\n",
+                       gpu->identity.varyings_count);
 
        seq_printf(m, "\taxi: 0x%08x\n", axi);
        seq_printf(m, "\tidle: 0x%08x\n", idle);
index c75d50359ab07baa40d7813fe07cd9928009df91..f233ac4c7c1cea456ee4c4c4b7e0d0878aefc093 100644 (file)
@@ -46,6 +46,12 @@ struct etnaviv_chip_identity {
        /* Supported minor feature 3 fields. */
        u32 minor_features3;
 
+       /* Supported minor feature 4 fields. */
+       u32 minor_features4;
+
+       /* Supported minor feature 5 fields. */
+       u32 minor_features5;
+
        /* Number of streams supported. */
        u32 stream_count;
 
@@ -75,6 +81,9 @@ struct etnaviv_chip_identity {
 
        /* Buffer size */
        u32 buffer_size;
+
+       /* Number of varyings */
+       u8 varyings_count;
 };
 
 struct etnaviv_event {
index 0064f2640396111bce54f3ae0caa9f45e94717d0..6a7de5f1454a51532c0809e2e741c53db4fba4e9 100644 (file)
@@ -8,8 +8,8 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng
 git clone git://0x04.net/rules-ng-ng
 
 The rules-ng-ng source files this header was generated from are:
-- state_hi.xml (  23420 bytes, from 2015-03-25 11:47:21)
-- common.xml   (  18437 bytes, from 2015-03-25 11:27:41)
+- state_hi.xml (  24309 bytes, from 2015-12-12 09:02:53)
+- common.xml   (  18437 bytes, from 2015-12-12 09:02:53)
 
 Copyright (C) 2015
 */
@@ -182,8 +182,25 @@ Copyright (C) 2015
 
 #define VIVS_HI_CHIP_MINOR_FEATURE_3                           0x00000088
 
+#define VIVS_HI_CHIP_SPECS_3                                   0x0000008c
+#define VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT__MASK              0x000001f0
+#define VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT__SHIFT             4
+#define VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT(x)                 (((x) << VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT__SHIFT) & VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT__MASK)
+#define VIVS_HI_CHIP_SPECS_3_GPU_CORE_COUNT__MASK              0x00000007
+#define VIVS_HI_CHIP_SPECS_3_GPU_CORE_COUNT__SHIFT             0
+#define VIVS_HI_CHIP_SPECS_3_GPU_CORE_COUNT(x)                 (((x) << VIVS_HI_CHIP_SPECS_3_GPU_CORE_COUNT__SHIFT) & VIVS_HI_CHIP_SPECS_3_GPU_CORE_COUNT__MASK)
+
 #define VIVS_HI_CHIP_MINOR_FEATURE_4                           0x00000094
 
+#define VIVS_HI_CHIP_SPECS_4                                   0x0000009c
+#define VIVS_HI_CHIP_SPECS_4_STREAM_COUNT__MASK                        0x0001f000
+#define VIVS_HI_CHIP_SPECS_4_STREAM_COUNT__SHIFT               12
+#define VIVS_HI_CHIP_SPECS_4_STREAM_COUNT(x)                   (((x) << VIVS_HI_CHIP_SPECS_4_STREAM_COUNT__SHIFT) & VIVS_HI_CHIP_SPECS_4_STREAM_COUNT__MASK)
+
+#define VIVS_HI_CHIP_MINOR_FEATURE_5                           0x000000a0
+
+#define VIVS_HI_CHIP_PRODUCT_ID                                        0x000000a8
+
 #define VIVS_PM                                                        0x00000000
 
 #define VIVS_PM_POWER_CONTROLS                                 0x00000100
@@ -206,6 +223,11 @@ Copyright (C) 2015
 #define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_FE            0x00000001
 #define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_DE            0x00000002
 #define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_PE            0x00000004
+#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_SH            0x00000008
+#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_PA            0x00000010
+#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_SE            0x00000020
+#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_RA            0x00000040
+#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_TX            0x00000080
 
 #define VIVS_PM_PULSE_EATER                                    0x0000010c
 
index b79c316c2ad2ce7b7eb48e152a49c41ab6e93431..673164b331c8806e14aaa10d82e2f787c10bfb09 100644 (file)
@@ -1392,7 +1392,7 @@ static const struct component_ops exynos_dp_ops = {
 static int exynos_dp_probe(struct platform_device *pdev)
 {
        struct device *dev = &pdev->dev;
-       struct device_node *panel_node = NULL, *bridge_node, *endpoint = NULL;
+       struct device_node *np = NULL, *endpoint = NULL;
        struct exynos_dp_device *dp;
        int ret;
 
@@ -1404,41 +1404,36 @@ static int exynos_dp_probe(struct platform_device *pdev)
        platform_set_drvdata(pdev, dp);
 
        /* This is for the backward compatibility. */
-       panel_node = of_parse_phandle(dev->of_node, "panel", 0);
-       if (panel_node) {
-               dp->panel = of_drm_find_panel(panel_node);
-               of_node_put(panel_node);
+       np = of_parse_phandle(dev->of_node, "panel", 0);
+       if (np) {
+               dp->panel = of_drm_find_panel(np);
+               of_node_put(np);
                if (!dp->panel)
                        return -EPROBE_DEFER;
-       } else {
-               endpoint = of_graph_get_next_endpoint(dev->of_node, NULL);
-               if (endpoint) {
-                       panel_node = of_graph_get_remote_port_parent(endpoint);
-                       if (panel_node) {
-                               dp->panel = of_drm_find_panel(panel_node);
-                               of_node_put(panel_node);
-                               if (!dp->panel)
-                                       return -EPROBE_DEFER;
-                       } else {
-                               DRM_ERROR("no port node for panel device.\n");
-                               return -EINVAL;
-                       }
-               }
-       }
-
-       if (endpoint)
                goto out;
+       }
 
        endpoint = of_graph_get_next_endpoint(dev->of_node, NULL);
        if (endpoint) {
-               bridge_node = of_graph_get_remote_port_parent(endpoint);
-               if (bridge_node) {
-                       dp->ptn_bridge = of_drm_find_bridge(bridge_node);
-                       of_node_put(bridge_node);
-                       if (!dp->ptn_bridge)
-                               return -EPROBE_DEFER;
-               } else
-                       return -EPROBE_DEFER;
+               np = of_graph_get_remote_port_parent(endpoint);
+               if (np) {
+                       /* The remote port can be either a panel or a bridge */
+                       dp->panel = of_drm_find_panel(np);
+                       if (!dp->panel) {
+                               dp->ptn_bridge = of_drm_find_bridge(np);
+                               if (!dp->ptn_bridge) {
+                                       of_node_put(np);
+                                       return -EPROBE_DEFER;
+                               }
+                       }
+                       of_node_put(np);
+               } else {
+                       DRM_ERROR("no remote endpoint device node found.\n");
+                       return -EINVAL;
+               }
+       } else {
+               DRM_ERROR("no port endpoint subnode found.\n");
+               return -EINVAL;
        }
 
 out:
index d84a498ef099712c56f319c8333c8c8564ef457a..e977a81af2e67d91101e644b393d8ce1393b3fc8 100644 (file)
@@ -1906,8 +1906,7 @@ static int exynos_dsi_remove(struct platform_device *pdev)
        return 0;
 }
 
-#ifdef CONFIG_PM
-static int exynos_dsi_suspend(struct device *dev)
+static int __maybe_unused exynos_dsi_suspend(struct device *dev)
 {
        struct drm_encoder *encoder = dev_get_drvdata(dev);
        struct exynos_dsi *dsi = encoder_to_dsi(encoder);
@@ -1938,7 +1937,7 @@ static int exynos_dsi_suspend(struct device *dev)
        return 0;
 }
 
-static int exynos_dsi_resume(struct device *dev)
+static int __maybe_unused exynos_dsi_resume(struct device *dev)
 {
        struct drm_encoder *encoder = dev_get_drvdata(dev);
        struct exynos_dsi *dsi = encoder_to_dsi(encoder);
@@ -1972,7 +1971,6 @@ err_clk:
 
        return ret;
 }
-#endif
 
 static const struct dev_pm_ops exynos_dsi_pm_ops = {
        SET_RUNTIME_PM_OPS(exynos_dsi_suspend, exynos_dsi_resume, NULL)
index b5fbc1cbf02454d0e8c76de88f91f2c63e343e2c..0a5a60005f7e5711e14147dfa21990be1b88a2b8 100644 (file)
@@ -1289,8 +1289,7 @@ static int mixer_remove(struct platform_device *pdev)
        return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int exynos_mixer_suspend(struct device *dev)
+static int __maybe_unused exynos_mixer_suspend(struct device *dev)
 {
        struct mixer_context *ctx = dev_get_drvdata(dev);
        struct mixer_resources *res = &ctx->mixer_res;
@@ -1306,7 +1305,7 @@ static int exynos_mixer_suspend(struct device *dev)
        return 0;
 }
 
-static int exynos_mixer_resume(struct device *dev)
+static int __maybe_unused exynos_mixer_resume(struct device *dev)
 {
        struct mixer_context *ctx = dev_get_drvdata(dev);
        struct mixer_resources *res = &ctx->mixer_res;
@@ -1342,7 +1341,6 @@ static int exynos_mixer_resume(struct device *dev)
 
        return 0;
 }
-#endif
 
 static const struct dev_pm_ops exynos_mixer_pm_ops = {
        SET_RUNTIME_PM_OPS(exynos_mixer_suspend, exynos_mixer_resume, NULL)
index 533d1e3d4a999f971b2479f471c965e3708a73f2..a02112ba1c3df692b3a089a3ab24a8f30d35b7ec 100644 (file)
@@ -136,6 +136,7 @@ static bool adv7511_register_volatile(struct device *dev, unsigned int reg)
        case ADV7511_REG_BKSV(3):
        case ADV7511_REG_BKSV(4):
        case ADV7511_REG_DDC_STATUS:
+       case ADV7511_REG_EDID_READ_CTRL:
        case ADV7511_REG_BSTATUS(0):
        case ADV7511_REG_BSTATUS(1):
        case ADV7511_REG_CHIP_ID_HIGH:
@@ -362,24 +363,31 @@ static void adv7511_power_on(struct adv7511 *adv7511)
 {
        adv7511->current_edid_segment = -1;
 
-       regmap_write(adv7511->regmap, ADV7511_REG_INT(0),
-                    ADV7511_INT0_EDID_READY);
-       regmap_write(adv7511->regmap, ADV7511_REG_INT(1),
-                    ADV7511_INT1_DDC_ERROR);
        regmap_update_bits(adv7511->regmap, ADV7511_REG_POWER,
                           ADV7511_POWER_POWER_DOWN, 0);
+       if (adv7511->i2c_main->irq) {
+               /*
+                * Documentation says the INT_ENABLE registers are reset in
+                * POWER_DOWN mode. My 7511w preserved the bits, however.
+                * Still, let's be safe and stick to the documentation.
+                */
+               regmap_write(adv7511->regmap, ADV7511_REG_INT_ENABLE(0),
+                            ADV7511_INT0_EDID_READY);
+               regmap_write(adv7511->regmap, ADV7511_REG_INT_ENABLE(1),
+                            ADV7511_INT1_DDC_ERROR);
+       }
 
        /*
-        * Per spec it is allowed to pulse the HDP signal to indicate that the
+        * Per spec it is allowed to pulse the HPD signal to indicate that the
         * EDID information has changed. Some monitors do this when they wakeup
-        * from standby or are enabled. When the HDP goes low the adv7511 is
+        * from standby or are enabled. When the HPD goes low the adv7511 is
         * reset and the outputs are disabled which might cause the monitor to
-        * go to standby again. To avoid this we ignore the HDP pin for the
+        * go to standby again. To avoid this we ignore the HPD pin for the
         * first few seconds after enabling the output.
         */
        regmap_update_bits(adv7511->regmap, ADV7511_REG_POWER2,
-                          ADV7511_REG_POWER2_HDP_SRC_MASK,
-                          ADV7511_REG_POWER2_HDP_SRC_NONE);
+                          ADV7511_REG_POWER2_HPD_SRC_MASK,
+                          ADV7511_REG_POWER2_HPD_SRC_NONE);
 
        /*
         * Most of the registers are reset during power down or when HPD is low.
@@ -413,9 +421,9 @@ static bool adv7511_hpd(struct adv7511 *adv7511)
        if (ret < 0)
                return false;
 
-       if (irq0 & ADV7511_INT0_HDP) {
+       if (irq0 & ADV7511_INT0_HPD) {
                regmap_write(adv7511->regmap, ADV7511_REG_INT(0),
-                            ADV7511_INT0_HDP);
+                            ADV7511_INT0_HPD);
                return true;
        }
 
@@ -438,7 +446,7 @@ static int adv7511_irq_process(struct adv7511 *adv7511)
        regmap_write(adv7511->regmap, ADV7511_REG_INT(0), irq0);
        regmap_write(adv7511->regmap, ADV7511_REG_INT(1), irq1);
 
-       if (irq0 & ADV7511_INT0_HDP && adv7511->encoder)
+       if (irq0 & ADV7511_INT0_HPD && adv7511->encoder)
                drm_helper_hpd_irq_event(adv7511->encoder->dev);
 
        if (irq0 & ADV7511_INT0_EDID_READY || irq1 & ADV7511_INT1_DDC_ERROR) {
@@ -567,12 +575,14 @@ static int adv7511_get_modes(struct drm_encoder *encoder,
 
        /* Reading the EDID only works if the device is powered */
        if (!adv7511->powered) {
-               regmap_write(adv7511->regmap, ADV7511_REG_INT(0),
-                            ADV7511_INT0_EDID_READY);
-               regmap_write(adv7511->regmap, ADV7511_REG_INT(1),
-                            ADV7511_INT1_DDC_ERROR);
                regmap_update_bits(adv7511->regmap, ADV7511_REG_POWER,
                                   ADV7511_POWER_POWER_DOWN, 0);
+               if (adv7511->i2c_main->irq) {
+                       regmap_write(adv7511->regmap, ADV7511_REG_INT_ENABLE(0),
+                                    ADV7511_INT0_EDID_READY);
+                       regmap_write(adv7511->regmap, ADV7511_REG_INT_ENABLE(1),
+                                    ADV7511_INT1_DDC_ERROR);
+               }
                adv7511->current_edid_segment = -1;
        }
 
@@ -638,10 +648,10 @@ adv7511_encoder_detect(struct drm_encoder *encoder,
                if (adv7511->status == connector_status_connected)
                        status = connector_status_disconnected;
        } else {
-               /* Renable HDP sensing */
+               /* Renable HPD sensing */
                regmap_update_bits(adv7511->regmap, ADV7511_REG_POWER2,
-                                  ADV7511_REG_POWER2_HDP_SRC_MASK,
-                                  ADV7511_REG_POWER2_HDP_SRC_BOTH);
+                                  ADV7511_REG_POWER2_HPD_SRC_MASK,
+                                  ADV7511_REG_POWER2_HPD_SRC_BOTH);
        }
 
        adv7511->status = status;
index 6599ed538426d60f41a9dbebe3e9bcd8e8b11acc..38515b30cedfc84812a556638fe7333ff6ebeb8b 100644 (file)
@@ -90,7 +90,7 @@
 #define ADV7511_CSC_ENABLE                     BIT(7)
 #define ADV7511_CSC_UPDATE_MODE                        BIT(5)
 
-#define ADV7511_INT0_HDP                       BIT(7)
+#define ADV7511_INT0_HPD                       BIT(7)
 #define ADV7511_INT0_VSYNC                     BIT(5)
 #define ADV7511_INT0_AUDIO_FIFO_FULL           BIT(4)
 #define ADV7511_INT0_EDID_READY                        BIT(2)
 #define ADV7511_PACKET_ENABLE_SPARE2           BIT(1)
 #define ADV7511_PACKET_ENABLE_SPARE1           BIT(0)
 
-#define ADV7511_REG_POWER2_HDP_SRC_MASK                0xc0
-#define ADV7511_REG_POWER2_HDP_SRC_BOTH                0x00
-#define ADV7511_REG_POWER2_HDP_SRC_HDP         0x40
-#define ADV7511_REG_POWER2_HDP_SRC_CEC         0x80
-#define ADV7511_REG_POWER2_HDP_SRC_NONE                0xc0
+#define ADV7511_REG_POWER2_HPD_SRC_MASK                0xc0
+#define ADV7511_REG_POWER2_HPD_SRC_BOTH                0x00
+#define ADV7511_REG_POWER2_HPD_SRC_HPD         0x40
+#define ADV7511_REG_POWER2_HPD_SRC_CEC         0x80
+#define ADV7511_REG_POWER2_HPD_SRC_NONE                0xc0
 #define ADV7511_REG_POWER2_TDMS_ENABLE         BIT(4)
 #define ADV7511_REG_POWER2_GATE_INPUT_CLK      BIT(0)
 
index fcd77b27514dfdb738334024c2b1201732af6dc3..051eab33e4c7b13260994ebb884cc44a5394c4bc 100644 (file)
@@ -10,7 +10,6 @@ config DRM_I915
        # the shmem_readpage() which depends upon tmpfs
        select SHMEM
        select TMPFS
-       select STOP_MACHINE
        select DRM_KMS_HELPER
        select DRM_PANEL
        select DRM_MIPI_DSI
index 3ac616d7363bafcc4197aa126c509932d15fb299..f357058c74d938a41f922c4b40e3da1d67aa029a 100644 (file)
@@ -501,7 +501,9 @@ void intel_detect_pch(struct drm_device *dev)
                                WARN_ON(!IS_SKYLAKE(dev) &&
                                        !IS_KABYLAKE(dev));
                        } else if ((id == INTEL_PCH_P2X_DEVICE_ID_TYPE) ||
-                                  (id == INTEL_PCH_QEMU_DEVICE_ID_TYPE)) {
+                                  ((id == INTEL_PCH_QEMU_DEVICE_ID_TYPE) &&
+                                   pch->subsystem_vendor == 0x1af4 &&
+                                   pch->subsystem_device == 0x1100)) {
                                dev_priv->pch_type = intel_virt_detect_pch(dev);
                        } else
                                continue;
index 2f00828ccc6e923a65447424c716d9cc5ee0e99b..5feb65725c04e350c09d33b8969a53770d3d6049 100644 (file)
@@ -2946,7 +2946,7 @@ u32 intel_plane_obj_offset(struct intel_plane *intel_plane,
        struct i915_vma *vma;
        u64 offset;
 
-       intel_fill_fb_ggtt_view(&view, intel_plane->base.fb,
+       intel_fill_fb_ggtt_view(&view, intel_plane->base.state->fb,
                                intel_plane->base.state);
 
        vma = i915_gem_obj_to_ggtt_view(obj, &view);
@@ -12075,11 +12075,21 @@ connected_sink_compute_bpp(struct intel_connector *connector,
                pipe_config->pipe_bpp = connector->base.display_info.bpc*3;
        }
 
-       /* Clamp bpp to 8 on screens without EDID 1.4 */
-       if (connector->base.display_info.bpc == 0 && bpp > 24) {
-               DRM_DEBUG_KMS("clamping display bpp (was %d) to default limit of 24\n",
-                             bpp);
-               pipe_config->pipe_bpp = 24;
+       /* Clamp bpp to default limit on screens without EDID 1.4 */
+       if (connector->base.display_info.bpc == 0) {
+               int type = connector->base.connector_type;
+               int clamp_bpp = 24;
+
+               /* Fall back to 18 bpp when DP sink capability is unknown. */
+               if (type == DRM_MODE_CONNECTOR_DisplayPort ||
+                   type == DRM_MODE_CONNECTOR_eDP)
+                       clamp_bpp = 18;
+
+               if (bpp > clamp_bpp) {
+                       DRM_DEBUG_KMS("clamping display bpp (was %d) to default limit of %d\n",
+                                     bpp, clamp_bpp);
+                       pipe_config->pipe_bpp = clamp_bpp;
+               }
        }
 }
 
@@ -13883,11 +13893,12 @@ intel_check_primary_plane(struct drm_plane *plane,
        int max_scale = DRM_PLANE_HELPER_NO_SCALING;
        bool can_position = false;
 
-       /* use scaler when colorkey is not required */
-       if (INTEL_INFO(plane->dev)->gen >= 9 &&
-           state->ckey.flags == I915_SET_COLORKEY_NONE) {
-               min_scale = 1;
-               max_scale = skl_max_scale(to_intel_crtc(crtc), crtc_state);
+       if (INTEL_INFO(plane->dev)->gen >= 9) {
+               /* use scaler when colorkey is not required */
+               if (state->ckey.flags == I915_SET_COLORKEY_NONE) {
+                       min_scale = 1;
+                       max_scale = skl_max_scale(to_intel_crtc(crtc), crtc_state);
+               }
                can_position = true;
        }
 
index 3aa614731d7e4b7160a85336d96df41469587991..f1fa756c5d5d59bf8877daded5c23329f63ed331 100644 (file)
@@ -1707,6 +1707,7 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request,
        if (flush_domains) {
                flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
                flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+               flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
                flags |= PIPE_CONTROL_FLUSH_ENABLE;
        }
 
index 339701d7a9a5069ccce00f2f171f923073d2c3b5..40c6aff57256140a3dccd780356cfcd859f377ba 100644 (file)
@@ -331,6 +331,7 @@ gen7_render_ring_flush(struct drm_i915_gem_request *req,
        if (flush_domains) {
                flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
                flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+               flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
                flags |= PIPE_CONTROL_FLUSH_ENABLE;
        }
        if (invalidate_domains) {
@@ -403,6 +404,7 @@ gen8_render_ring_flush(struct drm_i915_gem_request *req,
        if (flush_domains) {
                flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
                flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+               flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
                flags |= PIPE_CONTROL_FLUSH_ENABLE;
        }
        if (invalidate_domains) {
index 6bfc46369db1b4f8daa02fba453a382e863a3585..367a916f364e94617a1b15fcd7d78cd3f90c8dc6 100644 (file)
@@ -304,18 +304,10 @@ void dce6_dp_audio_set_dto(struct radeon_device *rdev,
                unsigned int div = (RREG32(DENTIST_DISPCLK_CNTL) &
                        DENTIST_DPREFCLK_WDIVIDER_MASK) >>
                        DENTIST_DPREFCLK_WDIVIDER_SHIFT;
-
-               if (div < 128 && div >= 96)
-                       div -= 64;
-               else if (div >= 64)
-                       div = div / 2 - 16;
-               else if (div >= 8)
-                       div /= 4;
-               else
-                       div = 0;
+               div = radeon_audio_decode_dfs_div(div);
 
                if (div)
-                       clock = rdev->clock.gpupll_outputfreq * 10 / div;
+                       clock = clock * 100 / div;
 
                WREG32(DCE8_DCCG_AUDIO_DTO1_PHASE, 24000);
                WREG32(DCE8_DCCG_AUDIO_DTO1_MODULE, clock);
index 9953356fe2637cfacdc2ba41e8ecd082d65213ff..3cf04a2f44bbb05169264a504349dec3d949e5bb 100644 (file)
@@ -289,6 +289,16 @@ void dce4_dp_audio_set_dto(struct radeon_device *rdev,
         * number (coefficient of two integer numbers.  DCCG_AUDIO_DTOx_PHASE
         * is the numerator, DCCG_AUDIO_DTOx_MODULE is the denominator
         */
+       if (ASIC_IS_DCE41(rdev)) {
+               unsigned int div = (RREG32(DCE41_DENTIST_DISPCLK_CNTL) &
+                       DENTIST_DPREFCLK_WDIVIDER_MASK) >>
+                       DENTIST_DPREFCLK_WDIVIDER_SHIFT;
+               div = radeon_audio_decode_dfs_div(div);
+
+               if (div)
+                       clock = 100 * clock / div;
+       }
+
        WREG32(DCCG_AUDIO_DTO1_PHASE, 24000);
        WREG32(DCCG_AUDIO_DTO1_MODULE, clock);
 }
index 4aa5f755572b1593a8b6f7876cf5f7aed183715d..13b6029d65cc524528aec6a1f1727b9d695aa4af 100644 (file)
 #define DCCG_AUDIO_DTO1_CNTL              0x05cc
 #       define DCCG_AUDIO_DTO1_USE_512FBR_DTO (1 << 3)
 
+#define DCE41_DENTIST_DISPCLK_CNTL                     0x049c
+#       define DENTIST_DPREFCLK_WDIVIDER(x)            (((x) & 0x7f) << 24)
+#       define DENTIST_DPREFCLK_WDIVIDER_MASK          (0x7f << 24)
+#       define DENTIST_DPREFCLK_WDIVIDER_SHIFT         24
+
 /* DCE 4.0 AFMT */
 #define HDMI_CONTROL                         0x7030
 #       define HDMI_KEEPOUT_MODE             (1 << 0)
index 5ae6db98aa4d1eae63990bbd5d8cff1b82dfc6e3..78a51b3eda10c4d2931c3c783221ad0625d9a9c9 100644 (file)
@@ -268,7 +268,7 @@ struct radeon_clock {
        uint32_t current_dispclk;
        uint32_t dp_extclk;
        uint32_t max_pixel_clock;
-       uint32_t gpupll_outputfreq;
+       uint32_t vco_freq;
 };
 
 /*
index 08fc1b5effa8a5ec21d4f6234bc168c5b2508b36..de9a2ffcf5f762539d6d4c92464ed5eff19e1c42 100644 (file)
@@ -1106,6 +1106,31 @@ union firmware_info {
        ATOM_FIRMWARE_INFO_V2_2 info_22;
 };
 
+union igp_info {
+       struct _ATOM_INTEGRATED_SYSTEM_INFO info;
+       struct _ATOM_INTEGRATED_SYSTEM_INFO_V2 info_2;
+       struct _ATOM_INTEGRATED_SYSTEM_INFO_V6 info_6;
+       struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_7 info_7;
+       struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_8 info_8;
+};
+
+static void radeon_atombios_get_dentist_vco_freq(struct radeon_device *rdev)
+{
+       struct radeon_mode_info *mode_info = &rdev->mode_info;
+       int index = GetIndexIntoMasterTable(DATA, IntegratedSystemInfo);
+       union igp_info *igp_info;
+       u8 frev, crev;
+       u16 data_offset;
+
+       if (atom_parse_data_header(mode_info->atom_context, index, NULL,
+                       &frev, &crev, &data_offset)) {
+               igp_info = (union igp_info *)(mode_info->atom_context->bios +
+                       data_offset);
+               rdev->clock.vco_freq =
+                       le32_to_cpu(igp_info->info_6.ulDentistVCOFreq);
+       }
+}
+
 bool radeon_atom_get_clock_info(struct drm_device *dev)
 {
        struct radeon_device *rdev = dev->dev_private;
@@ -1257,12 +1282,18 @@ bool radeon_atom_get_clock_info(struct drm_device *dev)
                rdev->mode_info.firmware_flags =
                        le16_to_cpu(firmware_info->info.usFirmwareCapability.susAccess);
 
-               if (ASIC_IS_DCE8(rdev)) {
-                       rdev->clock.gpupll_outputfreq =
+               if (ASIC_IS_DCE8(rdev))
+                       rdev->clock.vco_freq =
                                le32_to_cpu(firmware_info->info_22.ulGPUPLL_OutputFreq);
-                       if (rdev->clock.gpupll_outputfreq == 0)
-                               rdev->clock.gpupll_outputfreq = 360000; /* 3.6 GHz */
-               }
+               else if (ASIC_IS_DCE5(rdev))
+                       rdev->clock.vco_freq = rdev->clock.current_dispclk;
+               else if (ASIC_IS_DCE41(rdev))
+                       radeon_atombios_get_dentist_vco_freq(rdev);
+               else
+                       rdev->clock.vco_freq = rdev->clock.current_dispclk;
+
+               if (rdev->clock.vco_freq == 0)
+                       rdev->clock.vco_freq = 360000;  /* 3.6 GHz */
 
                return true;
        }
@@ -1270,14 +1301,6 @@ bool radeon_atom_get_clock_info(struct drm_device *dev)
        return false;
 }
 
-union igp_info {
-       struct _ATOM_INTEGRATED_SYSTEM_INFO info;
-       struct _ATOM_INTEGRATED_SYSTEM_INFO_V2 info_2;
-       struct _ATOM_INTEGRATED_SYSTEM_INFO_V6 info_6;
-       struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_7 info_7;
-       struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_8 info_8;
-};
-
 bool radeon_atombios_sideport_present(struct radeon_device *rdev)
 {
        struct radeon_mode_info *mode_info = &rdev->mode_info;
index 2c02e99b5f95a65a26357f26d67e91a45f8224a4..b214663b370da00905faa882ea27513153aa91bd 100644 (file)
@@ -739,9 +739,6 @@ static void radeon_audio_dp_mode_set(struct drm_encoder *encoder,
        struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
        struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv;
        struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
-       struct radeon_connector *radeon_connector = to_radeon_connector(connector);
-       struct radeon_connector_atom_dig *dig_connector =
-               radeon_connector->con_priv;
 
        if (!dig || !dig->afmt)
                return;
@@ -753,10 +750,7 @@ static void radeon_audio_dp_mode_set(struct drm_encoder *encoder,
                radeon_audio_write_speaker_allocation(encoder);
                radeon_audio_write_sad_regs(encoder);
                radeon_audio_write_latency_fields(encoder, mode);
-               if (rdev->clock.dp_extclk || ASIC_IS_DCE5(rdev))
-                       radeon_audio_set_dto(encoder, rdev->clock.default_dispclk * 10);
-               else
-                       radeon_audio_set_dto(encoder, dig_connector->dp_clock);
+               radeon_audio_set_dto(encoder, rdev->clock.vco_freq * 10);
                radeon_audio_set_audio_packet(encoder);
                radeon_audio_select_pin(encoder);
 
@@ -781,3 +775,15 @@ void radeon_audio_dpms(struct drm_encoder *encoder, int mode)
        if (radeon_encoder->audio && radeon_encoder->audio->dpms)
                radeon_encoder->audio->dpms(encoder, mode == DRM_MODE_DPMS_ON);
 }
+
+unsigned int radeon_audio_decode_dfs_div(unsigned int div)
+{
+       if (div >= 8 && div < 64)
+               return (div - 8) * 25 + 200;
+       else if (div >= 64 && div < 96)
+               return (div - 64) * 50 + 1600;
+       else if (div >= 96 && div < 128)
+               return (div - 96) * 100 + 3200;
+       else
+               return 0;
+}
index 059cc3012062a7b50708620c557771dcfea821c0..5c70cceaa4a6c35e673ff9c50306e36e7f93ff3d 100644 (file)
@@ -79,5 +79,6 @@ void radeon_audio_fini(struct radeon_device *rdev);
 void radeon_audio_mode_set(struct drm_encoder *encoder,
        struct drm_display_mode *mode);
 void radeon_audio_dpms(struct drm_encoder *encoder, int mode);
+unsigned int radeon_audio_decode_dfs_div(unsigned int div);
 
 #endif
index b3bb92368ae0e98372e55caab123fb720ecdc48d..298ea1c453c3638fecdf6cef5afce1a48fdefa82 100644 (file)
@@ -1670,8 +1670,10 @@ int radeon_modeset_init(struct radeon_device *rdev)
        /* setup afmt */
        radeon_afmt_init(rdev);
 
-       radeon_fbdev_init(rdev);
-       drm_kms_helper_poll_init(rdev->ddev);
+       if (!list_empty(&rdev->ddev->mode_config.connector_list)) {
+               radeon_fbdev_init(rdev);
+               drm_kms_helper_poll_init(rdev->ddev);
+       }
 
        /* do pm late init */
        ret = radeon_pm_late_init(rdev);
index 3dcc5733ff6915b2e2497ca3d4ff800455f49c20..e26c963f2e9304e58f25517114e773b3c85242bd 100644 (file)
@@ -663,6 +663,7 @@ int radeon_gem_va_ioctl(struct drm_device *dev, void *data,
        bo_va = radeon_vm_bo_find(&fpriv->vm, rbo);
        if (!bo_va) {
                args->operation = RADEON_VA_RESULT_ERROR;
+               radeon_bo_unreserve(rbo);
                drm_gem_object_unreference_unlocked(gobj);
                return -ENOENT;
        }
index 84d45633d28c9b1c58f58dc6ec6383eba2207230..fb6ad143873f5b40d2c027a20dd25914dbd5e29e 100644 (file)
@@ -33,6 +33,7 @@
 #include <linux/slab.h>
 #include <drm/drmP.h>
 #include <drm/radeon_drm.h>
+#include <drm/drm_cache.h>
 #include "radeon.h"
 #include "radeon_trace.h"
 
@@ -245,6 +246,12 @@ int radeon_bo_create(struct radeon_device *rdev,
                DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for "
                              "better performance thanks to write-combining\n");
        bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);
+#else
+       /* For architectures that don't support WC memory,
+        * mask out the WC flag from the BO
+        */
+       if (!drm_arch_can_wc_memory())
+               bo->flags &= ~RADEON_GEM_GTT_WC;
 #endif
 
        radeon_ttm_placement_from_domain(bo, domain);
index 07a0d378e122b1b206b8aceb1a97234254d00e74..a01efe39a8206ec1b1d1fd26a3192c5d72abb885 100644 (file)
@@ -178,12 +178,12 @@ int vce_v1_0_load_fw(struct radeon_device *rdev, uint32_t *data)
                return -EINVAL;
        }
 
-       for (i = 0; i < sign->num; ++i) {
-               if (sign->val[i].chip_id == chip_id)
+       for (i = 0; i < le32_to_cpu(sign->num); ++i) {
+               if (le32_to_cpu(sign->val[i].chip_id) == chip_id)
                        break;
        }
 
-       if (i == sign->num)
+       if (i == le32_to_cpu(sign->num))
                return -EINVAL;
 
        data += (256 - 64) / 4;
@@ -191,18 +191,18 @@ int vce_v1_0_load_fw(struct radeon_device *rdev, uint32_t *data)
        data[1] = sign->val[i].nonce[1];
        data[2] = sign->val[i].nonce[2];
        data[3] = sign->val[i].nonce[3];
-       data[4] = sign->len + 64;
+       data[4] = cpu_to_le32(le32_to_cpu(sign->len) + 64);
 
        memset(&data[5], 0, 44);
        memcpy(&data[16], &sign[1], rdev->vce_fw->size - sizeof(*sign));
 
-       data += data[4] / 4;
+       data += le32_to_cpu(data[4]) / 4;
        data[0] = sign->val[i].sigval[0];
        data[1] = sign->val[i].sigval[1];
        data[2] = sign->val[i].sigval[2];
        data[3] = sign->val[i].sigval[3];
 
-       rdev->vce.keyselect = sign->val[i].keyselect;
+       rdev->vce.keyselect = le32_to_cpu(sign->val[i].keyselect);
 
        return 0;
 }
index d1dc0f7b01dbdf48ad97e2829278b7f61bb6efea..f6a809afceec2e35237c855df8d2803a09674147 100644 (file)
@@ -2,11 +2,11 @@
 # Makefile for the drm device driver.  This driver provides support for the
 # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
 
-rockchipdrm-y := rockchip_drm_drv.o rockchip_drm_fb.o rockchip_drm_fbdev.o \
-               rockchip_drm_gem.o
+rockchipdrm-y := rockchip_drm_drv.o rockchip_drm_fb.o \
+               rockchip_drm_gem.o rockchip_drm_vop.o
+rockchipdrm-$(CONFIG_DRM_FBDEV_EMULATION) += rockchip_drm_fbdev.o
 
 obj-$(CONFIG_ROCKCHIP_DW_HDMI) += dw_hdmi-rockchip.o
 obj-$(CONFIG_ROCKCHIP_DW_MIPI_DSI) += dw-mipi-dsi.o
 
-obj-$(CONFIG_DRM_ROCKCHIP) += rockchipdrm.o rockchip_drm_vop.o \
-                               rockchip_vop_reg.o
+obj-$(CONFIG_DRM_ROCKCHIP) += rockchipdrm.o rockchip_vop_reg.o
index 7bfe243c61737bd112cf591366611ce545e86598..f8f8f29fb7c336d8fffe1a74bddbad66e01854f6 100644 (file)
@@ -461,10 +461,11 @@ static int dw_mipi_dsi_phy_init(struct dw_mipi_dsi *dsi)
 
 static int dw_mipi_dsi_get_lane_bps(struct dw_mipi_dsi *dsi)
 {
-       unsigned int bpp, i, pre;
+       unsigned int i, pre;
        unsigned long mpclk, pllref, tmp;
        unsigned int m = 1, n = 1, target_mbps = 1000;
        unsigned int max_mbps = dptdin_map[ARRAY_SIZE(dptdin_map) - 1].max_mbps;
+       int bpp;
 
        bpp = mipi_dsi_pixel_format_to_bpp(dsi->format);
        if (bpp < 0) {
index 8397d1b62ef9460910bb28c96e0675a53f5d1b50..a0d51ccb6ea449802a0ff4fac1bcd8b4cf510d3a 100644 (file)
@@ -55,14 +55,12 @@ int rockchip_drm_dma_attach_device(struct drm_device *drm_dev,
 
        return arm_iommu_attach_device(dev, mapping);
 }
-EXPORT_SYMBOL_GPL(rockchip_drm_dma_attach_device);
 
 void rockchip_drm_dma_detach_device(struct drm_device *drm_dev,
                                    struct device *dev)
 {
        arm_iommu_detach_device(dev);
 }
-EXPORT_SYMBOL_GPL(rockchip_drm_dma_detach_device);
 
 int rockchip_register_crtc_funcs(struct drm_crtc *crtc,
                                 const struct rockchip_crtc_funcs *crtc_funcs)
@@ -77,7 +75,6 @@ int rockchip_register_crtc_funcs(struct drm_crtc *crtc,
 
        return 0;
 }
-EXPORT_SYMBOL_GPL(rockchip_register_crtc_funcs);
 
 void rockchip_unregister_crtc_funcs(struct drm_crtc *crtc)
 {
@@ -89,7 +86,6 @@ void rockchip_unregister_crtc_funcs(struct drm_crtc *crtc)
 
        priv->crtc_funcs[pipe] = NULL;
 }
-EXPORT_SYMBOL_GPL(rockchip_unregister_crtc_funcs);
 
 static struct drm_crtc *rockchip_crtc_from_pipe(struct drm_device *drm,
                                                int pipe)
index f7844883cb76cb8cb00c434aaeb9d85d63b0160a..3b8f652698f828574c19f624264e816ab1299ee9 100644 (file)
@@ -39,7 +39,6 @@ struct drm_gem_object *rockchip_fb_get_gem_obj(struct drm_framebuffer *fb,
 
        return rk_fb->obj[plane];
 }
-EXPORT_SYMBOL_GPL(rockchip_fb_get_gem_obj);
 
 static void rockchip_drm_fb_destroy(struct drm_framebuffer *fb)
 {
@@ -177,8 +176,23 @@ static void rockchip_crtc_wait_for_update(struct drm_crtc *crtc)
                crtc_funcs->wait_for_update(crtc);
 }
 
+/*
+ * We can't use drm_atomic_helper_wait_for_vblanks() because rk3288 and rk3066
+ * have hardware counters for neither vblanks nor scanlines, which results in
+ * a race where:
+ *                             | <-- HW vsync irq and reg take effect
+ *            plane_commit --> |
+ *     get_vblank and wait --> |
+ *                             | <-- handle_vblank, vblank->count + 1
+ *              cleanup_fb --> |
+ *             iommu crash --> |
+ *                             | <-- HW vsync irq and reg take effect
+ *
+ * This function is equivalent but uses rockchip_crtc_wait_for_update() instead
+ * of waiting for vblank_count to change.
+ */
 static void
-rockchip_atomic_wait_for_complete(struct drm_atomic_state *old_state)
+rockchip_atomic_wait_for_complete(struct drm_device *dev, struct drm_atomic_state *old_state)
 {
        struct drm_crtc_state *old_crtc_state;
        struct drm_crtc *crtc;
@@ -194,6 +208,10 @@ rockchip_atomic_wait_for_complete(struct drm_atomic_state *old_state)
                if (!crtc->state->active)
                        continue;
 
+               if (!drm_atomic_helper_framebuffer_changed(dev,
+                               old_state, crtc))
+                       continue;
+
                ret = drm_crtc_vblank_get(crtc);
                if (ret != 0)
                        continue;
@@ -241,7 +259,7 @@ rockchip_atomic_commit_complete(struct rockchip_atomic_commit *commit)
 
        drm_atomic_helper_commit_planes(dev, state, true);
 
-       rockchip_atomic_wait_for_complete(state);
+       rockchip_atomic_wait_for_complete(dev, state);
 
        drm_atomic_helper_cleanup_planes(dev, state);
 
index 50432e9b5b37f38fcb3c3a10cf28573bf2d59163..73718c5f5bbf720ca6151fc44ca670fc8646c6a9 100644 (file)
 #ifndef _ROCKCHIP_DRM_FBDEV_H
 #define _ROCKCHIP_DRM_FBDEV_H
 
+#ifdef CONFIG_DRM_FBDEV_EMULATION
 int rockchip_drm_fbdev_init(struct drm_device *dev);
 void rockchip_drm_fbdev_fini(struct drm_device *dev);
+#else
+static inline int rockchip_drm_fbdev_init(struct drm_device *dev)
+{
+       return 0;
+}
+
+static inline void rockchip_drm_fbdev_fini(struct drm_device *dev)
+{
+}
+#endif
 
 #endif /* _ROCKCHIP_DRM_FBDEV_H */
index d908321b94ced4ffe10936f4c7ef37fc518d127a..18e07338c6e5f31f45b9a38c17884164fc476cff 100644 (file)
@@ -234,13 +234,8 @@ int rockchip_gem_dumb_create(struct drm_file *file_priv,
        /*
         * align to 64 bytes since Mali requires it.
         */
-       min_pitch = ALIGN(min_pitch, 64);
-
-       if (args->pitch < min_pitch)
-               args->pitch = min_pitch;
-
-       if (args->size < args->pitch * args->height)
-               args->size = args->pitch * args->height;
+       args->pitch = ALIGN(min_pitch, 64);
+       args->size = args->pitch * args->height;
 
        rk_obj = rockchip_gem_create_with_handle(file_priv, dev, args->size,
                                                 &args->handle);
index 46c2a8dfd8aa78b22caf02d9b1e217b8d011dd25..fd370548d7d75dc1990226110cf24ea0353a002c 100644 (file)
@@ -43,8 +43,8 @@
 
 #define REG_SET(x, base, reg, v, mode) \
                __REG_SET_##mode(x, base + reg.offset, reg.mask, reg.shift, v)
-#define REG_SET_MASK(x, base, reg, v, mode) \
-               __REG_SET_##mode(x, base + reg.offset, reg.mask, reg.shift, v)
+#define REG_SET_MASK(x, base, reg, mask, v, mode) \
+               __REG_SET_##mode(x, base + reg.offset, mask, reg.shift, v)
 
 #define VOP_WIN_SET(x, win, name, v) \
                REG_SET(x, win->base, win->phy->name, v, RELAXED)
 #define VOP_INTR_GET(vop, name) \
                vop_read_reg(vop, 0, &vop->data->ctrl->name)
 
-#define VOP_INTR_SET(vop, name, v) \
-               REG_SET(vop, 0, vop->data->intr->name, v, NORMAL)
+#define VOP_INTR_SET(vop, name, mask, v) \
+               REG_SET_MASK(vop, 0, vop->data->intr->name, mask, v, NORMAL)
 #define VOP_INTR_SET_TYPE(vop, name, type, v) \
        do { \
-               int i, reg = 0; \
+               int i, reg = 0, mask = 0; \
                for (i = 0; i < vop->data->intr->nintrs; i++) { \
-                       if (vop->data->intr->intrs[i] & type) \
+                       if (vop->data->intr->intrs[i] & type) \
                                reg |= (v) << i; \
+                               mask |= 1 << i; \
+                       } \
                } \
-               VOP_INTR_SET(vop, name, reg); \
+               VOP_INTR_SET(vop, name, mask, reg); \
        } while (0)
 #define VOP_INTR_GET_TYPE(vop, name, type) \
                vop_get_intr_type(vop, &vop->data->intr->name, type)
index 424d515ffcdafae650c9d472a02600db7c0a8485..314ff71db978dedfd8272ea0afe0087f1903ca4a 100644 (file)
@@ -144,19 +144,16 @@ int vc4_v3d_debugfs_ident(struct seq_file *m, void *unused)
 }
 #endif /* CONFIG_DEBUG_FS */
 
-/*
- * Asks the firmware to turn on power to the V3D engine.
- *
- * This may be doable with just the clocks interface, though this
- * packet does some other register setup from the firmware, too.
- */
 int
 vc4_v3d_set_power(struct vc4_dev *vc4, bool on)
 {
-       if (on)
-               return pm_generic_poweroff(&vc4->v3d->pdev->dev);
-       else
-               return pm_generic_resume(&vc4->v3d->pdev->dev);
+       /* XXX: This interface is needed for GPU reset, and the way to
+        * do it is to turn our power domain off and back on.  We
+        * can't just reset from within the driver, because the reset
+        * bits are in the power domain's register area, and get set
+        * during the poweron process.
+        */
+       return 0;
 }
 
 static void vc4_v3d_init_hw(struct drm_device *dev)
index c49812b80dd0dae82c77096f75fbd4cced19ffb1..24fb348a44e141f71574f20d1e57bdaad2759ce6 100644 (file)
@@ -25,6 +25,7 @@
  *
  **************************************************************************/
 #include <linux/module.h>
+#include <linux/console.h>
 
 #include <drm/drmP.h>
 #include "vmwgfx_drv.h"
@@ -1538,6 +1539,12 @@ static int vmw_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 static int __init vmwgfx_init(void)
 {
        int ret;
+
+#ifdef CONFIG_VGA_CONSOLE
+       if (vgacon_text_force())
+               return -EINVAL;
+#endif
+
        ret = drm_pci_init(&driver, &vmw_pci_driver);
        if (ret)
                DRM_ERROR("Failed initializing DRM.\n");
index c8487894b31236cefd761b24cac48fb4e17e6d52..c43318d3416e20a4d027a32402b1cf751718b8c6 100644 (file)
@@ -930,6 +930,17 @@ static struct dmi_system_id i8k_dmi_table[] __initdata = {
 MODULE_DEVICE_TABLE(dmi, i8k_dmi_table);
 
 static struct dmi_system_id i8k_blacklist_dmi_table[] __initdata = {
+       {
+               /*
+                * CPU fan speed going up and down on Dell Studio XPS 8000
+                * for unknown reasons.
+                */
+               .ident = "Dell Studio XPS 8000",
+               .matches = {
+                       DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+                       DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Studio XPS 8000"),
+               },
+       },
        {
                /*
                 * CPU fan speed going up and down on Dell Studio XPS 8100
index f77eb971ce959a6d3501dfdf740e8f8f566383e5..4f695d8fcafaceab3c5d8caaa677b58e94ae9c2c 100644 (file)
@@ -90,7 +90,15 @@ static ssize_t show_power(struct device *dev,
        pci_bus_read_config_dword(f4->bus, PCI_DEVFN(PCI_SLOT(f4->devfn), 5),
                                  REG_TDP_LIMIT3, &val);
 
-       tdp_limit = val >> 16;
+       /*
+        * On Carrizo and later platforms, ApmTdpLimit bit field
+        * is extended to 16:31 from 16:28.
+        */
+       if (boot_cpu_data.x86 == 0x15 && boot_cpu_data.x86_model >= 0x60)
+               tdp_limit = val >> 16;
+       else
+               tdp_limit = (val >> 16) & 0x1fff;
+
        curr_pwr_watts = ((u64)(tdp_limit +
                                data->base_tdp)) << running_avg_range;
        curr_pwr_watts -= running_avg_capture;
index 52f708bcf77f397952ea0f278ce5b161780e076a..d50c701b19d678e9998319be36a492bb3a8eba6d 100644 (file)
@@ -313,6 +313,10 @@ int of_hwspin_lock_get_id(struct device_node *np, int index)
                hwlock = radix_tree_deref_slot(slot);
                if (unlikely(!hwlock))
                        continue;
+               if (radix_tree_is_indirect_ptr(hwlock)) {
+                       slot = radix_tree_iter_retry(&iter);
+                       continue;
+               }
 
                if (hwlock->bank->dev->of_node == args.np) {
                        ret = 0;
index ba9732c236c52985c922a63d50bec477585af610..10fbd6d841e06aef8ae0a627f9fcf36106f5119b 100644 (file)
@@ -874,7 +874,8 @@ int i2c_dw_probe(struct dw_i2c_dev *dev)
        i2c_set_adapdata(adap, dev);
 
        i2c_dw_disable_int(dev);
-       r = devm_request_irq(dev->dev, dev->irq, i2c_dw_isr, IRQF_SHARED,
+       r = devm_request_irq(dev->dev, dev->irq, i2c_dw_isr,
+                            IRQF_SHARED | IRQF_COND_SUSPEND,
                             dev_name(dev->dev), dev);
        if (r) {
                dev_err(dev->dev, "failure requesting irq %i: %d\n",
index e045985950737c429c3c3253620559f55be1244e..93f2895383ee55bb9f060cf145af92adbbeb512e 100644 (file)
@@ -137,10 +137,11 @@ static const struct dmi_system_id piix4_dmi_ibm[] = {
 };
 
 /* SB800 globals */
+static DEFINE_MUTEX(piix4_mutex_sb800);
 static const char *piix4_main_port_names_sb800[PIIX4_MAX_ADAPTERS] = {
-       "SDA0", "SDA2", "SDA3", "SDA4"
+       " port 0", " port 2", " port 3", " port 4"
 };
-static const char *piix4_aux_port_name_sb800 = "SDA1";
+static const char *piix4_aux_port_name_sb800 = " port 1";
 
 struct i2c_piix4_adapdata {
        unsigned short smba;
@@ -148,7 +149,6 @@ struct i2c_piix4_adapdata {
        /* SB800 */
        bool sb800_main;
        unsigned short port;
-       struct mutex *mutex;
 };
 
 static int piix4_setup(struct pci_dev *PIIX4_dev,
@@ -275,10 +275,12 @@ static int piix4_setup_sb800(struct pci_dev *PIIX4_dev,
        else
                smb_en = (aux) ? 0x28 : 0x2c;
 
+       mutex_lock(&piix4_mutex_sb800);
        outb_p(smb_en, SB800_PIIX4_SMB_IDX);
        smba_en_lo = inb_p(SB800_PIIX4_SMB_IDX + 1);
        outb_p(smb_en + 1, SB800_PIIX4_SMB_IDX);
        smba_en_hi = inb_p(SB800_PIIX4_SMB_IDX + 1);
+       mutex_unlock(&piix4_mutex_sb800);
 
        if (!smb_en) {
                smb_en_status = smba_en_lo & 0x10;
@@ -559,7 +561,7 @@ static s32 piix4_access_sb800(struct i2c_adapter *adap, u16 addr,
        u8 port;
        int retval;
 
-       mutex_lock(adapdata->mutex);
+       mutex_lock(&piix4_mutex_sb800);
 
        outb_p(SB800_PIIX4_PORT_IDX, SB800_PIIX4_SMB_IDX);
        smba_en_lo = inb_p(SB800_PIIX4_SMB_IDX + 1);
@@ -574,7 +576,7 @@ static s32 piix4_access_sb800(struct i2c_adapter *adap, u16 addr,
 
        outb_p(smba_en_lo, SB800_PIIX4_SMB_IDX + 1);
 
-       mutex_unlock(adapdata->mutex);
+       mutex_unlock(&piix4_mutex_sb800);
 
        return retval;
 }
@@ -625,6 +627,7 @@ static struct i2c_adapter *piix4_main_adapters[PIIX4_MAX_ADAPTERS];
 static struct i2c_adapter *piix4_aux_adapter;
 
 static int piix4_add_adapter(struct pci_dev *dev, unsigned short smba,
+                            bool sb800_main, unsigned short port,
                             const char *name, struct i2c_adapter **padap)
 {
        struct i2c_adapter *adap;
@@ -639,7 +642,8 @@ static int piix4_add_adapter(struct pci_dev *dev, unsigned short smba,
 
        adap->owner = THIS_MODULE;
        adap->class = I2C_CLASS_HWMON | I2C_CLASS_SPD;
-       adap->algo = &smbus_algorithm;
+       adap->algo = sb800_main ? &piix4_smbus_algorithm_sb800
+                               : &smbus_algorithm;
 
        adapdata = kzalloc(sizeof(*adapdata), GFP_KERNEL);
        if (adapdata == NULL) {
@@ -649,12 +653,14 @@ static int piix4_add_adapter(struct pci_dev *dev, unsigned short smba,
        }
 
        adapdata->smba = smba;
+       adapdata->sb800_main = sb800_main;
+       adapdata->port = port;
 
        /* set up the sysfs linkage to our parent device */
        adap->dev.parent = &dev->dev;
 
        snprintf(adap->name, sizeof(adap->name),
-               "SMBus PIIX4 adapter %s at %04x", name, smba);
+               "SMBus PIIX4 adapter%s at %04x", name, smba);
 
        i2c_set_adapdata(adap, adapdata);
 
@@ -673,30 +679,16 @@ static int piix4_add_adapter(struct pci_dev *dev, unsigned short smba,
 
 static int piix4_add_adapters_sb800(struct pci_dev *dev, unsigned short smba)
 {
-       struct mutex *mutex;
        struct i2c_piix4_adapdata *adapdata;
        int port;
        int retval;
 
-       mutex = kzalloc(sizeof(*mutex), GFP_KERNEL);
-       if (mutex == NULL)
-               return -ENOMEM;
-
-       mutex_init(mutex);
-
        for (port = 0; port < PIIX4_MAX_ADAPTERS; port++) {
-               retval = piix4_add_adapter(dev, smba,
+               retval = piix4_add_adapter(dev, smba, true, port,
                                           piix4_main_port_names_sb800[port],
                                           &piix4_main_adapters[port]);
                if (retval < 0)
                        goto error;
-
-               piix4_main_adapters[port]->algo = &piix4_smbus_algorithm_sb800;
-
-               adapdata = i2c_get_adapdata(piix4_main_adapters[port]);
-               adapdata->sb800_main = true;
-               adapdata->port = port;
-               adapdata->mutex = mutex;
        }
 
        return retval;
@@ -714,19 +706,20 @@ error:
                }
        }
 
-       kfree(mutex);
-
        return retval;
 }
 
 static int piix4_probe(struct pci_dev *dev, const struct pci_device_id *id)
 {
        int retval;
+       bool is_sb800 = false;
 
        if ((dev->vendor == PCI_VENDOR_ID_ATI &&
             dev->device == PCI_DEVICE_ID_ATI_SBX00_SMBUS &&
             dev->revision >= 0x40) ||
            dev->vendor == PCI_VENDOR_ID_AMD) {
+               is_sb800 = true;
+
                if (!request_region(SB800_PIIX4_SMB_IDX, 2, "smba_idx")) {
                        dev_err(&dev->dev,
                        "SMBus base address index region 0x%x already in use!\n",
@@ -756,7 +749,7 @@ static int piix4_probe(struct pci_dev *dev, const struct pci_device_id *id)
                        return retval;
 
                /* Try to register main SMBus adapter, give up if we can't */
-               retval = piix4_add_adapter(dev, retval, "main",
+               retval = piix4_add_adapter(dev, retval, false, 0, "",
                                           &piix4_main_adapters[0]);
                if (retval < 0)
                        return retval;
@@ -783,7 +776,8 @@ static int piix4_probe(struct pci_dev *dev, const struct pci_device_id *id)
        if (retval > 0) {
                /* Try to add the aux adapter if it exists,
                 * piix4_add_adapter will clean up if this fails */
-               piix4_add_adapter(dev, retval, piix4_aux_port_name_sb800,
+               piix4_add_adapter(dev, retval, false, 0,
+                                 is_sb800 ? piix4_aux_port_name_sb800 : "",
                                  &piix4_aux_adapter);
        }
 
@@ -798,10 +792,8 @@ static void piix4_adap_remove(struct i2c_adapter *adap)
                i2c_del_adapter(adap);
                if (adapdata->port == 0) {
                        release_region(adapdata->smba, SMBIOSIZE);
-                       if (adapdata->sb800_main) {
-                               kfree(adapdata->mutex);
+                       if (adapdata->sb800_main)
                                release_region(SB800_PIIX4_SMB_IDX, 2);
-                       }
                }
                kfree(adapdata);
                kfree(adap);
index edc29b173f6c9012635771116a6cca23193a096e..833ea9dd4464b664e9d11103c8543961a65f5f01 100644 (file)
@@ -213,6 +213,7 @@ config STK8312
 config STK8BA50
        tristate "Sensortek STK8BA50 3-Axis Accelerometer Driver"
        depends on I2C
+       depends on IIO_TRIGGER
        help
          Say yes here to get support for the Sensortek STK8BA50 3-axis
          accelerometer.
index 605ff42c46310201e7aba3fd2f7ab42a0a873e04..283ded7747a9379be4c105ef49964d5c5d674b0f 100644 (file)
@@ -175,6 +175,7 @@ config DA9150_GPADC
 config EXYNOS_ADC
        tristate "Exynos ADC driver support"
        depends on ARCH_EXYNOS || ARCH_S3C24XX || ARCH_S3C64XX || (OF && COMPILE_TEST)
+       depends on HAS_IOMEM
        help
          Core support for the ADC block found in the Samsung EXYNOS series
          of SoCs for drivers such as the touchscreen and hwmon to use to share
@@ -207,6 +208,7 @@ config INA2XX_ADC
 config IMX7D_ADC
        tristate "IMX7D ADC driver"
        depends on ARCH_MXC || COMPILE_TEST
+       depends on HAS_IOMEM
        help
          Say yes here to build support for IMX7D ADC.
 
@@ -409,6 +411,7 @@ config TWL6030_GPADC
 config VF610_ADC
        tristate "Freescale vf610 ADC driver"
        depends on OF
+       depends on HAS_IOMEM
        select IIO_BUFFER
        select IIO_TRIGGERED_BUFFER
        help
index 942320e32753c3838e967e9e0a96d4db2d691fce..c1e05532d437f263a9aa3d7f7c96147b13bfe682 100644 (file)
@@ -289,7 +289,7 @@ static int tiadc_iio_buffered_hardware_setup(struct iio_dev *indio_dev,
                goto error_kfifo_free;
 
        indio_dev->setup_ops = setup_ops;
-       indio_dev->modes |= INDIO_BUFFER_HARDWARE;
+       indio_dev->modes |= INDIO_BUFFER_SOFTWARE;
 
        return 0;
 
index 43d14588448d65f507ab02ce22c0e01d7a9008ad..b4dde8315210c6519c7e2a7db4bd8bf3b6607988 100644 (file)
@@ -300,6 +300,7 @@ static int mcp4725_probe(struct i2c_client *client,
        data->client = client;
 
        indio_dev->dev.parent = &client->dev;
+       indio_dev->name = id->name;
        indio_dev->info = &mcp4725_info;
        indio_dev->channels = &mcp4725_channel;
        indio_dev->num_channels = 1;
index 1165b1c4f9d67bd93db24f784da5b13010d9fd99..cfc5a051ab9f3946bfdfd4aaf5aaac01f7245f36 100644 (file)
@@ -117,7 +117,7 @@ static int dht11_decode(struct dht11 *dht11, int offset, int timeres)
        if (((hum_int + hum_dec + temp_int + temp_dec) & 0xff) != checksum)
                return -EIO;
 
-       dht11->timestamp = ktime_get_real_ns();
+       dht11->timestamp = ktime_get_boot_ns();
        if (hum_int < 20) {  /* DHT22 */
                dht11->temperature = (((temp_int & 0x7f) << 8) + temp_dec) *
                                        ((temp_int & 0x80) ? -100 : 100);
@@ -145,7 +145,7 @@ static irqreturn_t dht11_handle_irq(int irq, void *data)
 
        /* TODO: Consider making the handler safe for IRQ sharing */
        if (dht11->num_edges < DHT11_EDGES_PER_READ && dht11->num_edges >= 0) {
-               dht11->edges[dht11->num_edges].ts = ktime_get_real_ns();
+               dht11->edges[dht11->num_edges].ts = ktime_get_boot_ns();
                dht11->edges[dht11->num_edges++].value =
                                                gpio_get_value(dht11->gpio);
 
@@ -164,7 +164,7 @@ static int dht11_read_raw(struct iio_dev *iio_dev,
        int ret, timeres;
 
        mutex_lock(&dht11->lock);
-       if (dht11->timestamp + DHT11_DATA_VALID_TIME < ktime_get_real_ns()) {
+       if (dht11->timestamp + DHT11_DATA_VALID_TIME < ktime_get_boot_ns()) {
                timeres = ktime_get_resolution_ns();
                if (DHT11_DATA_BIT_HIGH < 2 * timeres) {
                        dev_err(dht11->dev, "timeresolution %dns too low\n",
@@ -279,7 +279,7 @@ static int dht11_probe(struct platform_device *pdev)
                return -EINVAL;
        }
 
-       dht11->timestamp = ktime_get_real_ns() - DHT11_DATA_VALID_TIME - 1;
+       dht11->timestamp = ktime_get_boot_ns() - DHT11_DATA_VALID_TIME - 1;
        dht11->num_edges = -1;
 
        platform_set_drvdata(pdev, iio);
index cb32b593f1c55c023046d97cfc41263ea3818d89..36607d52fee065f9e2aa82f14215897e2c5b44b1 100644 (file)
@@ -43,7 +43,7 @@ int adis_update_scan_mode(struct iio_dev *indio_dev,
                return -ENOMEM;
 
        rx = adis->buffer;
-       tx = rx + indio_dev->scan_bytes;
+       tx = rx + scan_count;
 
        spi_message_init(&adis->msg);
 
index 48fbc0bc7e2a1d499015001bace2c46db2c8b440..8f8d1370ed8b9b3359d3a4f1135767a622df2c21 100644 (file)
@@ -5,9 +5,9 @@
 config INV_MPU6050_IIO
        tristate "Invensense MPU6050 devices"
        depends on I2C && SYSFS
+       depends on I2C_MUX
        select IIO_BUFFER
        select IIO_TRIGGERED_BUFFER
-       select I2C_MUX
        help
          This driver supports the Invensense MPU6050 devices.
          This driver can also support MPU6500 in MPU6050 compatibility mode
index 80fbbfd76faf9e1859eefb1b5dc55b2129f9858b..734a0042de0cb4265ee3145ee48f22a19b8b50af 100644 (file)
@@ -349,6 +349,8 @@ EXPORT_SYMBOL_GPL(iio_channel_get);
 
 void iio_channel_release(struct iio_channel *channel)
 {
+       if (!channel)
+               return;
        iio_device_put(channel->indio_dev);
        kfree(channel);
 }
index 60537ec0c923b98c45160d011f4a17ac932e4609..53201d99a16c8d760f4ec909c8c5fb1cba6bc8be 100644 (file)
@@ -54,7 +54,9 @@ static const struct iio_chan_spec acpi_als_channels[] = {
                        .realbits       = 32,
                        .storagebits    = 32,
                },
-               .info_mask_separate     = BIT(IIO_CHAN_INFO_RAW),
+               /* _RAW is here for backward ABI compatibility */
+               .info_mask_separate     = BIT(IIO_CHAN_INFO_RAW) |
+                                         BIT(IIO_CHAN_INFO_PROCESSED),
        },
 };
 
@@ -152,7 +154,7 @@ static int acpi_als_read_raw(struct iio_dev *indio_dev,
        s32 temp_val;
        int ret;
 
-       if (mask != IIO_CHAN_INFO_RAW)
+       if ((mask != IIO_CHAN_INFO_PROCESSED) && (mask != IIO_CHAN_INFO_RAW))
                return -EINVAL;
 
        /* we support only illumination (_ALI) so far. */
index 809a961b9a7f6d0d0077114e12767e4e267c7606..6bf89d8f374191cee48f258d1c25810b0e5dc410 100644 (file)
@@ -180,7 +180,7 @@ static const struct ltr501_samp_table ltr501_ps_samp_table[] = {
                        {500000, 2000000}
 };
 
-static unsigned int ltr501_match_samp_freq(const struct ltr501_samp_table *tab,
+static int ltr501_match_samp_freq(const struct ltr501_samp_table *tab,
                                           int len, int val, int val2)
 {
        int i, freq;
index f5ecd6e19f5de725e715e547a5527f5875050561..a0d7deeac62f78dfc416bf597e0b894436c75a8c 100644 (file)
@@ -117,7 +117,7 @@ static int mpl115_read_raw(struct iio_dev *indio_dev,
                *val = ret >> 6;
                return IIO_VAL_INT;
        case IIO_CHAN_INFO_OFFSET:
-               *val = 605;
+               *val = -605;
                *val2 = 750000;
                return IIO_VAL_INT_PLUS_MICRO;
        case IIO_CHAN_INFO_SCALE:
index 93e29fb67fa00f94e9ca0a614addfc5cde7ebb76..db35e04a063767a9b2b6011cf3d0345c9d0929bd 100644 (file)
@@ -87,7 +87,7 @@ static int lidar_i2c_xfer(struct lidar_data *data, u8 reg, u8 *val, int len)
 
        ret = i2c_transfer(client->adapter, msg, 2);
 
-       return (ret == 2) ? 0 : ret;
+       return (ret == 2) ? 0 : -EIO;
 }
 
 static int lidar_smbus_xfer(struct lidar_data *data, u8 reg, u8 *val, int len)
index aa26f3c3416bbbcd04dac1ec5381fcb081105d5e..8a8440c0eed1bf64519c5b7bce7903d010246c1c 100644 (file)
@@ -5,6 +5,7 @@ menuconfig INFINIBAND
        depends on NET
        depends on INET
        depends on m || IPV6 != m
+       select IRQ_POLL
        ---help---
          Core support for InfiniBand (IB).  Make sure to also select
          any protocols you wish to use as well as drivers for your
@@ -54,6 +55,15 @@ config INFINIBAND_ADDR_TRANS
        depends on INFINIBAND
        default y
 
+config INFINIBAND_ADDR_TRANS_CONFIGFS
+       bool
+       depends on INFINIBAND_ADDR_TRANS && CONFIGFS_FS && !(INFINIBAND=y && CONFIGFS_FS=m)
+       default y
+       ---help---
+         ConfigFS support for RDMA communication manager (CM).
+         This allows the user to config the default GID type that the CM
+         uses for each device, when initiaing new connections.
+
 source "drivers/infiniband/hw/mthca/Kconfig"
 source "drivers/infiniband/hw/qib/Kconfig"
 source "drivers/infiniband/hw/cxgb3/Kconfig"
index d43a8994ac5c129d201b0f164442c618192eea9d..f818538a7f4e118b309052c3d43a1aa18d5dbe65 100644 (file)
@@ -8,7 +8,7 @@ obj-$(CONFIG_INFINIBAND_USER_MAD) +=    ib_umad.o
 obj-$(CONFIG_INFINIBAND_USER_ACCESS) +=        ib_uverbs.o ib_ucm.o \
                                        $(user_access-y)
 
-ib_core-y :=                   packer.o ud_header.o verbs.o sysfs.o \
+ib_core-y :=                   packer.o ud_header.o verbs.o cq.o sysfs.o \
                                device.o fmr_pool.o cache.o netlink.o \
                                roce_gid_mgmt.o
 ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
@@ -24,6 +24,8 @@ iw_cm-y :=                    iwcm.o iwpm_util.o iwpm_msg.o
 
 rdma_cm-y :=                   cma.o
 
+rdma_cm-$(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS) += cma_configfs.o
+
 rdma_ucm-y :=                  ucma.o
 
 ib_addr-y :=                   addr.o
index 34b1adad07aacf92e9bbfa9621e084f2ec756f74..337353d86cfadec33064e10117539769fcc0f95d 100644 (file)
@@ -121,7 +121,8 @@ int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
 }
 EXPORT_SYMBOL(rdma_copy_addr);
 
-int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
+int rdma_translate_ip(const struct sockaddr *addr,
+                     struct rdma_dev_addr *dev_addr,
                      u16 *vlan_id)
 {
        struct net_device *dev;
@@ -139,7 +140,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
        switch (addr->sa_family) {
        case AF_INET:
                dev = ip_dev_find(dev_addr->net,
-                       ((struct sockaddr_in *) addr)->sin_addr.s_addr);
+                       ((const struct sockaddr_in *)addr)->sin_addr.s_addr);
 
                if (!dev)
                        return ret;
@@ -154,7 +155,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
                rcu_read_lock();
                for_each_netdev_rcu(dev_addr->net, dev) {
                        if (ipv6_chk_addr(dev_addr->net,
-                                         &((struct sockaddr_in6 *) addr)->sin6_addr,
+                                         &((const struct sockaddr_in6 *)addr)->sin6_addr,
                                          dev, 1)) {
                                ret = rdma_copy_addr(dev_addr, dev, NULL);
                                if (vlan_id)
@@ -198,7 +199,8 @@ static void queue_req(struct addr_req *req)
        mutex_unlock(&lock);
 }
 
-static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, void *daddr)
+static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
+                       const void *daddr)
 {
        struct neighbour *n;
        int ret;
@@ -222,8 +224,9 @@ static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, v
 }
 
 static int addr4_resolve(struct sockaddr_in *src_in,
-                        struct sockaddr_in *dst_in,
-                        struct rdma_dev_addr *addr)
+                        const struct sockaddr_in *dst_in,
+                        struct rdma_dev_addr *addr,
+                        struct rtable **prt)
 {
        __be32 src_ip = src_in->sin_addr.s_addr;
        __be32 dst_ip = dst_in->sin_addr.s_addr;
@@ -243,33 +246,29 @@ static int addr4_resolve(struct sockaddr_in *src_in,
        src_in->sin_family = AF_INET;
        src_in->sin_addr.s_addr = fl4.saddr;
 
-       if (rt->dst.dev->flags & IFF_LOOPBACK) {
-               ret = rdma_translate_ip((struct sockaddr *)dst_in, addr, NULL);
-               if (!ret)
-                       memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
-               goto put;
-       }
+       /* If there's a gateway, we're definitely in RoCE v2 (as RoCE v1 isn't
+        * routable) and we could set the network type accordingly.
+        */
+       if (rt->rt_uses_gateway)
+               addr->network = RDMA_NETWORK_IPV4;
 
-       /* If the device does ARP internally, return 'done' */
-       if (rt->dst.dev->flags & IFF_NOARP) {
-               ret = rdma_copy_addr(addr, rt->dst.dev, NULL);
-               goto put;
-       }
+       addr->hoplimit = ip4_dst_hoplimit(&rt->dst);
 
-       ret = dst_fetch_ha(&rt->dst, addr, &fl4.daddr);
-put:
-       ip_rt_put(rt);
+       *prt = rt;
+       return 0;
 out:
        return ret;
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
 static int addr6_resolve(struct sockaddr_in6 *src_in,
-                        struct sockaddr_in6 *dst_in,
-                        struct rdma_dev_addr *addr)
+                        const struct sockaddr_in6 *dst_in,
+                        struct rdma_dev_addr *addr,
+                        struct dst_entry **pdst)
 {
        struct flowi6 fl6;
        struct dst_entry *dst;
+       struct rt6_info *rt;
        int ret;
 
        memset(&fl6, 0, sizeof fl6);
@@ -281,6 +280,7 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
        if ((ret = dst->error))
                goto put;
 
+       rt = (struct rt6_info *)dst;
        if (ipv6_addr_any(&fl6.saddr)) {
                ret = ipv6_dev_get_saddr(addr->net, ip6_dst_idev(dst)->dev,
                                         &fl6.daddr, 0, &fl6.saddr);
@@ -291,43 +291,111 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
                src_in->sin6_addr = fl6.saddr;
        }
 
-       if (dst->dev->flags & IFF_LOOPBACK) {
-               ret = rdma_translate_ip((struct sockaddr *)dst_in, addr, NULL);
-               if (!ret)
-                       memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
-               goto put;
-       }
+       /* If there's a gateway, we're definitely in RoCE v2 (as RoCE v1 isn't
+        * routable) and we could set the network type accordingly.
+        */
+       if (rt->rt6i_flags & RTF_GATEWAY)
+               addr->network = RDMA_NETWORK_IPV6;
 
-       /* If the device does ARP internally, return 'done' */
-       if (dst->dev->flags & IFF_NOARP) {
-               ret = rdma_copy_addr(addr, dst->dev, NULL);
-               goto put;
-       }
+       addr->hoplimit = ip6_dst_hoplimit(dst);
 
-       ret = dst_fetch_ha(dst, addr, &fl6.daddr);
+       *pdst = dst;
+       return 0;
 put:
        dst_release(dst);
        return ret;
 }
 #else
 static int addr6_resolve(struct sockaddr_in6 *src_in,
-                        struct sockaddr_in6 *dst_in,
-                        struct rdma_dev_addr *addr)
+                        const struct sockaddr_in6 *dst_in,
+                        struct rdma_dev_addr *addr,
+                        struct dst_entry **pdst)
 {
        return -EADDRNOTAVAIL;
 }
 #endif
 
+static int addr_resolve_neigh(struct dst_entry *dst,
+                             const struct sockaddr *dst_in,
+                             struct rdma_dev_addr *addr)
+{
+       if (dst->dev->flags & IFF_LOOPBACK) {
+               int ret;
+
+               ret = rdma_translate_ip(dst_in, addr, NULL);
+               if (!ret)
+                       memcpy(addr->dst_dev_addr, addr->src_dev_addr,
+                              MAX_ADDR_LEN);
+
+               return ret;
+       }
+
+       /* If the device doesn't do ARP internally */
+       if (!(dst->dev->flags & IFF_NOARP)) {
+               const struct sockaddr_in *dst_in4 =
+                       (const struct sockaddr_in *)dst_in;
+               const struct sockaddr_in6 *dst_in6 =
+                       (const struct sockaddr_in6 *)dst_in;
+
+               return dst_fetch_ha(dst, addr,
+                                   dst_in->sa_family == AF_INET ?
+                                   (const void *)&dst_in4->sin_addr.s_addr :
+                                   (const void *)&dst_in6->sin6_addr);
+       }
+
+       return rdma_copy_addr(addr, dst->dev, NULL);
+}
+
 static int addr_resolve(struct sockaddr *src_in,
-                       struct sockaddr *dst_in,
-                       struct rdma_dev_addr *addr)
+                       const struct sockaddr *dst_in,
+                       struct rdma_dev_addr *addr,
+                       bool resolve_neigh)
 {
+       struct net_device *ndev;
+       struct dst_entry *dst;
+       int ret;
+
        if (src_in->sa_family == AF_INET) {
-               return addr4_resolve((struct sockaddr_in *) src_in,
-                       (struct sockaddr_in *) dst_in, addr);
-       } else
-               return addr6_resolve((struct sockaddr_in6 *) src_in,
-                       (struct sockaddr_in6 *) dst_in, addr);
+               struct rtable *rt = NULL;
+               const struct sockaddr_in *dst_in4 =
+                       (const struct sockaddr_in *)dst_in;
+
+               ret = addr4_resolve((struct sockaddr_in *)src_in,
+                                   dst_in4, addr, &rt);
+               if (ret)
+                       return ret;
+
+               if (resolve_neigh)
+                       ret = addr_resolve_neigh(&rt->dst, dst_in, addr);
+
+               ndev = rt->dst.dev;
+               dev_hold(ndev);
+
+               ip_rt_put(rt);
+       } else {
+               const struct sockaddr_in6 *dst_in6 =
+                       (const struct sockaddr_in6 *)dst_in;
+
+               ret = addr6_resolve((struct sockaddr_in6 *)src_in,
+                                   dst_in6, addr,
+                                   &dst);
+               if (ret)
+                       return ret;
+
+               if (resolve_neigh)
+                       ret = addr_resolve_neigh(dst, dst_in, addr);
+
+               ndev = dst->dev;
+               dev_hold(ndev);
+
+               dst_release(dst);
+       }
+
+       addr->bound_dev_if = ndev->ifindex;
+       addr->net = dev_net(ndev);
+       dev_put(ndev);
+
+       return ret;
 }
 
 static void process_req(struct work_struct *work)
@@ -343,7 +411,8 @@ static void process_req(struct work_struct *work)
                if (req->status == -ENODATA) {
                        src_in = (struct sockaddr *) &req->src_addr;
                        dst_in = (struct sockaddr *) &req->dst_addr;
-                       req->status = addr_resolve(src_in, dst_in, req->addr);
+                       req->status = addr_resolve(src_in, dst_in, req->addr,
+                                                  true);
                        if (req->status && time_after_eq(jiffies, req->timeout))
                                req->status = -ETIMEDOUT;
                        else if (req->status == -ENODATA)
@@ -403,7 +472,7 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
        req->client = client;
        atomic_inc(&client->refcount);
 
-       req->status = addr_resolve(src_in, dst_in, addr);
+       req->status = addr_resolve(src_in, dst_in, addr, true);
        switch (req->status) {
        case 0:
                req->timeout = jiffies;
@@ -425,6 +494,26 @@ err:
 }
 EXPORT_SYMBOL(rdma_resolve_ip);
 
+int rdma_resolve_ip_route(struct sockaddr *src_addr,
+                         const struct sockaddr *dst_addr,
+                         struct rdma_dev_addr *addr)
+{
+       struct sockaddr_storage ssrc_addr = {};
+       struct sockaddr *src_in = (struct sockaddr *)&ssrc_addr;
+
+       if (src_addr) {
+               if (src_addr->sa_family != dst_addr->sa_family)
+                       return -EINVAL;
+
+               memcpy(src_in, src_addr, rdma_addr_size(src_addr));
+       } else {
+               src_in->sa_family = dst_addr->sa_family;
+       }
+
+       return addr_resolve(src_in, dst_addr, addr, false);
+}
+EXPORT_SYMBOL(rdma_resolve_ip_route);
+
 void rdma_addr_cancel(struct rdma_dev_addr *addr)
 {
        struct addr_req *req, *temp_req;
@@ -456,8 +545,10 @@ static void resolve_cb(int status, struct sockaddr *src_addr,
        complete(&((struct resolve_cb_context *)context)->comp);
 }
 
-int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgid,
-                              u8 *dmac, u16 *vlan_id, int if_index)
+int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
+                                const union ib_gid *dgid,
+                                u8 *dmac, u16 *vlan_id, int *if_index,
+                                int *hoplimit)
 {
        int ret = 0;
        struct rdma_dev_addr dev_addr;
@@ -475,7 +566,8 @@ int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgi
        rdma_gid2ip(&dgid_addr._sockaddr, dgid);
 
        memset(&dev_addr, 0, sizeof(dev_addr));
-       dev_addr.bound_dev_if = if_index;
+       if (if_index)
+               dev_addr.bound_dev_if = *if_index;
        dev_addr.net = &init_net;
 
        ctx.addr = &dev_addr;
@@ -491,12 +583,16 @@ int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgi
        dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if);
        if (!dev)
                return -ENODEV;
+       if (if_index)
+               *if_index = dev_addr.bound_dev_if;
        if (vlan_id)
                *vlan_id = rdma_vlan_dev_vlan_id(dev);
+       if (hoplimit)
+               *hoplimit = dev_addr.hoplimit;
        dev_put(dev);
        return ret;
 }
-EXPORT_SYMBOL(rdma_addr_find_dmac_by_grh);
+EXPORT_SYMBOL(rdma_addr_find_l2_eth_by_grh);
 
 int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id)
 {
index 89bebeada38b9b5f6a09d9dd896c8047fa57d7ad..53343ffbff7a1302b03ce5f6a66116b06f1e60b5 100644 (file)
@@ -64,6 +64,7 @@ enum gid_attr_find_mask {
        GID_ATTR_FIND_MASK_GID          = 1UL << 0,
        GID_ATTR_FIND_MASK_NETDEV       = 1UL << 1,
        GID_ATTR_FIND_MASK_DEFAULT      = 1UL << 2,
+       GID_ATTR_FIND_MASK_GID_TYPE     = 1UL << 3,
 };
 
 enum gid_table_entry_props {
@@ -81,10 +82,6 @@ enum gid_table_write_action {
 };
 
 struct ib_gid_table_entry {
-       /* This lock protects an entry from being
-        * read and written simultaneously.
-        */
-       rwlock_t            lock;
        unsigned long       props;
        union ib_gid        gid;
        struct ib_gid_attr  attr;
@@ -109,28 +106,86 @@ struct ib_gid_table {
         * are locked by this lock.
         **/
        struct mutex         lock;
+       /* This lock protects the table entries from being
+        * read and written simultaneously.
+        */
+       rwlock_t             rwlock;
        struct ib_gid_table_entry *data_vec;
 };
 
+static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port)
+{
+       if (rdma_cap_roce_gid_table(ib_dev, port)) {
+               struct ib_event event;
+
+               event.device            = ib_dev;
+               event.element.port_num  = port;
+               event.event             = IB_EVENT_GID_CHANGE;
+
+               ib_dispatch_event(&event);
+       }
+}
+
+static const char * const gid_type_str[] = {
+       [IB_GID_TYPE_IB]        = "IB/RoCE v1",
+       [IB_GID_TYPE_ROCE_UDP_ENCAP]    = "RoCE v2",
+};
+
+const char *ib_cache_gid_type_str(enum ib_gid_type gid_type)
+{
+       if (gid_type < ARRAY_SIZE(gid_type_str) && gid_type_str[gid_type])
+               return gid_type_str[gid_type];
+
+       return "Invalid GID type";
+}
+EXPORT_SYMBOL(ib_cache_gid_type_str);
+
+int ib_cache_gid_parse_type_str(const char *buf)
+{
+       unsigned int i;
+       size_t len;
+       int err = -EINVAL;
+
+       len = strlen(buf);
+       if (len == 0)
+               return -EINVAL;
+
+       if (buf[len - 1] == '\n')
+               len--;
+
+       for (i = 0; i < ARRAY_SIZE(gid_type_str); ++i)
+               if (gid_type_str[i] && !strncmp(buf, gid_type_str[i], len) &&
+                   len == strlen(gid_type_str[i])) {
+                       err = i;
+                       break;
+               }
+
+       return err;
+}
+EXPORT_SYMBOL(ib_cache_gid_parse_type_str);
+
+/* This function expects that rwlock will be write locked in all
+ * scenarios and that lock will be locked in sleep-able (RoCE)
+ * scenarios.
+ */
 static int write_gid(struct ib_device *ib_dev, u8 port,
                     struct ib_gid_table *table, int ix,
                     const union ib_gid *gid,
                     const struct ib_gid_attr *attr,
                     enum gid_table_write_action action,
                     bool  default_gid)
+       __releases(&table->rwlock) __acquires(&table->rwlock)
 {
        int ret = 0;
        struct net_device *old_net_dev;
-       unsigned long flags;
 
        /* in rdma_cap_roce_gid_table, this funciton should be protected by a
         * sleep-able lock.
         */
-       write_lock_irqsave(&table->data_vec[ix].lock, flags);
 
        if (rdma_cap_roce_gid_table(ib_dev, port)) {
                table->data_vec[ix].props |= GID_TABLE_ENTRY_INVALID;
-               write_unlock_irqrestore(&table->data_vec[ix].lock, flags);
+               write_unlock_irq(&table->rwlock);
                /* GID_TABLE_WRITE_ACTION_MODIFY currently isn't supported by
                 * RoCE providers and thus only updates the cache.
                 */
@@ -140,7 +195,7 @@ static int write_gid(struct ib_device *ib_dev, u8 port,
                else if (action == GID_TABLE_WRITE_ACTION_DEL)
                        ret = ib_dev->del_gid(ib_dev, port, ix,
                                              &table->data_vec[ix].context);
-               write_lock_irqsave(&table->data_vec[ix].lock, flags);
+               write_lock_irq(&table->rwlock);
        }
 
        old_net_dev = table->data_vec[ix].attr.ndev;
@@ -162,17 +217,6 @@ static int write_gid(struct ib_device *ib_dev, u8 port,
 
        table->data_vec[ix].props &= ~GID_TABLE_ENTRY_INVALID;
 
-       write_unlock_irqrestore(&table->data_vec[ix].lock, flags);
-
-       if (!ret && rdma_cap_roce_gid_table(ib_dev, port)) {
-               struct ib_event event;
-
-               event.device            = ib_dev;
-               event.element.port_num  = port;
-               event.event             = IB_EVENT_GID_CHANGE;
-
-               ib_dispatch_event(&event);
-       }
        return ret;
 }
 
@@ -201,41 +245,58 @@ static int del_gid(struct ib_device *ib_dev, u8 port,
                         GID_TABLE_WRITE_ACTION_DEL, default_gid);
 }
 
+/* rwlock should be read locked */
 static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
                    const struct ib_gid_attr *val, bool default_gid,
-                   unsigned long mask)
+                   unsigned long mask, int *pempty)
 {
-       int i;
+       int i = 0;
+       int found = -1;
+       int empty = pempty ? -1 : 0;
 
-       for (i = 0; i < table->sz; i++) {
-               unsigned long flags;
-               struct ib_gid_attr *attr = &table->data_vec[i].attr;
+       while (i < table->sz && (found < 0 || empty < 0)) {
+               struct ib_gid_table_entry *data = &table->data_vec[i];
+               struct ib_gid_attr *attr = &data->attr;
+               int curr_index = i;
 
-               read_lock_irqsave(&table->data_vec[i].lock, flags);
+               i++;
 
-               if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID)
-                       goto next;
+               if (data->props & GID_TABLE_ENTRY_INVALID)
+                       continue;
+
+               if (empty < 0)
+                       if (!memcmp(&data->gid, &zgid, sizeof(*gid)) &&
+                           !memcmp(attr, &zattr, sizeof(*attr)) &&
+                           !data->props)
+                               empty = curr_index;
+
+               if (found >= 0)
+                       continue;
+
+               if (mask & GID_ATTR_FIND_MASK_GID_TYPE &&
+                   attr->gid_type != val->gid_type)
+                       continue;
 
                if (mask & GID_ATTR_FIND_MASK_GID &&
-                   memcmp(gid, &table->data_vec[i].gid, sizeof(*gid)))
-                       goto next;
+                   memcmp(gid, &data->gid, sizeof(*gid)))
+                       continue;
 
                if (mask & GID_ATTR_FIND_MASK_NETDEV &&
                    attr->ndev != val->ndev)
-                       goto next;
+                       continue;
 
                if (mask & GID_ATTR_FIND_MASK_DEFAULT &&
-                   !!(table->data_vec[i].props & GID_TABLE_ENTRY_DEFAULT) !=
+                   !!(data->props & GID_TABLE_ENTRY_DEFAULT) !=
                    default_gid)
-                       goto next;
+                       continue;
 
-               read_unlock_irqrestore(&table->data_vec[i].lock, flags);
-               return i;
-next:
-               read_unlock_irqrestore(&table->data_vec[i].lock, flags);
+               found = curr_index;
        }
 
-       return -1;
+       if (pempty)
+               *pempty = empty;
+
+       return found;
 }
 
 static void make_default_gid(struct  net_device *dev, union ib_gid *gid)
@@ -252,6 +313,7 @@ int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
        int ix;
        int ret = 0;
        struct net_device *idev;
+       int empty;
 
        table = ports_table[port - rdma_start_port(ib_dev)];
 
@@ -275,22 +337,25 @@ int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
        }
 
        mutex_lock(&table->lock);
+       write_lock_irq(&table->rwlock);
 
        ix = find_gid(table, gid, attr, false, GID_ATTR_FIND_MASK_GID |
-                     GID_ATTR_FIND_MASK_NETDEV);
+                     GID_ATTR_FIND_MASK_GID_TYPE |
+                     GID_ATTR_FIND_MASK_NETDEV, &empty);
        if (ix >= 0)
                goto out_unlock;
 
-       ix = find_gid(table, &zgid, NULL, false, GID_ATTR_FIND_MASK_GID |
-                     GID_ATTR_FIND_MASK_DEFAULT);
-       if (ix < 0) {
+       if (empty < 0) {
                ret = -ENOSPC;
                goto out_unlock;
        }
 
-       add_gid(ib_dev, port, table, ix, gid, attr, false);
+       ret = add_gid(ib_dev, port, table, empty, gid, attr, false);
+       if (!ret)
+               dispatch_gid_change_event(ib_dev, port);
 
 out_unlock:
+       write_unlock_irq(&table->rwlock);
        mutex_unlock(&table->lock);
        return ret;
 }
@@ -305,17 +370,22 @@ int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
        table = ports_table[port - rdma_start_port(ib_dev)];
 
        mutex_lock(&table->lock);
+       write_lock_irq(&table->rwlock);
 
        ix = find_gid(table, gid, attr, false,
                      GID_ATTR_FIND_MASK_GID      |
+                     GID_ATTR_FIND_MASK_GID_TYPE |
                      GID_ATTR_FIND_MASK_NETDEV   |
-                     GID_ATTR_FIND_MASK_DEFAULT);
+                     GID_ATTR_FIND_MASK_DEFAULT,
+                     NULL);
        if (ix < 0)
                goto out_unlock;
 
-       del_gid(ib_dev, port, table, ix, false);
+       if (!del_gid(ib_dev, port, table, ix, false))
+               dispatch_gid_change_event(ib_dev, port);
 
 out_unlock:
+       write_unlock_irq(&table->rwlock);
        mutex_unlock(&table->lock);
        return 0;
 }
@@ -326,16 +396,24 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
        struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
        struct ib_gid_table *table;
        int ix;
+       bool deleted = false;
 
        table  = ports_table[port - rdma_start_port(ib_dev)];
 
        mutex_lock(&table->lock);
+       write_lock_irq(&table->rwlock);
 
        for (ix = 0; ix < table->sz; ix++)
                if (table->data_vec[ix].attr.ndev == ndev)
-                       del_gid(ib_dev, port, table, ix, false);
+                       if (!del_gid(ib_dev, port, table, ix, false))
+                               deleted = true;
 
+       write_unlock_irq(&table->rwlock);
        mutex_unlock(&table->lock);
+
+       if (deleted)
+               dispatch_gid_change_event(ib_dev, port);
+
        return 0;
 }
 
@@ -344,18 +422,14 @@ static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index,
 {
        struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
        struct ib_gid_table *table;
-       unsigned long flags;
 
        table = ports_table[port - rdma_start_port(ib_dev)];
 
        if (index < 0 || index >= table->sz)
                return -EINVAL;
 
-       read_lock_irqsave(&table->data_vec[index].lock, flags);
-       if (table->data_vec[index].props & GID_TABLE_ENTRY_INVALID) {
-               read_unlock_irqrestore(&table->data_vec[index].lock, flags);
+       if (table->data_vec[index].props & GID_TABLE_ENTRY_INVALID)
                return -EAGAIN;
-       }
 
        memcpy(gid, &table->data_vec[index].gid, sizeof(*gid));
        if (attr) {
@@ -364,7 +438,6 @@ static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index,
                        dev_hold(attr->ndev);
        }
 
-       read_unlock_irqrestore(&table->data_vec[index].lock, flags);
        return 0;
 }
 
@@ -378,17 +451,21 @@ static int _ib_cache_gid_table_find(struct ib_device *ib_dev,
        struct ib_gid_table *table;
        u8 p;
        int local_index;
+       unsigned long flags;
 
        for (p = 0; p < ib_dev->phys_port_cnt; p++) {
                table = ports_table[p];
-               local_index = find_gid(table, gid, val, false, mask);
+               read_lock_irqsave(&table->rwlock, flags);
+               local_index = find_gid(table, gid, val, false, mask, NULL);
                if (local_index >= 0) {
                        if (index)
                                *index = local_index;
                        if (port)
                                *port = p + rdma_start_port(ib_dev);
+                       read_unlock_irqrestore(&table->rwlock, flags);
                        return 0;
                }
+               read_unlock_irqrestore(&table->rwlock, flags);
        }
 
        return -ENOENT;
@@ -396,11 +473,13 @@ static int _ib_cache_gid_table_find(struct ib_device *ib_dev,
 
 static int ib_cache_gid_find(struct ib_device *ib_dev,
                             const union ib_gid *gid,
+                            enum ib_gid_type gid_type,
                             struct net_device *ndev, u8 *port,
                             u16 *index)
 {
-       unsigned long mask = GID_ATTR_FIND_MASK_GID;
-       struct ib_gid_attr gid_attr_val = {.ndev = ndev};
+       unsigned long mask = GID_ATTR_FIND_MASK_GID |
+                            GID_ATTR_FIND_MASK_GID_TYPE;
+       struct ib_gid_attr gid_attr_val = {.ndev = ndev, .gid_type = gid_type};
 
        if (ndev)
                mask |= GID_ATTR_FIND_MASK_NETDEV;
@@ -411,14 +490,17 @@ static int ib_cache_gid_find(struct ib_device *ib_dev,
 
 int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
                               const union ib_gid *gid,
+                              enum ib_gid_type gid_type,
                               u8 port, struct net_device *ndev,
                               u16 *index)
 {
        int local_index;
        struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
        struct ib_gid_table *table;
-       unsigned long mask = GID_ATTR_FIND_MASK_GID;
-       struct ib_gid_attr val = {.ndev = ndev};
+       unsigned long mask = GID_ATTR_FIND_MASK_GID |
+                            GID_ATTR_FIND_MASK_GID_TYPE;
+       struct ib_gid_attr val = {.ndev = ndev, .gid_type = gid_type};
+       unsigned long flags;
 
        if (port < rdma_start_port(ib_dev) ||
            port > rdma_end_port(ib_dev))
@@ -429,13 +511,16 @@ int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
        if (ndev)
                mask |= GID_ATTR_FIND_MASK_NETDEV;
 
-       local_index = find_gid(table, gid, &val, false, mask);
+       read_lock_irqsave(&table->rwlock, flags);
+       local_index = find_gid(table, gid, &val, false, mask, NULL);
        if (local_index >= 0) {
                if (index)
                        *index = local_index;
+               read_unlock_irqrestore(&table->rwlock, flags);
                return 0;
        }
 
+       read_unlock_irqrestore(&table->rwlock, flags);
        return -ENOENT;
 }
 EXPORT_SYMBOL(ib_find_cached_gid_by_port);
@@ -472,6 +557,7 @@ static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
        struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
        struct ib_gid_table *table;
        unsigned int i;
+       unsigned long flags;
        bool found = false;
 
        if (!ports_table)
@@ -484,11 +570,10 @@ static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
 
        table = ports_table[port - rdma_start_port(ib_dev)];
 
+       read_lock_irqsave(&table->rwlock, flags);
        for (i = 0; i < table->sz; i++) {
                struct ib_gid_attr attr;
-               unsigned long flags;
 
-               read_lock_irqsave(&table->data_vec[i].lock, flags);
                if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID)
                        goto next;
 
@@ -501,11 +586,10 @@ static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
                        found = true;
 
 next:
-               read_unlock_irqrestore(&table->data_vec[i].lock, flags);
-
                if (found)
                        break;
        }
+       read_unlock_irqrestore(&table->rwlock, flags);
 
        if (!found)
                return -ENOENT;
@@ -517,9 +601,9 @@ next:
 
 static struct ib_gid_table *alloc_gid_table(int sz)
 {
-       unsigned int i;
        struct ib_gid_table *table =
                kzalloc(sizeof(struct ib_gid_table), GFP_KERNEL);
+
        if (!table)
                return NULL;
 
@@ -530,9 +614,7 @@ static struct ib_gid_table *alloc_gid_table(int sz)
        mutex_init(&table->lock);
 
        table->sz = sz;
-
-       for (i = 0; i < sz; i++)
-               rwlock_init(&table->data_vec[i].lock);
+       rwlock_init(&table->rwlock);
 
        return table;
 
@@ -553,30 +635,37 @@ static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port,
                                   struct ib_gid_table *table)
 {
        int i;
+       bool deleted = false;
 
        if (!table)
                return;
 
+       write_lock_irq(&table->rwlock);
        for (i = 0; i < table->sz; ++i) {
                if (memcmp(&table->data_vec[i].gid, &zgid,
                           sizeof(table->data_vec[i].gid)))
-                       del_gid(ib_dev, port, table, i,
-                               table->data_vec[i].props &
-                               GID_ATTR_FIND_MASK_DEFAULT);
+                       if (!del_gid(ib_dev, port, table, i,
+                                    table->data_vec[i].props &
+                                    GID_ATTR_FIND_MASK_DEFAULT))
+                               deleted = true;
        }
+       write_unlock_irq(&table->rwlock);
+
+       if (deleted)
+               dispatch_gid_change_event(ib_dev, port);
 }
 
 void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
                                  struct net_device *ndev,
+                                 unsigned long gid_type_mask,
                                  enum ib_cache_gid_default_mode mode)
 {
        struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
        union ib_gid gid;
        struct ib_gid_attr gid_attr;
+       struct ib_gid_attr zattr_type = zattr;
        struct ib_gid_table *table;
-       int ix;
-       union ib_gid current_gid;
-       struct ib_gid_attr current_gid_attr = {};
+       unsigned int gid_type;
 
        table  = ports_table[port - rdma_start_port(ib_dev)];
 
@@ -584,46 +673,82 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
        memset(&gid_attr, 0, sizeof(gid_attr));
        gid_attr.ndev = ndev;
 
-       mutex_lock(&table->lock);
-       ix = find_gid(table, NULL, NULL, true, GID_ATTR_FIND_MASK_DEFAULT);
-
-       /* Coudn't find default GID location */
-       WARN_ON(ix < 0);
-
-       if (!__ib_cache_gid_get(ib_dev, port, ix,
-                               &current_gid, &current_gid_attr) &&
-           mode == IB_CACHE_GID_DEFAULT_MODE_SET &&
-           !memcmp(&gid, &current_gid, sizeof(gid)) &&
-           !memcmp(&gid_attr, &current_gid_attr, sizeof(gid_attr)))
-               goto unlock;
-
-       if ((memcmp(&current_gid, &zgid, sizeof(current_gid)) ||
-            memcmp(&current_gid_attr, &zattr,
-                   sizeof(current_gid_attr))) &&
-           del_gid(ib_dev, port, table, ix, true)) {
-               pr_warn("ib_cache_gid: can't delete index %d for default gid %pI6\n",
-                       ix, gid.raw);
-               goto unlock;
-       }
+       for (gid_type = 0; gid_type < IB_GID_TYPE_SIZE; ++gid_type) {
+               int ix;
+               union ib_gid current_gid;
+               struct ib_gid_attr current_gid_attr = {};
+
+               if (1UL << gid_type & ~gid_type_mask)
+                       continue;
+
+               gid_attr.gid_type = gid_type;
+
+               mutex_lock(&table->lock);
+               write_lock_irq(&table->rwlock);
+               ix = find_gid(table, NULL, &gid_attr, true,
+                             GID_ATTR_FIND_MASK_GID_TYPE |
+                             GID_ATTR_FIND_MASK_DEFAULT,
+                             NULL);
+
+               /* Coudn't find default GID location */
+               WARN_ON(ix < 0);
+
+               zattr_type.gid_type = gid_type;
+
+               if (!__ib_cache_gid_get(ib_dev, port, ix,
+                                       &current_gid, &current_gid_attr) &&
+                   mode == IB_CACHE_GID_DEFAULT_MODE_SET &&
+                   !memcmp(&gid, &current_gid, sizeof(gid)) &&
+                   !memcmp(&gid_attr, &current_gid_attr, sizeof(gid_attr)))
+                       goto release;
+
+               if (memcmp(&current_gid, &zgid, sizeof(current_gid)) ||
+                   memcmp(&current_gid_attr, &zattr_type,
+                          sizeof(current_gid_attr))) {
+                       if (del_gid(ib_dev, port, table, ix, true)) {
+                               pr_warn("ib_cache_gid: can't delete index %d for default gid %pI6\n",
+                                       ix, gid.raw);
+                               goto release;
+                       } else {
+                               dispatch_gid_change_event(ib_dev, port);
+                       }
+               }
 
-       if (mode == IB_CACHE_GID_DEFAULT_MODE_SET)
-               if (add_gid(ib_dev, port, table, ix, &gid, &gid_attr, true))
-                       pr_warn("ib_cache_gid: unable to add default gid %pI6\n",
-                               gid.raw);
+               if (mode == IB_CACHE_GID_DEFAULT_MODE_SET) {
+                       if (add_gid(ib_dev, port, table, ix, &gid, &gid_attr, true))
+                               pr_warn("ib_cache_gid: unable to add default gid %pI6\n",
+                                       gid.raw);
+                       else
+                               dispatch_gid_change_event(ib_dev, port);
+               }
 
-unlock:
-       if (current_gid_attr.ndev)
-               dev_put(current_gid_attr.ndev);
-       mutex_unlock(&table->lock);
+release:
+               if (current_gid_attr.ndev)
+                       dev_put(current_gid_attr.ndev);
+               write_unlock_irq(&table->rwlock);
+               mutex_unlock(&table->lock);
+       }
 }
 
 static int gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
                                     struct ib_gid_table *table)
 {
-       if (rdma_protocol_roce(ib_dev, port)) {
-               struct ib_gid_table_entry *entry = &table->data_vec[0];
+       unsigned int i;
+       unsigned long roce_gid_type_mask;
+       unsigned int num_default_gids;
+       unsigned int current_gid = 0;
+
+       roce_gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
+       num_default_gids = hweight_long(roce_gid_type_mask);
+       for (i = 0; i < num_default_gids && i < table->sz; i++) {
+               struct ib_gid_table_entry *entry =
+                       &table->data_vec[i];
 
                entry->props |= GID_TABLE_ENTRY_DEFAULT;
+               current_gid = find_next_bit(&roce_gid_type_mask,
+                                           BITS_PER_LONG,
+                                           current_gid);
+               entry->attr.gid_type = current_gid++;
        }
 
        return 0;
@@ -728,20 +853,30 @@ int ib_get_cached_gid(struct ib_device *device,
                      union ib_gid     *gid,
                      struct ib_gid_attr *gid_attr)
 {
+       int res;
+       unsigned long flags;
+       struct ib_gid_table **ports_table = device->cache.gid_cache;
+       struct ib_gid_table *table = ports_table[port_num - rdma_start_port(device)];
+
        if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
                return -EINVAL;
 
-       return __ib_cache_gid_get(device, port_num, index, gid, gid_attr);
+       read_lock_irqsave(&table->rwlock, flags);
+       res = __ib_cache_gid_get(device, port_num, index, gid, gid_attr);
+       read_unlock_irqrestore(&table->rwlock, flags);
+
+       return res;
 }
 EXPORT_SYMBOL(ib_get_cached_gid);
 
 int ib_find_cached_gid(struct ib_device *device,
                       const union ib_gid *gid,
+                      enum ib_gid_type gid_type,
                       struct net_device *ndev,
                       u8               *port_num,
                       u16              *index)
 {
-       return ib_cache_gid_find(device, gid, ndev, port_num, index);
+       return ib_cache_gid_find(device, gid, gid_type, ndev, port_num, index);
 }
 EXPORT_SYMBOL(ib_find_cached_gid);
 
@@ -956,10 +1091,12 @@ static void ib_cache_update(struct ib_device *device,
 
        device->cache.pkey_cache[port - rdma_start_port(device)] = pkey_cache;
        if (!use_roce_gid_table) {
+               write_lock(&table->rwlock);
                for (i = 0; i < gid_cache->table_len; i++) {
                        modify_gid(device, port, table, i, gid_cache->table + i,
                                   &zattr, false);
                }
+               write_unlock(&table->rwlock);
        }
 
        device->cache.lmc_cache[port - rdma_start_port(device)] = tprops->lmc;
index 0a26dd6d9b19f96d97c9b3b244509892afae7023..1d92e091e22ef0bdb64ee7433eb2cf5fbba9600c 100644 (file)
@@ -364,7 +364,7 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
        read_lock_irqsave(&cm.device_lock, flags);
        list_for_each_entry(cm_dev, &cm.device_list, list) {
                if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid,
-                                       ndev, &p, NULL)) {
+                                       path->gid_type, ndev, &p, NULL)) {
                        port = cm_dev->port[p-1];
                        break;
                }
@@ -782,11 +782,11 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
        wait_time = cm_convert_to_ms(cm_id_priv->av.timeout);
 
        /* Check if the device started its remove_one */
-       spin_lock_irq(&cm.lock);
+       spin_lock_irqsave(&cm.lock, flags);
        if (!cm_dev->going_down)
                queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work,
                                   msecs_to_jiffies(wait_time));
-       spin_unlock_irq(&cm.lock);
+       spin_unlock_irqrestore(&cm.lock, flags);
 
        cm_id_priv->timewait_info = NULL;
 }
@@ -1600,6 +1600,8 @@ static int cm_req_handler(struct cm_work *work)
        struct ib_cm_id *cm_id;
        struct cm_id_private *cm_id_priv, *listen_cm_id_priv;
        struct cm_req_msg *req_msg;
+       union ib_gid gid;
+       struct ib_gid_attr gid_attr;
        int ret;
 
        req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
@@ -1639,11 +1641,31 @@ static int cm_req_handler(struct cm_work *work)
        cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]);
 
        memcpy(work->path[0].dmac, cm_id_priv->av.ah_attr.dmac, ETH_ALEN);
-       ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
+       work->path[0].hop_limit = cm_id_priv->av.ah_attr.grh.hop_limit;
+       ret = ib_get_cached_gid(work->port->cm_dev->ib_device,
+                               work->port->port_num,
+                               cm_id_priv->av.ah_attr.grh.sgid_index,
+                               &gid, &gid_attr);
+       if (!ret) {
+               if (gid_attr.ndev) {
+                       work->path[0].ifindex = gid_attr.ndev->ifindex;
+                       work->path[0].net = dev_net(gid_attr.ndev);
+                       dev_put(gid_attr.ndev);
+               }
+               work->path[0].gid_type = gid_attr.gid_type;
+               ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
+       }
        if (ret) {
-               ib_get_cached_gid(work->port->cm_dev->ib_device,
-                                 work->port->port_num, 0, &work->path[0].sgid,
-                                 NULL);
+               int err = ib_get_cached_gid(work->port->cm_dev->ib_device,
+                                           work->port->port_num, 0,
+                                           &work->path[0].sgid,
+                                           &gid_attr);
+               if (!err && gid_attr.ndev) {
+                       work->path[0].ifindex = gid_attr.ndev->ifindex;
+                       work->path[0].net = dev_net(gid_attr.ndev);
+                       dev_put(gid_attr.ndev);
+               }
+               work->path[0].gid_type = gid_attr.gid_type;
                ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
                               &work->path[0].sgid, sizeof work->path[0].sgid,
                               NULL, 0);
@@ -3482,6 +3504,7 @@ int ib_cm_notify(struct ib_cm_id *cm_id, enum ib_event_type event)
 EXPORT_SYMBOL(ib_cm_notify);
 
 static void cm_recv_handler(struct ib_mad_agent *mad_agent,
+                           struct ib_mad_send_buf *send_buf,
                            struct ib_mad_recv_wc *mad_recv_wc)
 {
        struct cm_port *port = mad_agent->context;
@@ -3731,16 +3754,6 @@ int ib_cm_init_qp_attr(struct ib_cm_id *cm_id,
 }
 EXPORT_SYMBOL(ib_cm_init_qp_attr);
 
-static void cm_get_ack_delay(struct cm_device *cm_dev)
-{
-       struct ib_device_attr attr;
-
-       if (ib_query_device(cm_dev->ib_device, &attr))
-               cm_dev->ack_delay = 0; /* acks will rely on packet life time */
-       else
-               cm_dev->ack_delay = attr.local_ca_ack_delay;
-}
-
 static ssize_t cm_show_counter(struct kobject *obj, struct attribute *attr,
                               char *buf)
 {
@@ -3852,7 +3865,7 @@ static void cm_add_one(struct ib_device *ib_device)
                return;
 
        cm_dev->ib_device = ib_device;
-       cm_get_ack_delay(cm_dev);
+       cm_dev->ack_delay = ib_device->attrs.local_ca_ack_delay;
        cm_dev->going_down = 0;
        cm_dev->device = device_create(&cm_class, &ib_device->dev,
                                       MKDEV(0, 0), NULL,
index 2d762a2ecd81252cde34cbc2dbc1083c1d8832dc..9729639df407abdc382959ef8151fbbcc3370589 100644 (file)
@@ -38,6 +38,7 @@
 #include <linux/in6.h>
 #include <linux/mutex.h>
 #include <linux/random.h>
+#include <linux/igmp.h>
 #include <linux/idr.h>
 #include <linux/inetdevice.h>
 #include <linux/slab.h>
@@ -60,6 +61,8 @@
 #include <rdma/ib_sa.h>
 #include <rdma/iw_cm.h>
 
+#include "core_priv.h"
+
 MODULE_AUTHOR("Sean Hefty");
 MODULE_DESCRIPTION("Generic RDMA CM Agent");
 MODULE_LICENSE("Dual BSD/GPL");
@@ -150,6 +153,7 @@ struct cma_device {
        struct completion       comp;
        atomic_t                refcount;
        struct list_head        id_list;
+       enum ib_gid_type        *default_gid_type;
 };
 
 struct rdma_bind_list {
@@ -185,6 +189,67 @@ enum {
        CMA_OPTION_AFONLY,
 };
 
+void cma_ref_dev(struct cma_device *cma_dev)
+{
+       atomic_inc(&cma_dev->refcount);
+}
+
+struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
+                                            void               *cookie)
+{
+       struct cma_device *cma_dev;
+       struct cma_device *found_cma_dev = NULL;
+
+       mutex_lock(&lock);
+
+       list_for_each_entry(cma_dev, &dev_list, list)
+               if (filter(cma_dev->device, cookie)) {
+                       found_cma_dev = cma_dev;
+                       break;
+               }
+
+       if (found_cma_dev)
+               cma_ref_dev(found_cma_dev);
+       mutex_unlock(&lock);
+       return found_cma_dev;
+}
+
+int cma_get_default_gid_type(struct cma_device *cma_dev,
+                            unsigned int port)
+{
+       if (port < rdma_start_port(cma_dev->device) ||
+           port > rdma_end_port(cma_dev->device))
+               return -EINVAL;
+
+       return cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)];
+}
+
+int cma_set_default_gid_type(struct cma_device *cma_dev,
+                            unsigned int port,
+                            enum ib_gid_type default_gid_type)
+{
+       unsigned long supported_gids;
+
+       if (port < rdma_start_port(cma_dev->device) ||
+           port > rdma_end_port(cma_dev->device))
+               return -EINVAL;
+
+       supported_gids = roce_gid_type_mask_support(cma_dev->device, port);
+
+       if (!(supported_gids & 1 << default_gid_type))
+               return -EINVAL;
+
+       cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)] =
+               default_gid_type;
+
+       return 0;
+}
+
+struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev)
+{
+       return cma_dev->device;
+}
+
 /*
  * Device removal can occur at anytime, so we need extra handling to
  * serialize notifying the user of device removal with other callbacks.
@@ -228,6 +293,7 @@ struct rdma_id_private {
        u8                      tos;
        u8                      reuseaddr;
        u8                      afonly;
+       enum ib_gid_type        gid_type;
 };
 
 struct cma_multicast {
@@ -239,6 +305,7 @@ struct cma_multicast {
        void                    *context;
        struct sockaddr_storage addr;
        struct kref             mcref;
+       bool                    igmp_joined;
 };
 
 struct cma_work {
@@ -335,18 +402,48 @@ static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
        hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
 }
 
-static void cma_attach_to_dev(struct rdma_id_private *id_priv,
-                             struct cma_device *cma_dev)
+static int cma_igmp_send(struct net_device *ndev, union ib_gid *mgid, bool join)
 {
-       atomic_inc(&cma_dev->refcount);
+       struct in_device *in_dev = NULL;
+
+       if (ndev) {
+               rtnl_lock();
+               in_dev = __in_dev_get_rtnl(ndev);
+               if (in_dev) {
+                       if (join)
+                               ip_mc_inc_group(in_dev,
+                                               *(__be32 *)(mgid->raw + 12));
+                       else
+                               ip_mc_dec_group(in_dev,
+                                               *(__be32 *)(mgid->raw + 12));
+               }
+               rtnl_unlock();
+       }
+       return (in_dev) ? 0 : -ENODEV;
+}
+
+static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
+                              struct cma_device *cma_dev)
+{
+       cma_ref_dev(cma_dev);
        id_priv->cma_dev = cma_dev;
+       id_priv->gid_type = 0;
        id_priv->id.device = cma_dev->device;
        id_priv->id.route.addr.dev_addr.transport =
                rdma_node_get_transport(cma_dev->device->node_type);
        list_add_tail(&id_priv->list, &cma_dev->id_list);
 }
 
-static inline void cma_deref_dev(struct cma_device *cma_dev)
+static void cma_attach_to_dev(struct rdma_id_private *id_priv,
+                             struct cma_device *cma_dev)
+{
+       _cma_attach_to_dev(id_priv, cma_dev);
+       id_priv->gid_type =
+               cma_dev->default_gid_type[id_priv->id.port_num -
+                                         rdma_start_port(cma_dev->device)];
+}
+
+void cma_deref_dev(struct cma_device *cma_dev)
 {
        if (atomic_dec_and_test(&cma_dev->refcount))
                complete(&cma_dev->comp);
@@ -441,6 +538,7 @@ static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_a
 }
 
 static inline int cma_validate_port(struct ib_device *device, u8 port,
+                                   enum ib_gid_type gid_type,
                                      union ib_gid *gid, int dev_type,
                                      int bound_if_index)
 {
@@ -453,10 +551,25 @@ static inline int cma_validate_port(struct ib_device *device, u8 port,
        if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port))
                return ret;
 
-       if (dev_type == ARPHRD_ETHER)
+       if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) {
                ndev = dev_get_by_index(&init_net, bound_if_index);
+               if (ndev && ndev->flags & IFF_LOOPBACK) {
+                       pr_info("detected loopback device\n");
+                       dev_put(ndev);
 
-       ret = ib_find_cached_gid_by_port(device, gid, port, ndev, NULL);
+                       if (!device->get_netdev)
+                               return -EOPNOTSUPP;
+
+                       ndev = device->get_netdev(device, port);
+                       if (!ndev)
+                               return -ENODEV;
+               }
+       } else {
+               gid_type = IB_GID_TYPE_IB;
+       }
+
+       ret = ib_find_cached_gid_by_port(device, gid, gid_type, port,
+                                        ndev, NULL);
 
        if (ndev)
                dev_put(ndev);
@@ -490,7 +603,10 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv,
                gidp = rdma_protocol_roce(cma_dev->device, port) ?
                       &iboe_gid : &gid;
 
-               ret = cma_validate_port(cma_dev->device, port, gidp,
+               ret = cma_validate_port(cma_dev->device, port,
+                                       rdma_protocol_ib(cma_dev->device, port) ?
+                                       IB_GID_TYPE_IB :
+                                       listen_id_priv->gid_type, gidp,
                                        dev_addr->dev_type,
                                        dev_addr->bound_dev_if);
                if (!ret) {
@@ -509,8 +625,11 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv,
                        gidp = rdma_protocol_roce(cma_dev->device, port) ?
                               &iboe_gid : &gid;
 
-                       ret = cma_validate_port(cma_dev->device, port, gidp,
-                                               dev_addr->dev_type,
+                       ret = cma_validate_port(cma_dev->device, port,
+                                               rdma_protocol_ib(cma_dev->device, port) ?
+                                               IB_GID_TYPE_IB :
+                                               cma_dev->default_gid_type[port - 1],
+                                               gidp, dev_addr->dev_type,
                                                dev_addr->bound_dev_if);
                        if (!ret) {
                                id_priv->id.port_num = port;
@@ -1437,8 +1556,24 @@ static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
                                      id_priv->id.port_num)) {
                        ib_sa_free_multicast(mc->multicast.ib);
                        kfree(mc);
-               } else
+               } else {
+                       if (mc->igmp_joined) {
+                               struct rdma_dev_addr *dev_addr =
+                                       &id_priv->id.route.addr.dev_addr;
+                               struct net_device *ndev = NULL;
+
+                               if (dev_addr->bound_dev_if)
+                                       ndev = dev_get_by_index(&init_net,
+                                                               dev_addr->bound_dev_if);
+                               if (ndev) {
+                                       cma_igmp_send(ndev,
+                                                     &mc->multicast.ib->rec.mgid,
+                                                     false);
+                                       dev_put(ndev);
+                               }
+                       }
                        kref_put(&mc->mcref, release_mc);
+               }
        }
 }
 
@@ -1896,7 +2031,6 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
        struct rdma_id_private *listen_id, *conn_id;
        struct rdma_cm_event event;
        int ret;
-       struct ib_device_attr attr;
        struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr;
        struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr;
 
@@ -1938,13 +2072,6 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
        memcpy(cma_src_addr(conn_id), laddr, rdma_addr_size(laddr));
        memcpy(cma_dst_addr(conn_id), raddr, rdma_addr_size(raddr));
 
-       ret = ib_query_device(conn_id->id.device, &attr);
-       if (ret) {
-               mutex_unlock(&conn_id->handler_mutex);
-               rdma_destroy_id(new_cm_id);
-               goto out;
-       }
-
        memset(&event, 0, sizeof event);
        event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
        event.param.conn.private_data = iw_event->private_data;
@@ -2051,7 +2178,7 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
        memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv),
               rdma_addr_size(cma_src_addr(id_priv)));
 
-       cma_attach_to_dev(dev_id_priv, cma_dev);
+       _cma_attach_to_dev(dev_id_priv, cma_dev);
        list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
        atomic_inc(&id_priv->refcount);
        dev_id_priv->internal_id = 1;
@@ -2321,8 +2448,23 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
 
        if (addr->dev_addr.bound_dev_if) {
                ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if);
+               if (!ndev)
+                       return -ENODEV;
+
+               if (ndev->flags & IFF_LOOPBACK) {
+                       dev_put(ndev);
+                       if (!id_priv->id.device->get_netdev)
+                               return -EOPNOTSUPP;
+
+                       ndev = id_priv->id.device->get_netdev(id_priv->id.device,
+                                                             id_priv->id.port_num);
+                       if (!ndev)
+                               return -ENODEV;
+               }
+
                route->path_rec->net = &init_net;
-               route->path_rec->ifindex = addr->dev_addr.bound_dev_if;
+               route->path_rec->ifindex = ndev->ifindex;
+               route->path_rec->gid_type = id_priv->gid_type;
        }
        if (!ndev) {
                ret = -ENODEV;
@@ -2336,7 +2478,14 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
        rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.dst_addr,
                    &route->path_rec->dgid);
 
-       route->path_rec->hop_limit = 1;
+       /* Use the hint from IP Stack to select GID Type */
+       if (route->path_rec->gid_type < ib_network_to_gid_type(addr->dev_addr.network))
+               route->path_rec->gid_type = ib_network_to_gid_type(addr->dev_addr.network);
+       if (((struct sockaddr *)&id_priv->id.route.addr.dst_addr)->sa_family != AF_IB)
+               /* TODO: get the hoplimit from the inet/inet6 device */
+               route->path_rec->hop_limit = addr->dev_addr.hoplimit;
+       else
+               route->path_rec->hop_limit = 1;
        route->path_rec->reversible = 1;
        route->path_rec->pkey = cpu_to_be16(0xffff);
        route->path_rec->mtu_selector = IB_SA_EQ;
@@ -3534,12 +3683,23 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
        event.status = status;
        event.param.ud.private_data = mc->context;
        if (!status) {
+               struct rdma_dev_addr *dev_addr =
+                       &id_priv->id.route.addr.dev_addr;
+               struct net_device *ndev =
+                       dev_get_by_index(&init_net, dev_addr->bound_dev_if);
+               enum ib_gid_type gid_type =
+                       id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
+                       rdma_start_port(id_priv->cma_dev->device)];
+
                event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
                ib_init_ah_from_mcmember(id_priv->id.device,
                                         id_priv->id.port_num, &multicast->rec,
+                                        ndev, gid_type,
                                         &event.param.ud.ah_attr);
                event.param.ud.qp_num = 0xFFFFFF;
                event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
+               if (ndev)
+                       dev_put(ndev);
        } else
                event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
 
@@ -3672,9 +3832,10 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
 {
        struct iboe_mcast_work *work;
        struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
-       int err;
+       int err = 0;
        struct sockaddr *addr = (struct sockaddr *)&mc->addr;
        struct net_device *ndev = NULL;
+       enum ib_gid_type gid_type;
 
        if (cma_zero_addr((struct sockaddr *)&mc->addr))
                return -EINVAL;
@@ -3704,9 +3865,25 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
        mc->multicast.ib->rec.rate = iboe_get_rate(ndev);
        mc->multicast.ib->rec.hop_limit = 1;
        mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->mtu);
+
+       gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
+                  rdma_start_port(id_priv->cma_dev->device)];
+       if (addr->sa_family == AF_INET) {
+               if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
+                       err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid,
+                                           true);
+               if (!err) {
+                       mc->igmp_joined = true;
+                       mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
+               }
+       } else {
+               if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
+                       err = -ENOTSUPP;
+       }
        dev_put(ndev);
-       if (!mc->multicast.ib->rec.mtu) {
-               err = -EINVAL;
+       if (err || !mc->multicast.ib->rec.mtu) {
+               if (!err)
+                       err = -EINVAL;
                goto out2;
        }
        rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
@@ -3745,7 +3922,7 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
        memcpy(&mc->addr, addr, rdma_addr_size(addr));
        mc->context = context;
        mc->id_priv = id_priv;
-
+       mc->igmp_joined = false;
        spin_lock(&id_priv->lock);
        list_add(&mc->list, &id_priv->mc_list);
        spin_unlock(&id_priv->lock);
@@ -3790,9 +3967,25 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
                        if (rdma_cap_ib_mcast(id->device, id->port_num)) {
                                ib_sa_free_multicast(mc->multicast.ib);
                                kfree(mc);
-                       } else if (rdma_protocol_roce(id->device, id->port_num))
+                       } else if (rdma_protocol_roce(id->device, id->port_num)) {
+                               if (mc->igmp_joined) {
+                                       struct rdma_dev_addr *dev_addr =
+                                               &id->route.addr.dev_addr;
+                                       struct net_device *ndev = NULL;
+
+                                       if (dev_addr->bound_dev_if)
+                                               ndev = dev_get_by_index(&init_net,
+                                                                       dev_addr->bound_dev_if);
+                                       if (ndev) {
+                                               cma_igmp_send(ndev,
+                                                             &mc->multicast.ib->rec.mgid,
+                                                             false);
+                                               dev_put(ndev);
+                                       }
+                                       mc->igmp_joined = false;
+                               }
                                kref_put(&mc->mcref, release_mc);
-
+                       }
                        return;
                }
        }
@@ -3861,12 +4054,27 @@ static void cma_add_one(struct ib_device *device)
 {
        struct cma_device *cma_dev;
        struct rdma_id_private *id_priv;
+       unsigned int i;
+       unsigned long supported_gids = 0;
 
        cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL);
        if (!cma_dev)
                return;
 
        cma_dev->device = device;
+       cma_dev->default_gid_type = kcalloc(device->phys_port_cnt,
+                                           sizeof(*cma_dev->default_gid_type),
+                                           GFP_KERNEL);
+       if (!cma_dev->default_gid_type) {
+               kfree(cma_dev);
+               return;
+       }
+       for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) {
+               supported_gids = roce_gid_type_mask_support(device, i);
+               WARN_ON(!supported_gids);
+               cma_dev->default_gid_type[i - rdma_start_port(device)] =
+                       find_first_bit(&supported_gids, BITS_PER_LONG);
+       }
 
        init_completion(&cma_dev->comp);
        atomic_set(&cma_dev->refcount, 1);
@@ -3946,6 +4154,7 @@ static void cma_remove_one(struct ib_device *device, void *client_data)
        mutex_unlock(&lock);
 
        cma_process_remove(cma_dev);
+       kfree(cma_dev->default_gid_type);
        kfree(cma_dev);
 }
 
@@ -4079,6 +4288,7 @@ static int __init cma_init(void)
 
        if (ibnl_add_client(RDMA_NL_RDMA_CM, RDMA_NL_RDMA_CM_NUM_OPS, cma_cb_table))
                printk(KERN_WARNING "RDMA CMA: failed to add netlink callback\n");
+       cma_configfs_init();
 
        return 0;
 
@@ -4093,6 +4303,7 @@ err_wq:
 
 static void __exit cma_cleanup(void)
 {
+       cma_configfs_exit();
        ibnl_remove_client(RDMA_NL_RDMA_CM);
        ib_unregister_client(&cma_client);
        unregister_netdevice_notifier(&cma_nb);
diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c
new file mode 100644 (file)
index 0000000..18b112a
--- /dev/null
@@ -0,0 +1,321 @@
+/*
+ * Copyright (c) 2015, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/configfs.h>
+#include <rdma/ib_verbs.h>
+#include "core_priv.h"
+
+struct cma_device;
+
+struct cma_dev_group;
+
+struct cma_dev_port_group {
+       unsigned int            port_num;
+       struct cma_dev_group    *cma_dev_group;
+       struct config_group     group;
+};
+
+struct cma_dev_group {
+       char                            name[IB_DEVICE_NAME_MAX];
+       struct config_group             device_group;
+       struct config_group             ports_group;
+       struct config_group             *default_dev_group[2];
+       struct config_group             **default_ports_group;
+       struct cma_dev_port_group       *ports;
+};
+
+static struct cma_dev_port_group *to_dev_port_group(struct config_item *item)
+{
+       struct config_group *group;
+
+       if (!item)
+               return NULL;
+
+       group = container_of(item, struct config_group, cg_item);
+       return container_of(group, struct cma_dev_port_group, group);
+}
+
+static bool filter_by_name(struct ib_device *ib_dev, void *cookie)
+{
+       return !strcmp(ib_dev->name, cookie);
+}
+
+static int cma_configfs_params_get(struct config_item *item,
+                                  struct cma_device **pcma_dev,
+                                  struct cma_dev_port_group **pgroup)
+{
+       struct cma_dev_port_group *group = to_dev_port_group(item);
+       struct cma_device *cma_dev;
+
+       if (!group)
+               return -ENODEV;
+
+       cma_dev = cma_enum_devices_by_ibdev(filter_by_name,
+                                           group->cma_dev_group->name);
+       if (!cma_dev)
+               return -ENODEV;
+
+       *pcma_dev = cma_dev;
+       *pgroup = group;
+
+       return 0;
+}
+
+static void cma_configfs_params_put(struct cma_device *cma_dev)
+{
+       cma_deref_dev(cma_dev);
+}
+
+static ssize_t default_roce_mode_show(struct config_item *item,
+                                     char *buf)
+{
+       struct cma_device *cma_dev;
+       struct cma_dev_port_group *group;
+       int gid_type;
+       ssize_t ret;
+
+       ret = cma_configfs_params_get(item, &cma_dev, &group);
+       if (ret)
+               return ret;
+
+       gid_type = cma_get_default_gid_type(cma_dev, group->port_num);
+       cma_configfs_params_put(cma_dev);
+
+       if (gid_type < 0)
+               return gid_type;
+
+       return sprintf(buf, "%s\n", ib_cache_gid_type_str(gid_type));
+}
+
+static ssize_t default_roce_mode_store(struct config_item *item,
+                                      const char *buf, size_t count)
+{
+       struct cma_device *cma_dev;
+       struct cma_dev_port_group *group;
+       int gid_type = ib_cache_gid_parse_type_str(buf);
+       ssize_t ret;
+
+       if (gid_type < 0)
+               return -EINVAL;
+
+       ret = cma_configfs_params_get(item, &cma_dev, &group);
+       if (ret)
+               return ret;
+
+       ret = cma_set_default_gid_type(cma_dev, group->port_num, gid_type);
+
+       cma_configfs_params_put(cma_dev);
+
+       return !ret ? strnlen(buf, count) : ret;
+}
+
+CONFIGFS_ATTR(, default_roce_mode);
+
+static struct configfs_attribute *cma_configfs_attributes[] = {
+       &attr_default_roce_mode,
+       NULL,
+};
+
+static struct config_item_type cma_port_group_type = {
+       .ct_attrs       = cma_configfs_attributes,
+       .ct_owner       = THIS_MODULE
+};
+
+static int make_cma_ports(struct cma_dev_group *cma_dev_group,
+                         struct cma_device *cma_dev)
+{
+       struct ib_device *ibdev;
+       unsigned int i;
+       unsigned int ports_num;
+       struct cma_dev_port_group *ports;
+       struct config_group **ports_group;
+       int err;
+
+       ibdev = cma_get_ib_dev(cma_dev);
+
+       if (!ibdev)
+               return -ENODEV;
+
+       ports_num = ibdev->phys_port_cnt;
+       ports = kcalloc(ports_num, sizeof(*cma_dev_group->ports),
+                       GFP_KERNEL);
+       ports_group = kcalloc(ports_num + 1, sizeof(*ports_group), GFP_KERNEL);
+
+       if (!ports || !ports_group) {
+               err = -ENOMEM;
+               goto free;
+       }
+
+       for (i = 0; i < ports_num; i++) {
+               char port_str[10];
+
+               ports[i].port_num = i + 1;
+               snprintf(port_str, sizeof(port_str), "%u", i + 1);
+               ports[i].cma_dev_group = cma_dev_group;
+               config_group_init_type_name(&ports[i].group,
+                                           port_str,
+                                           &cma_port_group_type);
+               ports_group[i] = &ports[i].group;
+       }
+       ports_group[i] = NULL;
+       cma_dev_group->default_ports_group = ports_group;
+       cma_dev_group->ports = ports;
+
+       return 0;
+free:
+       kfree(ports);
+       kfree(ports_group);
+       cma_dev_group->ports = NULL;
+       cma_dev_group->default_ports_group = NULL;
+       return err;
+}
+
+static void release_cma_dev(struct config_item  *item)
+{
+       struct config_group *group = container_of(item, struct config_group,
+                                                 cg_item);
+       struct cma_dev_group *cma_dev_group = container_of(group,
+                                                          struct cma_dev_group,
+                                                          device_group);
+
+       kfree(cma_dev_group);
+};
+
+static void release_cma_ports_group(struct config_item  *item)
+{
+       struct config_group *group = container_of(item, struct config_group,
+                                                 cg_item);
+       struct cma_dev_group *cma_dev_group = container_of(group,
+                                                          struct cma_dev_group,
+                                                          ports_group);
+
+       kfree(cma_dev_group->ports);
+       kfree(cma_dev_group->default_ports_group);
+       cma_dev_group->ports = NULL;
+       cma_dev_group->default_ports_group = NULL;
+};
+
+static struct configfs_item_operations cma_ports_item_ops = {
+       .release = release_cma_ports_group
+};
+
+static struct config_item_type cma_ports_group_type = {
+       .ct_item_ops    = &cma_ports_item_ops,
+       .ct_owner       = THIS_MODULE
+};
+
+static struct configfs_item_operations cma_device_item_ops = {
+       .release = release_cma_dev
+};
+
+static struct config_item_type cma_device_group_type = {
+       .ct_item_ops    = &cma_device_item_ops,
+       .ct_owner       = THIS_MODULE
+};
+
+static struct config_group *make_cma_dev(struct config_group *group,
+                                        const char *name)
+{
+       int err = -ENODEV;
+       struct cma_device *cma_dev = cma_enum_devices_by_ibdev(filter_by_name,
+                                                              (void *)name);
+       struct cma_dev_group *cma_dev_group = NULL;
+
+       if (!cma_dev)
+               goto fail;
+
+       cma_dev_group = kzalloc(sizeof(*cma_dev_group), GFP_KERNEL);
+
+       if (!cma_dev_group) {
+               err = -ENOMEM;
+               goto fail;
+       }
+
+       strncpy(cma_dev_group->name, name, sizeof(cma_dev_group->name));
+
+       err = make_cma_ports(cma_dev_group, cma_dev);
+       if (err)
+               goto fail;
+
+       cma_dev_group->ports_group.default_groups =
+               cma_dev_group->default_ports_group;
+       config_group_init_type_name(&cma_dev_group->ports_group, "ports",
+                                   &cma_ports_group_type);
+
+       cma_dev_group->device_group.default_groups
+               = cma_dev_group->default_dev_group;
+       cma_dev_group->default_dev_group[0] = &cma_dev_group->ports_group;
+       cma_dev_group->default_dev_group[1] = NULL;
+
+       config_group_init_type_name(&cma_dev_group->device_group, name,
+                                   &cma_device_group_type);
+
+       cma_deref_dev(cma_dev);
+       return &cma_dev_group->device_group;
+
+fail:
+       if (cma_dev)
+               cma_deref_dev(cma_dev);
+       kfree(cma_dev_group);
+       return ERR_PTR(err);
+}
+
+static struct configfs_group_operations cma_subsys_group_ops = {
+       .make_group     = make_cma_dev,
+};
+
+static struct config_item_type cma_subsys_type = {
+       .ct_group_ops   = &cma_subsys_group_ops,
+       .ct_owner       = THIS_MODULE,
+};
+
+static struct configfs_subsystem cma_subsys = {
+       .su_group       = {
+               .cg_item        = {
+                       .ci_namebuf     = "rdma_cm",
+                       .ci_type        = &cma_subsys_type,
+               },
+       },
+};
+
+int __init cma_configfs_init(void)
+{
+       config_group_init(&cma_subsys.su_group);
+       mutex_init(&cma_subsys.su_mutex);
+       return configfs_register_subsystem(&cma_subsys);
+}
+
+void __exit cma_configfs_exit(void)
+{
+       configfs_unregister_subsystem(&cma_subsys);
+}
index 5cf6eb716f000a7aa07233419a0dcbbce41e0b6d..eab32215756b935159355e22c8d9bf76f108f873 100644 (file)
 
 #include <rdma/ib_verbs.h>
 
+#if IS_ENABLED(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS)
+int cma_configfs_init(void);
+void cma_configfs_exit(void);
+#else
+static inline int cma_configfs_init(void)
+{
+       return 0;
+}
+
+static inline void cma_configfs_exit(void)
+{
+}
+#endif
+struct cma_device;
+void cma_ref_dev(struct cma_device *cma_dev);
+void cma_deref_dev(struct cma_device *cma_dev);
+typedef bool (*cma_device_filter)(struct ib_device *, void *);
+struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
+                                            void               *cookie);
+int cma_get_default_gid_type(struct cma_device *cma_dev,
+                            unsigned int port);
+int cma_set_default_gid_type(struct cma_device *cma_dev,
+                            unsigned int port,
+                            enum ib_gid_type default_gid_type);
+struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev);
+
 int  ib_device_register_sysfs(struct ib_device *device,
                              int (*port_callback)(struct ib_device *,
                                                   u8, struct kobject *));
@@ -70,8 +96,13 @@ enum ib_cache_gid_default_mode {
        IB_CACHE_GID_DEFAULT_MODE_DELETE
 };
 
+int ib_cache_gid_parse_type_str(const char *buf);
+
+const char *ib_cache_gid_type_str(enum ib_gid_type gid_type);
+
 void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
                                  struct net_device *ndev,
+                                 unsigned long gid_type_mask,
                                  enum ib_cache_gid_default_mode mode);
 
 int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
@@ -87,9 +118,23 @@ int roce_gid_mgmt_init(void);
 void roce_gid_mgmt_cleanup(void);
 
 int roce_rescan_device(struct ib_device *ib_dev);
+unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port);
 
 int ib_cache_setup_one(struct ib_device *device);
 void ib_cache_cleanup_one(struct ib_device *device);
 void ib_cache_release_one(struct ib_device *device);
 
+static inline bool rdma_is_upper_dev_rcu(struct net_device *dev,
+                                        struct net_device *upper)
+{
+       struct net_device *_upper = NULL;
+       struct list_head *iter;
+
+       netdev_for_each_all_upper_dev_rcu(dev, _upper, iter)
+               if (_upper == upper)
+                       break;
+
+       return _upper == upper;
+}
+
 #endif /* _CORE_PRIV_H */
diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c
new file mode 100644 (file)
index 0000000..a754fc7
--- /dev/null
@@ -0,0 +1,209 @@
+/*
+ * Copyright (c) 2015 HGST, a Western Digital Company.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#include <linux/module.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <rdma/ib_verbs.h>
+
+/* # of WCs to poll for with a single call to ib_poll_cq */
+#define IB_POLL_BATCH                  16
+
+/* # of WCs to iterate over before yielding */
+#define IB_POLL_BUDGET_IRQ             256
+#define IB_POLL_BUDGET_WORKQUEUE       65536
+
+#define IB_POLL_FLAGS \
+       (IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS)
+
+static int __ib_process_cq(struct ib_cq *cq, int budget)
+{
+       int i, n, completed = 0;
+
+       while ((n = ib_poll_cq(cq, IB_POLL_BATCH, cq->wc)) > 0) {
+               for (i = 0; i < n; i++) {
+                       struct ib_wc *wc = &cq->wc[i];
+
+                       if (wc->wr_cqe)
+                               wc->wr_cqe->done(cq, wc);
+                       else
+                               WARN_ON_ONCE(wc->status == IB_WC_SUCCESS);
+               }
+
+               completed += n;
+
+               if (n != IB_POLL_BATCH ||
+                   (budget != -1 && completed >= budget))
+                       break;
+       }
+
+       return completed;
+}
+
+/**
+ * ib_process_direct_cq - process a CQ in caller context
+ * @cq:                CQ to process
+ * @budget:    number of CQEs to poll for
+ *
+ * This function is used to process all outstanding CQ entries on a
+ * %IB_POLL_DIRECT CQ.  It does not offload CQ processing to a different
+ * context and does not ask for completion interrupts from the HCA.
+ *
+ * Note: for compatibility reasons -1 can be passed in %budget for unlimited
+ * polling.  Do not use this feature in new code, it will be removed soon.
+ */
+int ib_process_cq_direct(struct ib_cq *cq, int budget)
+{
+       WARN_ON_ONCE(cq->poll_ctx != IB_POLL_DIRECT);
+
+       return __ib_process_cq(cq, budget);
+}
+EXPORT_SYMBOL(ib_process_cq_direct);
+
+static void ib_cq_completion_direct(struct ib_cq *cq, void *private)
+{
+       WARN_ONCE(1, "got unsolicited completion for CQ 0x%p\n", cq);
+}
+
+static int ib_poll_handler(struct irq_poll *iop, int budget)
+{
+       struct ib_cq *cq = container_of(iop, struct ib_cq, iop);
+       int completed;
+
+       completed = __ib_process_cq(cq, budget);
+       if (completed < budget) {
+               irq_poll_complete(&cq->iop);
+               if (ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0)
+                       irq_poll_sched(&cq->iop);
+       }
+
+       return completed;
+}
+
+static void ib_cq_completion_softirq(struct ib_cq *cq, void *private)
+{
+       irq_poll_sched(&cq->iop);
+}
+
+static void ib_cq_poll_work(struct work_struct *work)
+{
+       struct ib_cq *cq = container_of(work, struct ib_cq, work);
+       int completed;
+
+       completed = __ib_process_cq(cq, IB_POLL_BUDGET_WORKQUEUE);
+       if (completed >= IB_POLL_BUDGET_WORKQUEUE ||
+           ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0)
+               queue_work(ib_comp_wq, &cq->work);
+}
+
+static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private)
+{
+       queue_work(ib_comp_wq, &cq->work);
+}
+
+/**
+ * ib_alloc_cq - allocate a completion queue
+ * @dev:               device to allocate the CQ for
+ * @private:           driver private data, accessible from cq->cq_context
+ * @nr_cqe:            number of CQEs to allocate
+ * @comp_vector:       HCA completion vectors for this CQ
+ * @poll_ctx:          context to poll the CQ from.
+ *
+ * This is the proper interface to allocate a CQ for in-kernel users. A
+ * CQ allocated with this interface will automatically be polled from the
+ * specified context.  The ULP needs must use wr->wr_cqe instead of wr->wr_id
+ * to use this CQ abstraction.
+ */
+struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
+               int nr_cqe, int comp_vector, enum ib_poll_context poll_ctx)
+{
+       struct ib_cq_init_attr cq_attr = {
+               .cqe            = nr_cqe,
+               .comp_vector    = comp_vector,
+       };
+       struct ib_cq *cq;
+       int ret = -ENOMEM;
+
+       cq = dev->create_cq(dev, &cq_attr, NULL, NULL);
+       if (IS_ERR(cq))
+               return cq;
+
+       cq->device = dev;
+       cq->uobject = NULL;
+       cq->event_handler = NULL;
+       cq->cq_context = private;
+       cq->poll_ctx = poll_ctx;
+       atomic_set(&cq->usecnt, 0);
+
+       cq->wc = kmalloc_array(IB_POLL_BATCH, sizeof(*cq->wc), GFP_KERNEL);
+       if (!cq->wc)
+               goto out_destroy_cq;
+
+       switch (cq->poll_ctx) {
+       case IB_POLL_DIRECT:
+               cq->comp_handler = ib_cq_completion_direct;
+               break;
+       case IB_POLL_SOFTIRQ:
+               cq->comp_handler = ib_cq_completion_softirq;
+
+               irq_poll_init(&cq->iop, IB_POLL_BUDGET_IRQ, ib_poll_handler);
+               ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
+               break;
+       case IB_POLL_WORKQUEUE:
+               cq->comp_handler = ib_cq_completion_workqueue;
+               INIT_WORK(&cq->work, ib_cq_poll_work);
+               ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
+               break;
+       default:
+               ret = -EINVAL;
+               goto out_free_wc;
+       }
+
+       return cq;
+
+out_free_wc:
+       kfree(cq->wc);
+out_destroy_cq:
+       cq->device->destroy_cq(cq);
+       return ERR_PTR(ret);
+}
+EXPORT_SYMBOL(ib_alloc_cq);
+
+/**
+ * ib_free_cq - free a completion queue
+ * @cq:                completion queue to free.
+ */
+void ib_free_cq(struct ib_cq *cq)
+{
+       int ret;
+
+       if (WARN_ON_ONCE(atomic_read(&cq->usecnt)))
+               return;
+
+       switch (cq->poll_ctx) {
+       case IB_POLL_DIRECT:
+               break;
+       case IB_POLL_SOFTIRQ:
+               irq_poll_disable(&cq->iop);
+               break;
+       case IB_POLL_WORKQUEUE:
+               flush_work(&cq->work);
+               break;
+       default:
+               WARN_ON_ONCE(1);
+       }
+
+       kfree(cq->wc);
+       ret = cq->device->destroy_cq(cq);
+       WARN_ON_ONCE(ret);
+}
+EXPORT_SYMBOL(ib_free_cq);
index 179e8134d57fc13b425254d5162006f9fc201879..00da80e02154205c428ca00702f4b7c10fa5a829 100644 (file)
@@ -58,6 +58,7 @@ struct ib_client_data {
        bool              going_down;
 };
 
+struct workqueue_struct *ib_comp_wq;
 struct workqueue_struct *ib_wq;
 EXPORT_SYMBOL_GPL(ib_wq);
 
@@ -325,6 +326,7 @@ int ib_register_device(struct ib_device *device,
 {
        int ret;
        struct ib_client *client;
+       struct ib_udata uhw = {.outlen = 0, .inlen = 0};
 
        mutex_lock(&device_mutex);
 
@@ -352,6 +354,13 @@ int ib_register_device(struct ib_device *device,
                goto out;
        }
 
+       memset(&device->attrs, 0, sizeof(device->attrs));
+       ret = device->query_device(device, &device->attrs, &uhw);
+       if (ret) {
+               printk(KERN_WARNING "Couldn't query the device attributes\n");
+               goto out;
+       }
+
        ret = ib_device_register_sysfs(device, port_callback);
        if (ret) {
                printk(KERN_WARNING "Couldn't register device %s with driver model\n",
@@ -627,25 +636,6 @@ void ib_dispatch_event(struct ib_event *event)
 }
 EXPORT_SYMBOL(ib_dispatch_event);
 
-/**
- * ib_query_device - Query IB device attributes
- * @device:Device to query
- * @device_attr:Device attributes
- *
- * ib_query_device() returns the attributes of a device through the
- * @device_attr pointer.
- */
-int ib_query_device(struct ib_device *device,
-                   struct ib_device_attr *device_attr)
-{
-       struct ib_udata uhw = {.outlen = 0, .inlen = 0};
-
-       memset(device_attr, 0, sizeof(*device_attr));
-
-       return device->query_device(device, device_attr, &uhw);
-}
-EXPORT_SYMBOL(ib_query_device);
-
 /**
  * ib_query_port - Query IB port attributes
  * @device:Device to query
@@ -825,26 +815,31 @@ EXPORT_SYMBOL(ib_modify_port);
  *   a specified GID value occurs.
  * @device: The device to query.
  * @gid: The GID value to search for.
+ * @gid_type: Type of GID.
  * @ndev: The ndev related to the GID to search for.
  * @port_num: The port number of the device where the GID value was found.
  * @index: The index into the GID table where the GID was found.  This
  *   parameter may be NULL.
  */
 int ib_find_gid(struct ib_device *device, union ib_gid *gid,
-               struct net_device *ndev, u8 *port_num, u16 *index)
+               enum ib_gid_type gid_type, struct net_device *ndev,
+               u8 *port_num, u16 *index)
 {
        union ib_gid tmp_gid;
        int ret, port, i;
 
        for (port = rdma_start_port(device); port <= rdma_end_port(device); ++port) {
                if (rdma_cap_roce_gid_table(device, port)) {
-                       if (!ib_find_cached_gid_by_port(device, gid, port,
+                       if (!ib_find_cached_gid_by_port(device, gid, gid_type, port,
                                                        ndev, index)) {
                                *port_num = port;
                                return 0;
                        }
                }
 
+               if (gid_type != IB_GID_TYPE_IB)
+                       continue;
+
                for (i = 0; i < device->port_immutable[port].gid_tbl_len; ++i) {
                        ret = ib_query_gid(device, port, i, &tmp_gid, NULL);
                        if (ret)
@@ -954,10 +949,18 @@ static int __init ib_core_init(void)
        if (!ib_wq)
                return -ENOMEM;
 
+       ib_comp_wq = alloc_workqueue("ib-comp-wq",
+                       WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM,
+                       WQ_UNBOUND_MAX_ACTIVE);
+       if (!ib_comp_wq) {
+               ret = -ENOMEM;
+               goto err;
+       }
+
        ret = class_register(&ib_class);
        if (ret) {
                printk(KERN_WARNING "Couldn't create InfiniBand device class\n");
-               goto err;
+               goto err_comp;
        }
 
        ret = ibnl_init();
@@ -972,7 +975,8 @@ static int __init ib_core_init(void)
 
 err_sysfs:
        class_unregister(&ib_class);
-
+err_comp:
+       destroy_workqueue(ib_comp_wq);
 err:
        destroy_workqueue(ib_wq);
        return ret;
@@ -983,6 +987,7 @@ static void __exit ib_core_cleanup(void)
        ib_cache_cleanup();
        ibnl_cleanup();
        class_unregister(&ib_class);
+       destroy_workqueue(ib_comp_wq);
        /* Make sure that any pending umem accounting work is done. */
        destroy_workqueue(ib_wq);
 }
index 9f5ad7cc33c89985fb1c490c27f07766e38e29fa..6ac3683c144ba859ff34709cc292e2fc3208f621 100644 (file)
@@ -212,7 +212,6 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd             *pd,
 {
        struct ib_device   *device;
        struct ib_fmr_pool *pool;
-       struct ib_device_attr *attr;
        int i;
        int ret;
        int max_remaps;
@@ -228,25 +227,10 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd             *pd,
                return ERR_PTR(-ENOSYS);
        }
 
-       attr = kmalloc(sizeof *attr, GFP_KERNEL);
-       if (!attr) {
-               printk(KERN_WARNING PFX "couldn't allocate device attr struct\n");
-               return ERR_PTR(-ENOMEM);
-       }
-
-       ret = ib_query_device(device, attr);
-       if (ret) {
-               printk(KERN_WARNING PFX "couldn't query device: %d\n", ret);
-               kfree(attr);
-               return ERR_PTR(ret);
-       }
-
-       if (!attr->max_map_per_fmr)
+       if (!device->attrs.max_map_per_fmr)
                max_remaps = IB_FMR_MAX_REMAPS;
        else
-               max_remaps = attr->max_map_per_fmr;
-
-       kfree(attr);
+               max_remaps = device->attrs.max_map_per_fmr;
 
        pool = kmalloc(sizeof *pool, GFP_KERNEL);
        if (!pool) {
index 2281de122038e45a5c375f7d7669111ab2aee2de..9fa5bf33f5a34261b16a72c9800b55daab921c84 100644 (file)
@@ -84,6 +84,9 @@ static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
                              u8 mgmt_class);
 static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
                           struct ib_mad_agent_private *agent_priv);
+static bool ib_mad_send_error(struct ib_mad_port_private *port_priv,
+                             struct ib_wc *wc);
+static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc);
 
 /*
  * Returns a ib_mad_port_private structure or NULL for a device/port
@@ -681,7 +684,7 @@ static void snoop_recv(struct ib_mad_qp_info *qp_info,
 
                atomic_inc(&mad_snoop_priv->refcount);
                spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
-               mad_snoop_priv->agent.recv_handler(&mad_snoop_priv->agent,
+               mad_snoop_priv->agent.recv_handler(&mad_snoop_priv->agent, NULL,
                                                   mad_recv_wc);
                deref_snoop_agent(mad_snoop_priv);
                spin_lock_irqsave(&qp_info->snoop_lock, flags);
@@ -689,12 +692,11 @@ static void snoop_recv(struct ib_mad_qp_info *qp_info,
        spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
 }
 
-static void build_smp_wc(struct ib_qp *qp,
-                        u64 wr_id, u16 slid, u16 pkey_index, u8 port_num,
-                        struct ib_wc *wc)
+static void build_smp_wc(struct ib_qp *qp, struct ib_cqe *cqe, u16 slid,
+               u16 pkey_index, u8 port_num, struct ib_wc *wc)
 {
        memset(wc, 0, sizeof *wc);
-       wc->wr_id = wr_id;
+       wc->wr_cqe = cqe;
        wc->status = IB_WC_SUCCESS;
        wc->opcode = IB_WC_RECV;
        wc->pkey_index = pkey_index;
@@ -832,7 +834,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
        }
 
        build_smp_wc(mad_agent_priv->agent.qp,
-                    send_wr->wr.wr_id, drslid,
+                    send_wr->wr.wr_cqe, drslid,
                     send_wr->pkey_index,
                     send_wr->port_num, &mad_wc);
 
@@ -1039,7 +1041,9 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
 
        mad_send_wr->sg_list[1].lkey = mad_agent->qp->pd->local_dma_lkey;
 
-       mad_send_wr->send_wr.wr.wr_id = (unsigned long) mad_send_wr;
+       mad_send_wr->mad_list.cqe.done = ib_mad_send_done;
+
+       mad_send_wr->send_wr.wr.wr_cqe = &mad_send_wr->mad_list.cqe;
        mad_send_wr->send_wr.wr.sg_list = mad_send_wr->sg_list;
        mad_send_wr->send_wr.wr.num_sge = 2;
        mad_send_wr->send_wr.wr.opcode = IB_WR_SEND;
@@ -1151,8 +1155,9 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
 
        /* Set WR ID to find mad_send_wr upon completion */
        qp_info = mad_send_wr->mad_agent_priv->qp_info;
-       mad_send_wr->send_wr.wr.wr_id = (unsigned long)&mad_send_wr->mad_list;
        mad_send_wr->mad_list.mad_queue = &qp_info->send_queue;
+       mad_send_wr->mad_list.cqe.done = ib_mad_send_done;
+       mad_send_wr->send_wr.wr.wr_cqe = &mad_send_wr->mad_list.cqe;
 
        mad_agent = mad_send_wr->send_buf.mad_agent;
        sge = mad_send_wr->sg_list;
@@ -1982,9 +1987,9 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
                                /* user rmpp is in effect
                                 * and this is an active RMPP MAD
                                 */
-                               mad_recv_wc->wc->wr_id = 0;
-                               mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
-                                                                  mad_recv_wc);
+                               mad_agent_priv->agent.recv_handler(
+                                               &mad_agent_priv->agent, NULL,
+                                               mad_recv_wc);
                                atomic_dec(&mad_agent_priv->refcount);
                        } else {
                                /* not user rmpp, revert to normal behavior and
@@ -1998,9 +2003,10 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
                        spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
 
                        /* Defined behavior is to complete response before request */
-                       mad_recv_wc->wc->wr_id = (unsigned long) &mad_send_wr->send_buf;
-                       mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
-                                                          mad_recv_wc);
+                       mad_agent_priv->agent.recv_handler(
+                                       &mad_agent_priv->agent,
+                                       &mad_send_wr->send_buf,
+                                       mad_recv_wc);
                        atomic_dec(&mad_agent_priv->refcount);
 
                        mad_send_wc.status = IB_WC_SUCCESS;
@@ -2009,7 +2015,7 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
                        ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
                }
        } else {
-               mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
+               mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent, NULL,
                                                   mad_recv_wc);
                deref_mad_agent(mad_agent_priv);
        }
@@ -2172,13 +2178,14 @@ handle_smi(struct ib_mad_port_private *port_priv,
        return handle_ib_smi(port_priv, qp_info, wc, port_num, recv, response);
 }
 
-static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
-                                    struct ib_wc *wc)
+static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc)
 {
+       struct ib_mad_port_private *port_priv = cq->cq_context;
+       struct ib_mad_list_head *mad_list =
+               container_of(wc->wr_cqe, struct ib_mad_list_head, cqe);
        struct ib_mad_qp_info *qp_info;
        struct ib_mad_private_header *mad_priv_hdr;
        struct ib_mad_private *recv, *response = NULL;
-       struct ib_mad_list_head *mad_list;
        struct ib_mad_agent_private *mad_agent;
        int port_num;
        int ret = IB_MAD_RESULT_SUCCESS;
@@ -2186,7 +2193,17 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
        u16 resp_mad_pkey_index = 0;
        bool opa;
 
-       mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
+       if (list_empty_careful(&port_priv->port_list))
+               return;
+
+       if (wc->status != IB_WC_SUCCESS) {
+               /*
+                * Receive errors indicate that the QP has entered the error
+                * state - error handling/shutdown code will cleanup
+                */
+               return;
+       }
+
        qp_info = mad_list->mad_queue->qp_info;
        dequeue_mad(mad_list);
 
@@ -2227,7 +2244,7 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
        response = alloc_mad_private(mad_size, GFP_KERNEL);
        if (!response) {
                dev_err(&port_priv->device->dev,
-                       "ib_mad_recv_done_handler no memory for response buffer\n");
+                       "%s: no memory for response buffer\n", __func__);
                goto out;
        }
 
@@ -2413,11 +2430,12 @@ done:
        spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
 }
 
-static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv,
-                                    struct ib_wc *wc)
+static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc)
 {
+       struct ib_mad_port_private *port_priv = cq->cq_context;
+       struct ib_mad_list_head *mad_list =
+               container_of(wc->wr_cqe, struct ib_mad_list_head, cqe);
        struct ib_mad_send_wr_private   *mad_send_wr, *queued_send_wr;
-       struct ib_mad_list_head         *mad_list;
        struct ib_mad_qp_info           *qp_info;
        struct ib_mad_queue             *send_queue;
        struct ib_send_wr               *bad_send_wr;
@@ -2425,7 +2443,14 @@ static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv,
        unsigned long flags;
        int ret;
 
-       mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
+       if (list_empty_careful(&port_priv->port_list))
+               return;
+
+       if (wc->status != IB_WC_SUCCESS) {
+               if (!ib_mad_send_error(port_priv, wc))
+                       return;
+       }
+
        mad_send_wr = container_of(mad_list, struct ib_mad_send_wr_private,
                                   mad_list);
        send_queue = mad_list->mad_queue;
@@ -2490,24 +2515,15 @@ static void mark_sends_for_retry(struct ib_mad_qp_info *qp_info)
        spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
 }
 
-static void mad_error_handler(struct ib_mad_port_private *port_priv,
-                             struct ib_wc *wc)
+static bool ib_mad_send_error(struct ib_mad_port_private *port_priv,
+               struct ib_wc *wc)
 {
-       struct ib_mad_list_head *mad_list;
-       struct ib_mad_qp_info *qp_info;
+       struct ib_mad_list_head *mad_list =
+               container_of(wc->wr_cqe, struct ib_mad_list_head, cqe);
+       struct ib_mad_qp_info *qp_info = mad_list->mad_queue->qp_info;
        struct ib_mad_send_wr_private *mad_send_wr;
        int ret;
 
-       /* Determine if failure was a send or receive */
-       mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
-       qp_info = mad_list->mad_queue->qp_info;
-       if (mad_list->mad_queue == &qp_info->recv_queue)
-               /*
-                * Receive errors indicate that the QP has entered the error
-                * state - error handling/shutdown code will cleanup
-                */
-               return;
-
        /*
         * Send errors will transition the QP to SQE - move
         * QP to RTS and repost flushed work requests
@@ -2522,10 +2538,9 @@ static void mad_error_handler(struct ib_mad_port_private *port_priv,
                        mad_send_wr->retry = 0;
                        ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr.wr,
                                        &bad_send_wr);
-                       if (ret)
-                               ib_mad_send_done_handler(port_priv, wc);
-               } else
-                       ib_mad_send_done_handler(port_priv, wc);
+                       if (!ret)
+                               return false;
+               }
        } else {
                struct ib_qp_attr *attr;
 
@@ -2539,42 +2554,14 @@ static void mad_error_handler(struct ib_mad_port_private *port_priv,
                        kfree(attr);
                        if (ret)
                                dev_err(&port_priv->device->dev,
-                                       "mad_error_handler - ib_modify_qp to RTS : %d\n",
-                                       ret);
+                                       "%s - ib_modify_qp to RTS: %d\n",
+                                       __func__, ret);
                        else
                                mark_sends_for_retry(qp_info);
                }
-               ib_mad_send_done_handler(port_priv, wc);
        }
-}
 
-/*
- * IB MAD completion callback
- */
-static void ib_mad_completion_handler(struct work_struct *work)
-{
-       struct ib_mad_port_private *port_priv;
-       struct ib_wc wc;
-
-       port_priv = container_of(work, struct ib_mad_port_private, work);
-       ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP);
-
-       while (ib_poll_cq(port_priv->cq, 1, &wc) == 1) {
-               if (wc.status == IB_WC_SUCCESS) {
-                       switch (wc.opcode) {
-                       case IB_WC_SEND:
-                               ib_mad_send_done_handler(port_priv, &wc);
-                               break;
-                       case IB_WC_RECV:
-                               ib_mad_recv_done_handler(port_priv, &wc);
-                               break;
-                       default:
-                               BUG_ON(1);
-                               break;
-                       }
-               } else
-                       mad_error_handler(port_priv, &wc);
-       }
+       return true;
 }
 
 static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv)
@@ -2716,7 +2703,7 @@ static void local_completions(struct work_struct *work)
                         * before request
                         */
                        build_smp_wc(recv_mad_agent->agent.qp,
-                                    (unsigned long) local->mad_send_wr,
+                                    local->mad_send_wr->send_wr.wr.wr_cqe,
                                     be16_to_cpu(IB_LID_PERMISSIVE),
                                     local->mad_send_wr->send_wr.pkey_index,
                                     recv_mad_agent->agent.port_num, &wc);
@@ -2744,6 +2731,7 @@ static void local_completions(struct work_struct *work)
                                           IB_MAD_SNOOP_RECVS);
                        recv_mad_agent->agent.recv_handler(
                                                &recv_mad_agent->agent,
+                                               &local->mad_send_wr->send_buf,
                                                &local->mad_priv->header.recv_wc);
                        spin_lock_irqsave(&recv_mad_agent->lock, flags);
                        atomic_dec(&recv_mad_agent->refcount);
@@ -2855,17 +2843,6 @@ static void timeout_sends(struct work_struct *work)
        spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
 }
 
-static void ib_mad_thread_completion_handler(struct ib_cq *cq, void *arg)
-{
-       struct ib_mad_port_private *port_priv = cq->cq_context;
-       unsigned long flags;
-
-       spin_lock_irqsave(&ib_mad_port_list_lock, flags);
-       if (!list_empty(&port_priv->port_list))
-               queue_work(port_priv->wq, &port_priv->work);
-       spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
-}
-
 /*
  * Allocate receive MADs and post receive WRs for them
  */
@@ -2913,8 +2890,9 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
                        break;
                }
                mad_priv->header.mapping = sg_list.addr;
-               recv_wr.wr_id = (unsigned long)&mad_priv->header.mad_list;
                mad_priv->header.mad_list.mad_queue = recv_queue;
+               mad_priv->header.mad_list.cqe.done = ib_mad_recv_done;
+               recv_wr.wr_cqe = &mad_priv->header.mad_list.cqe;
 
                /* Post receive WR */
                spin_lock_irqsave(&recv_queue->lock, flags);
@@ -3151,7 +3129,6 @@ static int ib_mad_port_open(struct ib_device *device,
        unsigned long flags;
        char name[sizeof "ib_mad123"];
        int has_smi;
-       struct ib_cq_init_attr cq_attr = {};
 
        if (WARN_ON(rdma_max_mad_size(device, port_num) < IB_MGMT_MAD_SIZE))
                return -EFAULT;
@@ -3179,10 +3156,8 @@ static int ib_mad_port_open(struct ib_device *device,
        if (has_smi)
                cq_size *= 2;
 
-       cq_attr.cqe = cq_size;
-       port_priv->cq = ib_create_cq(port_priv->device,
-                                    ib_mad_thread_completion_handler,
-                                    NULL, port_priv, &cq_attr);
+       port_priv->cq = ib_alloc_cq(port_priv->device, port_priv, cq_size, 0,
+                       IB_POLL_WORKQUEUE);
        if (IS_ERR(port_priv->cq)) {
                dev_err(&device->dev, "Couldn't create ib_mad CQ\n");
                ret = PTR_ERR(port_priv->cq);
@@ -3211,7 +3186,6 @@ static int ib_mad_port_open(struct ib_device *device,
                ret = -ENOMEM;
                goto error8;
        }
-       INIT_WORK(&port_priv->work, ib_mad_completion_handler);
 
        spin_lock_irqsave(&ib_mad_port_list_lock, flags);
        list_add_tail(&port_priv->port_list, &ib_mad_port_list);
@@ -3238,7 +3212,7 @@ error7:
 error6:
        ib_dealloc_pd(port_priv->pd);
 error4:
-       ib_destroy_cq(port_priv->cq);
+       ib_free_cq(port_priv->cq);
        cleanup_recv_queue(&port_priv->qp_info[1]);
        cleanup_recv_queue(&port_priv->qp_info[0]);
 error3:
@@ -3271,7 +3245,7 @@ static int ib_mad_port_close(struct ib_device *device, int port_num)
        destroy_mad_qp(&port_priv->qp_info[1]);
        destroy_mad_qp(&port_priv->qp_info[0]);
        ib_dealloc_pd(port_priv->pd);
-       ib_destroy_cq(port_priv->cq);
+       ib_free_cq(port_priv->cq);
        cleanup_recv_queue(&port_priv->qp_info[1]);
        cleanup_recv_queue(&port_priv->qp_info[0]);
        /* XXX: Handle deallocation of MAD registration tables */
index 990698a6ab4b7024116ae50c9417d00ce179b41c..28669f6419e1fba02b65a0d776c55b08d3cc4fbc 100644 (file)
@@ -64,6 +64,7 @@
 
 struct ib_mad_list_head {
        struct list_head list;
+       struct ib_cqe cqe;
        struct ib_mad_queue *mad_queue;
 };
 
@@ -204,7 +205,6 @@ struct ib_mad_port_private {
        struct ib_mad_mgmt_version_table version[MAX_MGMT_VERSION];
        struct list_head agent_list;
        struct workqueue_struct *wq;
-       struct work_struct work;
        struct ib_mad_qp_info qp_info[IB_MAD_QPS_CORE];
 };
 
index bb6685fb08c61483546505f99037b88af6202ad0..250937cb9a1a5071f054a24e74414a8c90bd88a0 100644 (file)
@@ -723,14 +723,27 @@ EXPORT_SYMBOL(ib_sa_get_mcmember_rec);
 
 int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
                             struct ib_sa_mcmember_rec *rec,
+                            struct net_device *ndev,
+                            enum ib_gid_type gid_type,
                             struct ib_ah_attr *ah_attr)
 {
        int ret;
        u16 gid_index;
        u8 p;
 
-       ret = ib_find_cached_gid(device, &rec->port_gid,
-                                NULL, &p, &gid_index);
+       if (rdma_protocol_roce(device, port_num)) {
+               ret = ib_find_cached_gid_by_port(device, &rec->port_gid,
+                                                gid_type, port_num,
+                                                ndev,
+                                                &gid_index);
+       } else if (rdma_protocol_ib(device, port_num)) {
+               ret = ib_find_cached_gid(device, &rec->port_gid,
+                                        IB_GID_TYPE_IB, NULL, &p,
+                                        &gid_index);
+       } else {
+               ret = -EINVAL;
+       }
+
        if (ret)
                return ret;
 
index 178f98482e13e217c16d447978a855a900808bc0..06556c34606deb38e8906d08327a7d024f38079e 100644 (file)
@@ -67,17 +67,53 @@ struct netdev_event_work {
        struct netdev_event_work_cmd    cmds[ROCE_NETDEV_CALLBACK_SZ];
 };
 
+static const struct {
+       bool (*is_supported)(const struct ib_device *device, u8 port_num);
+       enum ib_gid_type gid_type;
+} PORT_CAP_TO_GID_TYPE[] = {
+       {rdma_protocol_roce_eth_encap, IB_GID_TYPE_ROCE},
+       {rdma_protocol_roce_udp_encap, IB_GID_TYPE_ROCE_UDP_ENCAP},
+};
+
+#define CAP_TO_GID_TABLE_SIZE  ARRAY_SIZE(PORT_CAP_TO_GID_TYPE)
+
+unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port)
+{
+       int i;
+       unsigned int ret_flags = 0;
+
+       if (!rdma_protocol_roce(ib_dev, port))
+               return 1UL << IB_GID_TYPE_IB;
+
+       for (i = 0; i < CAP_TO_GID_TABLE_SIZE; i++)
+               if (PORT_CAP_TO_GID_TYPE[i].is_supported(ib_dev, port))
+                       ret_flags |= 1UL << PORT_CAP_TO_GID_TYPE[i].gid_type;
+
+       return ret_flags;
+}
+EXPORT_SYMBOL(roce_gid_type_mask_support);
+
 static void update_gid(enum gid_op_type gid_op, struct ib_device *ib_dev,
                       u8 port, union ib_gid *gid,
                       struct ib_gid_attr *gid_attr)
 {
-       switch (gid_op) {
-       case GID_ADD:
-               ib_cache_gid_add(ib_dev, port, gid, gid_attr);
-               break;
-       case GID_DEL:
-               ib_cache_gid_del(ib_dev, port, gid, gid_attr);
-               break;
+       int i;
+       unsigned long gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
+
+       for (i = 0; i < IB_GID_TYPE_SIZE; i++) {
+               if ((1UL << i) & gid_type_mask) {
+                       gid_attr->gid_type = i;
+                       switch (gid_op) {
+                       case GID_ADD:
+                               ib_cache_gid_add(ib_dev, port,
+                                                gid, gid_attr);
+                               break;
+                       case GID_DEL:
+                               ib_cache_gid_del(ib_dev, port,
+                                                gid, gid_attr);
+                               break;
+                       }
+               }
        }
 }
 
@@ -103,18 +139,6 @@ static enum bonding_slave_state is_eth_active_slave_of_bonding_rcu(struct net_de
        return BONDING_SLAVE_STATE_NA;
 }
 
-static bool is_upper_dev_rcu(struct net_device *dev, struct net_device *upper)
-{
-       struct net_device *_upper = NULL;
-       struct list_head *iter;
-
-       netdev_for_each_all_upper_dev_rcu(dev, _upper, iter)
-               if (_upper == upper)
-                       break;
-
-       return _upper == upper;
-}
-
 #define REQUIRED_BOND_STATES           (BONDING_SLAVE_STATE_ACTIVE |   \
                                         BONDING_SLAVE_STATE_NA)
 static int is_eth_port_of_netdev(struct ib_device *ib_dev, u8 port,
@@ -132,7 +156,7 @@ static int is_eth_port_of_netdev(struct ib_device *ib_dev, u8 port,
        if (!real_dev)
                real_dev = event_ndev;
 
-       res = ((is_upper_dev_rcu(rdma_ndev, event_ndev) &&
+       res = ((rdma_is_upper_dev_rcu(rdma_ndev, event_ndev) &&
               (is_eth_active_slave_of_bonding_rcu(rdma_ndev, real_dev) &
                REQUIRED_BOND_STATES)) ||
               real_dev == rdma_ndev);
@@ -178,7 +202,7 @@ static int upper_device_filter(struct ib_device *ib_dev, u8 port,
                return 1;
 
        rcu_read_lock();
-       res = is_upper_dev_rcu(rdma_ndev, event_ndev);
+       res = rdma_is_upper_dev_rcu(rdma_ndev, event_ndev);
        rcu_read_unlock();
 
        return res;
@@ -203,10 +227,12 @@ static void enum_netdev_default_gids(struct ib_device *ib_dev,
                                     u8 port, struct net_device *event_ndev,
                                     struct net_device *rdma_ndev)
 {
+       unsigned long gid_type_mask;
+
        rcu_read_lock();
        if (!rdma_ndev ||
            ((rdma_ndev != event_ndev &&
-             !is_upper_dev_rcu(rdma_ndev, event_ndev)) ||
+             !rdma_is_upper_dev_rcu(rdma_ndev, event_ndev)) ||
             is_eth_active_slave_of_bonding_rcu(rdma_ndev,
                                                netdev_master_upper_dev_get_rcu(rdma_ndev)) ==
             BONDING_SLAVE_STATE_INACTIVE)) {
@@ -215,7 +241,9 @@ static void enum_netdev_default_gids(struct ib_device *ib_dev,
        }
        rcu_read_unlock();
 
-       ib_cache_gid_set_default_gid(ib_dev, port, rdma_ndev,
+       gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
+
+       ib_cache_gid_set_default_gid(ib_dev, port, rdma_ndev, gid_type_mask,
                                     IB_CACHE_GID_DEFAULT_MODE_SET);
 }
 
@@ -234,12 +262,17 @@ static void bond_delete_netdev_default_gids(struct ib_device *ib_dev,
 
        rcu_read_lock();
 
-       if (is_upper_dev_rcu(rdma_ndev, event_ndev) &&
+       if (rdma_is_upper_dev_rcu(rdma_ndev, event_ndev) &&
            is_eth_active_slave_of_bonding_rcu(rdma_ndev, real_dev) ==
            BONDING_SLAVE_STATE_INACTIVE) {
+               unsigned long gid_type_mask;
+
                rcu_read_unlock();
 
+               gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
+
                ib_cache_gid_set_default_gid(ib_dev, port, rdma_ndev,
+                                            gid_type_mask,
                                             IB_CACHE_GID_DEFAULT_MODE_DELETE);
        } else {
                rcu_read_unlock();
index a95a32ba596edc03728cf7790a6752c951cde951..f334090bb6129bf7f3cfe788e5cff7bc762752c3 100644 (file)
@@ -49,7 +49,9 @@
 #include <net/netlink.h>
 #include <uapi/rdma/ib_user_sa.h>
 #include <rdma/ib_marshall.h>
+#include <rdma/ib_addr.h>
 #include "sa.h"
+#include "core_priv.h"
 
 MODULE_AUTHOR("Roland Dreier");
 MODULE_DESCRIPTION("InfiniBand subnet administration query support");
@@ -715,7 +717,9 @@ static int ib_nl_handle_set_timeout(struct sk_buff *skb,
        struct nlattr *tb[LS_NLA_TYPE_MAX];
        int ret;
 
-       if (!netlink_capable(skb, CAP_NET_ADMIN))
+       if (!(nlh->nlmsg_flags & NLM_F_REQUEST) ||
+           !(NETLINK_CB(skb).sk) ||
+           !netlink_capable(skb, CAP_NET_ADMIN))
                return -EPERM;
 
        ret = nla_parse(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh),
@@ -789,7 +793,9 @@ static int ib_nl_handle_resolve_resp(struct sk_buff *skb,
        int found = 0;
        int ret;
 
-       if (!netlink_capable(skb, CAP_NET_ADMIN))
+       if ((nlh->nlmsg_flags & NLM_F_REQUEST) ||
+           !(NETLINK_CB(skb).sk) ||
+           !netlink_capable(skb, CAP_NET_ADMIN))
                return -EPERM;
 
        spin_lock_irqsave(&ib_nl_request_lock, flags);
@@ -996,7 +1002,8 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
 {
        int ret;
        u16 gid_index;
-       int force_grh;
+       int use_roce;
+       struct net_device *ndev = NULL;
 
        memset(ah_attr, 0, sizeof *ah_attr);
        ah_attr->dlid = be16_to_cpu(rec->dlid);
@@ -1006,16 +1013,71 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
        ah_attr->port_num = port_num;
        ah_attr->static_rate = rec->rate;
 
-       force_grh = rdma_cap_eth_ah(device, port_num);
+       use_roce = rdma_cap_eth_ah(device, port_num);
+
+       if (use_roce) {
+               struct net_device *idev;
+               struct net_device *resolved_dev;
+               struct rdma_dev_addr dev_addr = {.bound_dev_if = rec->ifindex,
+                                                .net = rec->net ? rec->net :
+                                                        &init_net};
+               union {
+                       struct sockaddr     _sockaddr;
+                       struct sockaddr_in  _sockaddr_in;
+                       struct sockaddr_in6 _sockaddr_in6;
+               } sgid_addr, dgid_addr;
+
+               if (!device->get_netdev)
+                       return -EOPNOTSUPP;
+
+               rdma_gid2ip(&sgid_addr._sockaddr, &rec->sgid);
+               rdma_gid2ip(&dgid_addr._sockaddr, &rec->dgid);
+
+               /* validate the route */
+               ret = rdma_resolve_ip_route(&sgid_addr._sockaddr,
+                                           &dgid_addr._sockaddr, &dev_addr);
+               if (ret)
+                       return ret;
 
-       if (rec->hop_limit > 1 || force_grh) {
-               struct net_device *ndev = ib_get_ndev_from_path(rec);
+               if ((dev_addr.network == RDMA_NETWORK_IPV4 ||
+                    dev_addr.network == RDMA_NETWORK_IPV6) &&
+                   rec->gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP)
+                       return -EINVAL;
+
+               idev = device->get_netdev(device, port_num);
+               if (!idev)
+                       return -ENODEV;
+
+               resolved_dev = dev_get_by_index(dev_addr.net,
+                                               dev_addr.bound_dev_if);
+               if (resolved_dev->flags & IFF_LOOPBACK) {
+                       dev_put(resolved_dev);
+                       resolved_dev = idev;
+                       dev_hold(resolved_dev);
+               }
+               ndev = ib_get_ndev_from_path(rec);
+               rcu_read_lock();
+               if ((ndev && ndev != resolved_dev) ||
+                   (resolved_dev != idev &&
+                    !rdma_is_upper_dev_rcu(idev, resolved_dev)))
+                       ret = -EHOSTUNREACH;
+               rcu_read_unlock();
+               dev_put(idev);
+               dev_put(resolved_dev);
+               if (ret) {
+                       if (ndev)
+                               dev_put(ndev);
+                       return ret;
+               }
+       }
 
+       if (rec->hop_limit > 1 || use_roce) {
                ah_attr->ah_flags = IB_AH_GRH;
                ah_attr->grh.dgid = rec->dgid;
 
-               ret = ib_find_cached_gid(device, &rec->sgid, ndev, &port_num,
-                                        &gid_index);
+               ret = ib_find_cached_gid_by_port(device, &rec->sgid,
+                                                rec->gid_type, port_num, ndev,
+                                                &gid_index);
                if (ret) {
                        if (ndev)
                                dev_put(ndev);
@@ -1029,9 +1091,10 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
                if (ndev)
                        dev_put(ndev);
        }
-       if (force_grh) {
+
+       if (use_roce)
                memcpy(ah_attr->dmac, rec->dmac, ETH_ALEN);
-       }
+
        return 0;
 }
 EXPORT_SYMBOL(ib_init_ah_from_path);
@@ -1157,6 +1220,7 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
                          mad->data, &rec);
                rec.net = NULL;
                rec.ifindex = 0;
+               rec.gid_type = IB_GID_TYPE_IB;
                memset(rec.dmac, 0, ETH_ALEN);
                query->callback(status, &rec, query->context);
        } else
@@ -1609,14 +1673,15 @@ static void send_handler(struct ib_mad_agent *agent,
 }
 
 static void recv_handler(struct ib_mad_agent *mad_agent,
+                        struct ib_mad_send_buf *send_buf,
                         struct ib_mad_recv_wc *mad_recv_wc)
 {
        struct ib_sa_query *query;
-       struct ib_mad_send_buf *mad_buf;
 
-       mad_buf = (void *) (unsigned long) mad_recv_wc->wc->wr_id;
-       query = mad_buf->context[0];
+       if (!send_buf)
+               return;
 
+       query = send_buf->context[0];
        if (query->callback) {
                if (mad_recv_wc->wc->status == IB_WC_SUCCESS)
                        query->callback(query,
index b1f37d4095fa1e15f7402c35a367b00e1f24b6b5..3de93517efe43b097d7f58e37cfbe36bd8ab0f65 100644 (file)
 #include <linux/slab.h>
 #include <linux/stat.h>
 #include <linux/string.h>
+#include <linux/netdevice.h>
 
 #include <rdma/ib_mad.h>
+#include <rdma/ib_pma.h>
 
+struct ib_port;
+
+struct gid_attr_group {
+       struct ib_port          *port;
+       struct kobject          kobj;
+       struct attribute_group  ndev;
+       struct attribute_group  type;
+};
 struct ib_port {
        struct kobject         kobj;
        struct ib_device      *ibdev;
+       struct gid_attr_group *gid_attr_group;
        struct attribute_group gid_group;
        struct attribute_group pkey_group;
        u8                     port_num;
+       struct attribute_group *pma_table;
 };
 
 struct port_attribute {
@@ -65,6 +77,7 @@ struct port_table_attribute {
        struct port_attribute   attr;
        char                    name[8];
        int                     index;
+       __be16                  attr_id;
 };
 
 static ssize_t port_attr_show(struct kobject *kobj,
@@ -84,6 +97,24 @@ static const struct sysfs_ops port_sysfs_ops = {
        .show = port_attr_show
 };
 
+static ssize_t gid_attr_show(struct kobject *kobj,
+                            struct attribute *attr, char *buf)
+{
+       struct port_attribute *port_attr =
+               container_of(attr, struct port_attribute, attr);
+       struct ib_port *p = container_of(kobj, struct gid_attr_group,
+                                        kobj)->port;
+
+       if (!port_attr->show)
+               return -EIO;
+
+       return port_attr->show(p, port_attr, buf);
+}
+
+static const struct sysfs_ops gid_attr_sysfs_ops = {
+       .show = gid_attr_show
+};
+
 static ssize_t state_show(struct ib_port *p, struct port_attribute *unused,
                          char *buf)
 {
@@ -281,6 +312,46 @@ static struct attribute *port_default_attrs[] = {
        NULL
 };
 
+static size_t print_ndev(struct ib_gid_attr *gid_attr, char *buf)
+{
+       if (!gid_attr->ndev)
+               return -EINVAL;
+
+       return sprintf(buf, "%s\n", gid_attr->ndev->name);
+}
+
+static size_t print_gid_type(struct ib_gid_attr *gid_attr, char *buf)
+{
+       return sprintf(buf, "%s\n", ib_cache_gid_type_str(gid_attr->gid_type));
+}
+
+static ssize_t _show_port_gid_attr(struct ib_port *p,
+                                  struct port_attribute *attr,
+                                  char *buf,
+                                  size_t (*print)(struct ib_gid_attr *gid_attr,
+                                                  char *buf))
+{
+       struct port_table_attribute *tab_attr =
+               container_of(attr, struct port_table_attribute, attr);
+       union ib_gid gid;
+       struct ib_gid_attr gid_attr = {};
+       ssize_t ret;
+       va_list args;
+
+       ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid,
+                          &gid_attr);
+       if (ret)
+               goto err;
+
+       ret = print(&gid_attr, buf);
+
+err:
+       if (gid_attr.ndev)
+               dev_put(gid_attr.ndev);
+       va_end(args);
+       return ret;
+}
+
 static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr,
                             char *buf)
 {
@@ -296,6 +367,19 @@ static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr,
        return sprintf(buf, "%pI6\n", gid.raw);
 }
 
+static ssize_t show_port_gid_attr_ndev(struct ib_port *p,
+                                      struct port_attribute *attr, char *buf)
+{
+       return _show_port_gid_attr(p, attr, buf, print_ndev);
+}
+
+static ssize_t show_port_gid_attr_gid_type(struct ib_port *p,
+                                          struct port_attribute *attr,
+                                          char *buf)
+{
+       return _show_port_gid_attr(p, attr, buf, print_gid_type);
+}
+
 static ssize_t show_port_pkey(struct ib_port *p, struct port_attribute *attr,
                              char *buf)
 {
@@ -314,24 +398,32 @@ static ssize_t show_port_pkey(struct ib_port *p, struct port_attribute *attr,
 #define PORT_PMA_ATTR(_name, _counter, _width, _offset)                        \
 struct port_table_attribute port_pma_attr_##_name = {                  \
        .attr  = __ATTR(_name, S_IRUGO, show_pma_counter, NULL),        \
-       .index = (_offset) | ((_width) << 16) | ((_counter) << 24)      \
+       .index = (_offset) | ((_width) << 16) | ((_counter) << 24),     \
+       .attr_id = IB_PMA_PORT_COUNTERS ,                               \
 }
 
-static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr,
-                               char *buf)
+#define PORT_PMA_ATTR_EXT(_name, _width, _offset)                      \
+struct port_table_attribute port_pma_attr_ext_##_name = {              \
+       .attr  = __ATTR(_name, S_IRUGO, show_pma_counter, NULL),        \
+       .index = (_offset) | ((_width) << 16),                          \
+       .attr_id = IB_PMA_PORT_COUNTERS_EXT ,                           \
+}
+
+/*
+ * Get a Perfmgmt MAD block of data.
+ * Returns error code or the number of bytes retrieved.
+ */
+static int get_perf_mad(struct ib_device *dev, int port_num, __be16 attr,
+               void *data, int offset, size_t size)
 {
-       struct port_table_attribute *tab_attr =
-               container_of(attr, struct port_table_attribute, attr);
-       int offset = tab_attr->index & 0xffff;
-       int width  = (tab_attr->index >> 16) & 0xff;
-       struct ib_mad *in_mad  = NULL;
-       struct ib_mad *out_mad = NULL;
+       struct ib_mad *in_mad;
+       struct ib_mad *out_mad;
        size_t mad_size = sizeof(*out_mad);
        u16 out_mad_pkey_index = 0;
        ssize_t ret;
 
-       if (!p->ibdev->process_mad)
-               return sprintf(buf, "N/A (no PMA)\n");
+       if (!dev->process_mad)
+               return -ENOSYS;
 
        in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
        out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
@@ -344,12 +436,13 @@ static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr,
        in_mad->mad_hdr.mgmt_class    = IB_MGMT_CLASS_PERF_MGMT;
        in_mad->mad_hdr.class_version = 1;
        in_mad->mad_hdr.method        = IB_MGMT_METHOD_GET;
-       in_mad->mad_hdr.attr_id       = cpu_to_be16(0x12); /* PortCounters */
+       in_mad->mad_hdr.attr_id       = attr;
 
-       in_mad->data[41] = p->port_num; /* PortSelect field */
+       if (attr != IB_PMA_CLASS_PORT_INFO)
+               in_mad->data[41] = port_num;    /* PortSelect field */
 
-       if ((p->ibdev->process_mad(p->ibdev, IB_MAD_IGNORE_MKEY,
-                p->port_num, NULL, NULL,
+       if ((dev->process_mad(dev, IB_MAD_IGNORE_MKEY,
+                port_num, NULL, NULL,
                 (const struct ib_mad_hdr *)in_mad, mad_size,
                 (struct ib_mad_hdr *)out_mad, &mad_size,
                 &out_mad_pkey_index) &
@@ -358,31 +451,54 @@ static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr,
                ret = -EINVAL;
                goto out;
        }
+       memcpy(data, out_mad->data + offset, size);
+       ret = size;
+out:
+       kfree(in_mad);
+       kfree(out_mad);
+       return ret;
+}
+
+static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr,
+                               char *buf)
+{
+       struct port_table_attribute *tab_attr =
+               container_of(attr, struct port_table_attribute, attr);
+       int offset = tab_attr->index & 0xffff;
+       int width  = (tab_attr->index >> 16) & 0xff;
+       ssize_t ret;
+       u8 data[8];
+
+       ret = get_perf_mad(p->ibdev, p->port_num, tab_attr->attr_id, &data,
+                       40 + offset / 8, sizeof(data));
+       if (ret < 0)
+               return sprintf(buf, "N/A (no PMA)\n");
 
        switch (width) {
        case 4:
-               ret = sprintf(buf, "%u\n", (out_mad->data[40 + offset / 8] >>
+               ret = sprintf(buf, "%u\n", (*data >>
                                            (4 - (offset % 8))) & 0xf);
                break;
        case 8:
-               ret = sprintf(buf, "%u\n", out_mad->data[40 + offset / 8]);
+               ret = sprintf(buf, "%u\n", *data);
                break;
        case 16:
                ret = sprintf(buf, "%u\n",
-                             be16_to_cpup((__be16 *)(out_mad->data + 40 + offset / 8)));
+                             be16_to_cpup((__be16 *)data));
                break;
        case 32:
                ret = sprintf(buf, "%u\n",
-                             be32_to_cpup((__be32 *)(out_mad->data + 40 + offset / 8)));
+                             be32_to_cpup((__be32 *)data));
+               break;
+       case 64:
+               ret = sprintf(buf, "%llu\n",
+                               be64_to_cpup((__be64 *)data));
                break;
+
        default:
                ret = 0;
        }
 
-out:
-       kfree(in_mad);
-       kfree(out_mad);
-
        return ret;
 }
 
@@ -403,6 +519,18 @@ static PORT_PMA_ATTR(port_rcv_data             , 13, 32, 224);
 static PORT_PMA_ATTR(port_xmit_packets             , 14, 32, 256);
 static PORT_PMA_ATTR(port_rcv_packets              , 15, 32, 288);
 
+/*
+ * Counters added by extended set
+ */
+static PORT_PMA_ATTR_EXT(port_xmit_data                    , 64,  64);
+static PORT_PMA_ATTR_EXT(port_rcv_data             , 64, 128);
+static PORT_PMA_ATTR_EXT(port_xmit_packets         , 64, 192);
+static PORT_PMA_ATTR_EXT(port_rcv_packets          , 64, 256);
+static PORT_PMA_ATTR_EXT(unicast_xmit_packets      , 64, 320);
+static PORT_PMA_ATTR_EXT(unicast_rcv_packets       , 64, 384);
+static PORT_PMA_ATTR_EXT(multicast_xmit_packets            , 64, 448);
+static PORT_PMA_ATTR_EXT(multicast_rcv_packets     , 64, 512);
+
 static struct attribute *pma_attrs[] = {
        &port_pma_attr_symbol_error.attr.attr,
        &port_pma_attr_link_error_recovery.attr.attr,
@@ -423,11 +551,65 @@ static struct attribute *pma_attrs[] = {
        NULL
 };
 
+static struct attribute *pma_attrs_ext[] = {
+       &port_pma_attr_symbol_error.attr.attr,
+       &port_pma_attr_link_error_recovery.attr.attr,
+       &port_pma_attr_link_downed.attr.attr,
+       &port_pma_attr_port_rcv_errors.attr.attr,
+       &port_pma_attr_port_rcv_remote_physical_errors.attr.attr,
+       &port_pma_attr_port_rcv_switch_relay_errors.attr.attr,
+       &port_pma_attr_port_xmit_discards.attr.attr,
+       &port_pma_attr_port_xmit_constraint_errors.attr.attr,
+       &port_pma_attr_port_rcv_constraint_errors.attr.attr,
+       &port_pma_attr_local_link_integrity_errors.attr.attr,
+       &port_pma_attr_excessive_buffer_overrun_errors.attr.attr,
+       &port_pma_attr_VL15_dropped.attr.attr,
+       &port_pma_attr_ext_port_xmit_data.attr.attr,
+       &port_pma_attr_ext_port_rcv_data.attr.attr,
+       &port_pma_attr_ext_port_xmit_packets.attr.attr,
+       &port_pma_attr_ext_port_rcv_packets.attr.attr,
+       &port_pma_attr_ext_unicast_rcv_packets.attr.attr,
+       &port_pma_attr_ext_unicast_xmit_packets.attr.attr,
+       &port_pma_attr_ext_multicast_rcv_packets.attr.attr,
+       &port_pma_attr_ext_multicast_xmit_packets.attr.attr,
+       NULL
+};
+
+static struct attribute *pma_attrs_noietf[] = {
+       &port_pma_attr_symbol_error.attr.attr,
+       &port_pma_attr_link_error_recovery.attr.attr,
+       &port_pma_attr_link_downed.attr.attr,
+       &port_pma_attr_port_rcv_errors.attr.attr,
+       &port_pma_attr_port_rcv_remote_physical_errors.attr.attr,
+       &port_pma_attr_port_rcv_switch_relay_errors.attr.attr,
+       &port_pma_attr_port_xmit_discards.attr.attr,
+       &port_pma_attr_port_xmit_constraint_errors.attr.attr,
+       &port_pma_attr_port_rcv_constraint_errors.attr.attr,
+       &port_pma_attr_local_link_integrity_errors.attr.attr,
+       &port_pma_attr_excessive_buffer_overrun_errors.attr.attr,
+       &port_pma_attr_VL15_dropped.attr.attr,
+       &port_pma_attr_ext_port_xmit_data.attr.attr,
+       &port_pma_attr_ext_port_rcv_data.attr.attr,
+       &port_pma_attr_ext_port_xmit_packets.attr.attr,
+       &port_pma_attr_ext_port_rcv_packets.attr.attr,
+       NULL
+};
+
 static struct attribute_group pma_group = {
        .name  = "counters",
        .attrs  = pma_attrs
 };
 
+static struct attribute_group pma_group_ext = {
+       .name  = "counters",
+       .attrs  = pma_attrs_ext
+};
+
+static struct attribute_group pma_group_noietf = {
+       .name  = "counters",
+       .attrs  = pma_attrs_noietf
+};
+
 static void ib_port_release(struct kobject *kobj)
 {
        struct ib_port *p = container_of(kobj, struct ib_port, kobj);
@@ -451,12 +633,41 @@ static void ib_port_release(struct kobject *kobj)
        kfree(p);
 }
 
+static void ib_port_gid_attr_release(struct kobject *kobj)
+{
+       struct gid_attr_group *g = container_of(kobj, struct gid_attr_group,
+                                               kobj);
+       struct attribute *a;
+       int i;
+
+       if (g->ndev.attrs) {
+               for (i = 0; (a = g->ndev.attrs[i]); ++i)
+                       kfree(a);
+
+               kfree(g->ndev.attrs);
+       }
+
+       if (g->type.attrs) {
+               for (i = 0; (a = g->type.attrs[i]); ++i)
+                       kfree(a);
+
+               kfree(g->type.attrs);
+       }
+
+       kfree(g);
+}
+
 static struct kobj_type port_type = {
        .release       = ib_port_release,
        .sysfs_ops     = &port_sysfs_ops,
        .default_attrs = port_default_attrs
 };
 
+static struct kobj_type gid_attr_type = {
+       .sysfs_ops      = &gid_attr_sysfs_ops,
+       .release        = ib_port_gid_attr_release
+};
+
 static struct attribute **
 alloc_group_attrs(ssize_t (*show)(struct ib_port *,
                                  struct port_attribute *, char *buf),
@@ -500,6 +711,31 @@ err:
        return NULL;
 }
 
+/*
+ * Figure out which counter table to use depending on
+ * the device capabilities.
+ */
+static struct attribute_group *get_counter_table(struct ib_device *dev,
+                                                int port_num)
+{
+       struct ib_class_port_info cpi;
+
+       if (get_perf_mad(dev, port_num, IB_PMA_CLASS_PORT_INFO,
+                               &cpi, 40, sizeof(cpi)) >= 0) {
+
+               if (cpi.capability_mask && IB_PMA_CLASS_CAP_EXT_WIDTH)
+                       /* We have extended counters */
+                       return &pma_group_ext;
+
+               if (cpi.capability_mask && IB_PMA_CLASS_CAP_EXT_WIDTH_NOIETF)
+                       /* But not the IETF ones */
+                       return &pma_group_noietf;
+       }
+
+       /* Fall back to normal counters */
+       return &pma_group;
+}
+
 static int add_port(struct ib_device *device, int port_num,
                    int (*port_callback)(struct ib_device *,
                                         u8, struct kobject *))
@@ -528,9 +764,24 @@ static int add_port(struct ib_device *device, int port_num,
                return ret;
        }
 
-       ret = sysfs_create_group(&p->kobj, &pma_group);
-       if (ret)
+       p->gid_attr_group = kzalloc(sizeof(*p->gid_attr_group), GFP_KERNEL);
+       if (!p->gid_attr_group) {
+               ret = -ENOMEM;
                goto err_put;
+       }
+
+       p->gid_attr_group->port = p;
+       ret = kobject_init_and_add(&p->gid_attr_group->kobj, &gid_attr_type,
+                                  &p->kobj, "gid_attrs");
+       if (ret) {
+               kfree(p->gid_attr_group);
+               goto err_put;
+       }
+
+       p->pma_table = get_counter_table(device, port_num);
+       ret = sysfs_create_group(&p->kobj, p->pma_table);
+       if (ret)
+               goto err_put_gid_attrs;
 
        p->gid_group.name  = "gids";
        p->gid_group.attrs = alloc_group_attrs(show_port_gid, attr.gid_tbl_len);
@@ -543,12 +794,38 @@ static int add_port(struct ib_device *device, int port_num,
        if (ret)
                goto err_free_gid;
 
+       p->gid_attr_group->ndev.name = "ndevs";
+       p->gid_attr_group->ndev.attrs = alloc_group_attrs(show_port_gid_attr_ndev,
+                                                         attr.gid_tbl_len);
+       if (!p->gid_attr_group->ndev.attrs) {
+               ret = -ENOMEM;
+               goto err_remove_gid;
+       }
+
+       ret = sysfs_create_group(&p->gid_attr_group->kobj,
+                                &p->gid_attr_group->ndev);
+       if (ret)
+               goto err_free_gid_ndev;
+
+       p->gid_attr_group->type.name = "types";
+       p->gid_attr_group->type.attrs = alloc_group_attrs(show_port_gid_attr_gid_type,
+                                                         attr.gid_tbl_len);
+       if (!p->gid_attr_group->type.attrs) {
+               ret = -ENOMEM;
+               goto err_remove_gid_ndev;
+       }
+
+       ret = sysfs_create_group(&p->gid_attr_group->kobj,
+                                &p->gid_attr_group->type);
+       if (ret)
+               goto err_free_gid_type;
+
        p->pkey_group.name  = "pkeys";
        p->pkey_group.attrs = alloc_group_attrs(show_port_pkey,
                                                attr.pkey_tbl_len);
        if (!p->pkey_group.attrs) {
                ret = -ENOMEM;
-               goto err_remove_gid;
+               goto err_remove_gid_type;
        }
 
        ret = sysfs_create_group(&p->kobj, &p->pkey_group);
@@ -576,6 +853,28 @@ err_free_pkey:
        kfree(p->pkey_group.attrs);
        p->pkey_group.attrs = NULL;
 
+err_remove_gid_type:
+       sysfs_remove_group(&p->gid_attr_group->kobj,
+                          &p->gid_attr_group->type);
+
+err_free_gid_type:
+       for (i = 0; i < attr.gid_tbl_len; ++i)
+               kfree(p->gid_attr_group->type.attrs[i]);
+
+       kfree(p->gid_attr_group->type.attrs);
+       p->gid_attr_group->type.attrs = NULL;
+
+err_remove_gid_ndev:
+       sysfs_remove_group(&p->gid_attr_group->kobj,
+                          &p->gid_attr_group->ndev);
+
+err_free_gid_ndev:
+       for (i = 0; i < attr.gid_tbl_len; ++i)
+               kfree(p->gid_attr_group->ndev.attrs[i]);
+
+       kfree(p->gid_attr_group->ndev.attrs);
+       p->gid_attr_group->ndev.attrs = NULL;
+
 err_remove_gid:
        sysfs_remove_group(&p->kobj, &p->gid_group);
 
@@ -587,7 +886,10 @@ err_free_gid:
        p->gid_group.attrs = NULL;
 
 err_remove_pma:
-       sysfs_remove_group(&p->kobj, &pma_group);
+       sysfs_remove_group(&p->kobj, p->pma_table);
+
+err_put_gid_attrs:
+       kobject_put(&p->gid_attr_group->kobj);
 
 err_put:
        kobject_put(&p->kobj);
@@ -614,18 +916,12 @@ static ssize_t show_sys_image_guid(struct device *device,
                                   struct device_attribute *dev_attr, char *buf)
 {
        struct ib_device *dev = container_of(device, struct ib_device, dev);
-       struct ib_device_attr attr;
-       ssize_t ret;
-
-       ret = ib_query_device(dev, &attr);
-       if (ret)
-               return ret;
 
        return sprintf(buf, "%04x:%04x:%04x:%04x\n",
-                      be16_to_cpu(((__be16 *) &attr.sys_image_guid)[0]),
-                      be16_to_cpu(((__be16 *) &attr.sys_image_guid)[1]),
-                      be16_to_cpu(((__be16 *) &attr.sys_image_guid)[2]),
-                      be16_to_cpu(((__be16 *) &attr.sys_image_guid)[3]));
+                      be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[0]),
+                      be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[1]),
+                      be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[2]),
+                      be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[3]));
 }
 
 static ssize_t show_node_guid(struct device *device,
@@ -800,9 +1096,14 @@ static void free_port_list_attributes(struct ib_device *device)
        list_for_each_entry_safe(p, t, &device->port_list, entry) {
                struct ib_port *port = container_of(p, struct ib_port, kobj);
                list_del(&p->entry);
-               sysfs_remove_group(p, &pma_group);
+               sysfs_remove_group(p, port->pma_table);
                sysfs_remove_group(p, &port->pkey_group);
                sysfs_remove_group(p, &port->gid_group);
+               sysfs_remove_group(&port->gid_attr_group->kobj,
+                                  &port->gid_attr_group->ndev);
+               sysfs_remove_group(&port->gid_attr_group->kobj,
+                                  &port->gid_attr_group->type);
+               kobject_put(&port->gid_attr_group->kobj);
                kobject_put(p);
        }
 
index 72feee620ebfb33300a2121a2a22d0fc1d1eafb3..2116132568e70e61c8465ffc9d922d732aca28ed 100644 (file)
@@ -35,6 +35,7 @@
 #include <linux/string.h>
 #include <linux/export.h>
 #include <linux/if_ether.h>
+#include <linux/ip.h>
 
 #include <rdma/ib_pack.h>
 
@@ -116,6 +117,72 @@ static const struct ib_field vlan_table[]  = {
          .size_bits    = 16 }
 };
 
+static const struct ib_field ip4_table[]  = {
+       { STRUCT_FIELD(ip4, ver),
+         .offset_words = 0,
+         .offset_bits  = 0,
+         .size_bits    = 4 },
+       { STRUCT_FIELD(ip4, hdr_len),
+         .offset_words = 0,
+         .offset_bits  = 4,
+         .size_bits    = 4 },
+       { STRUCT_FIELD(ip4, tos),
+         .offset_words = 0,
+         .offset_bits  = 8,
+         .size_bits    = 8 },
+       { STRUCT_FIELD(ip4, tot_len),
+         .offset_words = 0,
+         .offset_bits  = 16,
+         .size_bits    = 16 },
+       { STRUCT_FIELD(ip4, id),
+         .offset_words = 1,
+         .offset_bits  = 0,
+         .size_bits    = 16 },
+       { STRUCT_FIELD(ip4, frag_off),
+         .offset_words = 1,
+         .offset_bits  = 16,
+         .size_bits    = 16 },
+       { STRUCT_FIELD(ip4, ttl),
+         .offset_words = 2,
+         .offset_bits  = 0,
+         .size_bits    = 8 },
+       { STRUCT_FIELD(ip4, protocol),
+         .offset_words = 2,
+         .offset_bits  = 8,
+         .size_bits    = 8 },
+       { STRUCT_FIELD(ip4, check),
+         .offset_words = 2,
+         .offset_bits  = 16,
+         .size_bits    = 16 },
+       { STRUCT_FIELD(ip4, saddr),
+         .offset_words = 3,
+         .offset_bits  = 0,
+         .size_bits    = 32 },
+       { STRUCT_FIELD(ip4, daddr),
+         .offset_words = 4,
+         .offset_bits  = 0,
+         .size_bits    = 32 }
+};
+
+static const struct ib_field udp_table[]  = {
+       { STRUCT_FIELD(udp, sport),
+         .offset_words = 0,
+         .offset_bits  = 0,
+         .size_bits    = 16 },
+       { STRUCT_FIELD(udp, dport),
+         .offset_words = 0,
+         .offset_bits  = 16,
+         .size_bits    = 16 },
+       { STRUCT_FIELD(udp, length),
+         .offset_words = 1,
+         .offset_bits  = 0,
+         .size_bits    = 16 },
+       { STRUCT_FIELD(udp, csum),
+         .offset_words = 1,
+         .offset_bits  = 16,
+         .size_bits    = 16 }
+};
+
 static const struct ib_field grh_table[]  = {
        { STRUCT_FIELD(grh, ip_version),
          .offset_words = 0,
@@ -213,26 +280,59 @@ static const struct ib_field deth_table[] = {
          .size_bits    = 24 }
 };
 
+__sum16 ib_ud_ip4_csum(struct ib_ud_header *header)
+{
+       struct iphdr iph;
+
+       iph.ihl         = 5;
+       iph.version     = 4;
+       iph.tos         = header->ip4.tos;
+       iph.tot_len     = header->ip4.tot_len;
+       iph.id          = header->ip4.id;
+       iph.frag_off    = header->ip4.frag_off;
+       iph.ttl         = header->ip4.ttl;
+       iph.protocol    = header->ip4.protocol;
+       iph.check       = 0;
+       iph.saddr       = header->ip4.saddr;
+       iph.daddr       = header->ip4.daddr;
+
+       return ip_fast_csum((u8 *)&iph, iph.ihl);
+}
+EXPORT_SYMBOL(ib_ud_ip4_csum);
+
 /**
  * ib_ud_header_init - Initialize UD header structure
  * @payload_bytes:Length of packet payload
  * @lrh_present: specify if LRH is present
  * @eth_present: specify if Eth header is present
  * @vlan_present: packet is tagged vlan
- * @grh_present:GRH flag (if non-zero, GRH will be included)
+ * @grh_present: GRH flag (if non-zero, GRH will be included)
+ * @ip_version: if non-zero, IP header, V4 or V6, will be included
+ * @udp_present :if non-zero, UDP header will be included
  * @immediate_present: specify if immediate data is present
  * @header:Structure to initialize
  */
-void ib_ud_header_init(int                         payload_bytes,
-                      int                  lrh_present,
-                      int                  eth_present,
-                      int                  vlan_present,
-                      int                  grh_present,
-                      int                  immediate_present,
-                      struct ib_ud_header *header)
+int ib_ud_header_init(int     payload_bytes,
+                     int    lrh_present,
+                     int    eth_present,
+                     int    vlan_present,
+                     int    grh_present,
+                     int    ip_version,
+                     int    udp_present,
+                     int    immediate_present,
+                     struct ib_ud_header *header)
 {
+       size_t udp_bytes = udp_present ? IB_UDP_BYTES : 0;
+
+       grh_present = grh_present && !ip_version;
        memset(header, 0, sizeof *header);
 
+       /*
+        * UDP header without IP header doesn't make sense
+        */
+       if (udp_present && ip_version != 4 && ip_version != 6)
+               return -EINVAL;
+
        if (lrh_present) {
                u16 packet_length;
 
@@ -252,16 +352,37 @@ void ib_ud_header_init(int                    payload_bytes,
        if (vlan_present)
                header->eth.type = cpu_to_be16(ETH_P_8021Q);
 
-       if (grh_present) {
+       if (ip_version == 6 || grh_present) {
                header->grh.ip_version      = 6;
                header->grh.payload_length  =
-                       cpu_to_be16((IB_BTH_BYTES     +
+                       cpu_to_be16((udp_bytes        +
+                                    IB_BTH_BYTES     +
                                     IB_DETH_BYTES    +
                                     payload_bytes    +
                                     4                + /* ICRC     */
                                     3) & ~3);          /* round up */
-               header->grh.next_header     = 0x1b;
+               header->grh.next_header     = udp_present ? IPPROTO_UDP : 0x1b;
+       }
+
+       if (ip_version == 4) {
+               header->ip4.ver = 4; /* version 4 */
+               header->ip4.hdr_len = 5; /* 5 words */
+               header->ip4.tot_len =
+                       cpu_to_be16(IB_IP4_BYTES   +
+                                    udp_bytes     +
+                                    IB_BTH_BYTES  +
+                                    IB_DETH_BYTES +
+                                    payload_bytes +
+                                    4);     /* ICRC     */
+               header->ip4.protocol = IPPROTO_UDP;
        }
+       if (udp_present && ip_version)
+               header->udp.length =
+                       cpu_to_be16(IB_UDP_BYTES   +
+                                    IB_BTH_BYTES  +
+                                    IB_DETH_BYTES +
+                                    payload_bytes +
+                                    4);     /* ICRC     */
 
        if (immediate_present)
                header->bth.opcode           = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
@@ -273,8 +394,11 @@ void ib_ud_header_init(int                     payload_bytes,
        header->lrh_present = lrh_present;
        header->eth_present = eth_present;
        header->vlan_present = vlan_present;
-       header->grh_present = grh_present;
+       header->grh_present = grh_present || (ip_version == 6);
+       header->ipv4_present = ip_version == 4;
+       header->udp_present = udp_present;
        header->immediate_present = immediate_present;
+       return 0;
 }
 EXPORT_SYMBOL(ib_ud_header_init);
 
@@ -311,6 +435,16 @@ int ib_ud_header_pack(struct ib_ud_header *header,
                        &header->grh, buf + len);
                len += IB_GRH_BYTES;
        }
+       if (header->ipv4_present) {
+               ib_pack(ip4_table, ARRAY_SIZE(ip4_table),
+                       &header->ip4, buf + len);
+               len += IB_IP4_BYTES;
+       }
+       if (header->udp_present) {
+               ib_pack(udp_table, ARRAY_SIZE(udp_table),
+                       &header->udp, buf + len);
+               len += IB_UDP_BYTES;
+       }
 
        ib_pack(bth_table, ARRAY_SIZE(bth_table),
                &header->bth, buf + len);
index 40becdb3196e07b97c94ade818af5755bfaae4db..e69bf266049d0117830577167c2987c83cd8ac62 100644 (file)
@@ -232,7 +232,7 @@ static void ib_umem_notifier_invalidate_range_end(struct mmu_notifier *mn,
        ib_ucontext_notifier_end_account(context);
 }
 
-static struct mmu_notifier_ops ib_umem_notifiers = {
+static const struct mmu_notifier_ops ib_umem_notifiers = {
        .release                    = ib_umem_notifier_release,
        .invalidate_page            = ib_umem_notifier_invalidate_page,
        .invalidate_range_start     = ib_umem_notifier_invalidate_range_start,
index 57f281f8d686224b7a40488330b78ac38943270c..415a3185cde7f45dfc583180ef1a4ef81fec8699 100644 (file)
@@ -210,6 +210,7 @@ static void send_handler(struct ib_mad_agent *agent,
 }
 
 static void recv_handler(struct ib_mad_agent *agent,
+                        struct ib_mad_send_buf *send_buf,
                         struct ib_mad_recv_wc *mad_recv_wc)
 {
        struct ib_umad_file *file = agent->context;
index 94bbd8c155fcca1daf5f2e9ee0fc65552821ff3f..612ccfd39bf98181693f07c5dfcc661dc8191e39 100644 (file)
@@ -204,6 +204,8 @@ void ib_uverbs_event_handler(struct ib_event_handler *handler,
                             struct ib_event *event);
 void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd);
 
+int uverbs_dealloc_mw(struct ib_mw *mw);
+
 struct ib_uverbs_flow_spec {
        union {
                union {
index 1c02deab068fbf1031d5b21db960feb9b06865c5..6ffc9c4e93afb4efa27fe6f3c17fc3cdf41d8c21 100644 (file)
@@ -291,9 +291,6 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
        struct ib_uverbs_get_context      cmd;
        struct ib_uverbs_get_context_resp resp;
        struct ib_udata                   udata;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-       struct ib_device_attr             dev_attr;
-#endif
        struct ib_ucontext               *ucontext;
        struct file                      *filp;
        int ret;
@@ -342,10 +339,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
        ucontext->odp_mrs_count = 0;
        INIT_LIST_HEAD(&ucontext->no_private_counters);
 
-       ret = ib_query_device(ib_dev, &dev_attr);
-       if (ret)
-               goto err_free;
-       if (!(dev_attr.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING))
+       if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING))
                ucontext->invalidate_range = NULL;
 
 #endif
@@ -447,8 +441,6 @@ ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
 {
        struct ib_uverbs_query_device      cmd;
        struct ib_uverbs_query_device_resp resp;
-       struct ib_device_attr              attr;
-       int                                ret;
 
        if (out_len < sizeof resp)
                return -ENOSPC;
@@ -456,12 +448,8 @@ ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
        if (copy_from_user(&cmd, buf, sizeof cmd))
                return -EFAULT;
 
-       ret = ib_query_device(ib_dev, &attr);
-       if (ret)
-               return ret;
-
        memset(&resp, 0, sizeof resp);
-       copy_query_dev_fields(file, ib_dev, &resp, &attr);
+       copy_query_dev_fields(file, ib_dev, &resp, &ib_dev->attrs);
 
        if (copy_to_user((void __user *) (unsigned long) cmd.response,
                         &resp, sizeof resp))
@@ -986,11 +974,8 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
        }
 
        if (cmd.access_flags & IB_ACCESS_ON_DEMAND) {
-               struct ib_device_attr attr;
-
-               ret = ib_query_device(pd->device, &attr);
-               if (ret || !(attr.device_cap_flags &
-                               IB_DEVICE_ON_DEMAND_PAGING)) {
+               if (!(pd->device->attrs.device_cap_flags &
+                     IB_DEVICE_ON_DEMAND_PAGING)) {
                        pr_debug("ODP support not available\n");
                        ret = -EINVAL;
                        goto err_put;
@@ -1008,7 +993,6 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
        mr->pd      = pd;
        mr->uobject = uobj;
        atomic_inc(&pd->usecnt);
-       atomic_set(&mr->usecnt, 0);
 
        uobj->object = mr;
        ret = idr_add_uobj(&ib_uverbs_mr_idr, uobj);
@@ -1106,11 +1090,6 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
                }
        }
 
-       if (atomic_read(&mr->usecnt)) {
-               ret = -EBUSY;
-               goto put_uobj_pd;
-       }
-
        old_pd = mr->pd;
        ret = mr->device->rereg_user_mr(mr, cmd.flags, cmd.start,
                                        cmd.length, cmd.hca_va,
@@ -1258,7 +1237,7 @@ err_copy:
        idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
 
 err_unalloc:
-       ib_dealloc_mw(mw);
+       uverbs_dealloc_mw(mw);
 
 err_put:
        put_pd_read(pd);
@@ -1287,7 +1266,7 @@ ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file,
 
        mw = uobj->object;
 
-       ret = ib_dealloc_mw(mw);
+       ret = uverbs_dealloc_mw(mw);
        if (!ret)
                uobj->live = 0;
 
@@ -1845,7 +1824,10 @@ static int create_qp(struct ib_uverbs_file *file,
                      sizeof(cmd->create_flags))
                attr.create_flags = cmd->create_flags;
 
-       if (attr.create_flags & ~IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) {
+       if (attr.create_flags & ~(IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK |
+                               IB_QP_CREATE_CROSS_CHANNEL |
+                               IB_QP_CREATE_MANAGED_SEND |
+                               IB_QP_CREATE_MANAGED_RECV)) {
                ret = -EINVAL;
                goto err_put;
        }
index e3ef28861be63dd453d0fd1b55ec21f939919624..39680aed99dd0d4593f91317e666393c562d33d3 100644 (file)
@@ -133,6 +133,17 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
 static void ib_uverbs_add_one(struct ib_device *device);
 static void ib_uverbs_remove_one(struct ib_device *device, void *client_data);
 
+int uverbs_dealloc_mw(struct ib_mw *mw)
+{
+       struct ib_pd *pd = mw->pd;
+       int ret;
+
+       ret = mw->device->dealloc_mw(mw);
+       if (!ret)
+               atomic_dec(&pd->usecnt);
+       return ret;
+}
+
 static void ib_uverbs_release_dev(struct kobject *kobj)
 {
        struct ib_uverbs_device *dev =
@@ -224,7 +235,7 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
                struct ib_mw *mw = uobj->object;
 
                idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
-               ib_dealloc_mw(mw);
+               uverbs_dealloc_mw(mw);
                kfree(uobj);
        }
 
index 7d2f14c9bbefaf1f3c28eac11b7bcefbc2802927..af020f80d50f4888436db49f935f29a0f5f5b22e 100644 (file)
@@ -144,5 +144,6 @@ void ib_copy_path_rec_from_user(struct ib_sa_path_rec *dst,
        memset(dst->dmac, 0, sizeof(dst->dmac));
        dst->net = NULL;
        dst->ifindex = 0;
+       dst->gid_type = IB_GID_TYPE_IB;
 }
 EXPORT_SYMBOL(ib_copy_path_rec_from_user);
index 545906dec26dc5e936df37c635c68b94bb27e226..5af6d024e0538a9eb63049ef7009ad7dab09a967 100644 (file)
@@ -229,12 +229,6 @@ EXPORT_SYMBOL(rdma_port_get_link_layer);
 struct ib_pd *ib_alloc_pd(struct ib_device *device)
 {
        struct ib_pd *pd;
-       struct ib_device_attr devattr;
-       int rc;
-
-       rc = ib_query_device(device, &devattr);
-       if (rc)
-               return ERR_PTR(rc);
 
        pd = device->alloc_pd(device, NULL, NULL);
        if (IS_ERR(pd))
@@ -245,7 +239,7 @@ struct ib_pd *ib_alloc_pd(struct ib_device *device)
        pd->local_mr = NULL;
        atomic_set(&pd->usecnt, 0);
 
-       if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
+       if (device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
                pd->local_dma_lkey = device->local_dma_lkey;
        else {
                struct ib_mr *mr;
@@ -311,8 +305,61 @@ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
 }
 EXPORT_SYMBOL(ib_create_ah);
 
+static int ib_get_header_version(const union rdma_network_hdr *hdr)
+{
+       const struct iphdr *ip4h = (struct iphdr *)&hdr->roce4grh;
+       struct iphdr ip4h_checked;
+       const struct ipv6hdr *ip6h = (struct ipv6hdr *)&hdr->ibgrh;
+
+       /* If it's IPv6, the version must be 6, otherwise, the first
+        * 20 bytes (before the IPv4 header) are garbled.
+        */
+       if (ip6h->version != 6)
+               return (ip4h->version == 4) ? 4 : 0;
+       /* version may be 6 or 4 because the first 20 bytes could be garbled */
+
+       /* RoCE v2 requires no options, thus header length
+        * must be 5 words
+        */
+       if (ip4h->ihl != 5)
+               return 6;
+
+       /* Verify checksum.
+        * We can't write on scattered buffers so we need to copy to
+        * temp buffer.
+        */
+       memcpy(&ip4h_checked, ip4h, sizeof(ip4h_checked));
+       ip4h_checked.check = 0;
+       ip4h_checked.check = ip_fast_csum((u8 *)&ip4h_checked, 5);
+       /* if IPv4 header checksum is OK, believe it */
+       if (ip4h->check == ip4h_checked.check)
+               return 4;
+       return 6;
+}
+
+static enum rdma_network_type ib_get_net_type_by_grh(struct ib_device *device,
+                                                    u8 port_num,
+                                                    const struct ib_grh *grh)
+{
+       int grh_version;
+
+       if (rdma_protocol_ib(device, port_num))
+               return RDMA_NETWORK_IB;
+
+       grh_version = ib_get_header_version((union rdma_network_hdr *)grh);
+
+       if (grh_version == 4)
+               return RDMA_NETWORK_IPV4;
+
+       if (grh->next_hdr == IPPROTO_UDP)
+               return RDMA_NETWORK_IPV6;
+
+       return RDMA_NETWORK_ROCE_V1;
+}
+
 struct find_gid_index_context {
        u16 vlan_id;
+       enum ib_gid_type gid_type;
 };
 
 static bool find_gid_index(const union ib_gid *gid,
@@ -322,6 +369,9 @@ static bool find_gid_index(const union ib_gid *gid,
        struct find_gid_index_context *ctx =
                (struct find_gid_index_context *)context;
 
+       if (ctx->gid_type != gid_attr->gid_type)
+               return false;
+
        if ((!!(ctx->vlan_id != 0xffff) == !is_vlan_dev(gid_attr->ndev)) ||
            (is_vlan_dev(gid_attr->ndev) &&
             vlan_dev_vlan_id(gid_attr->ndev) != ctx->vlan_id))
@@ -332,14 +382,49 @@ static bool find_gid_index(const union ib_gid *gid,
 
 static int get_sgid_index_from_eth(struct ib_device *device, u8 port_num,
                                   u16 vlan_id, const union ib_gid *sgid,
+                                  enum ib_gid_type gid_type,
                                   u16 *gid_index)
 {
-       struct find_gid_index_context context = {.vlan_id = vlan_id};
+       struct find_gid_index_context context = {.vlan_id = vlan_id,
+                                                .gid_type = gid_type};
 
        return ib_find_gid_by_filter(device, sgid, port_num, find_gid_index,
                                     &context, gid_index);
 }
 
+static int get_gids_from_rdma_hdr(union rdma_network_hdr *hdr,
+                                 enum rdma_network_type net_type,
+                                 union ib_gid *sgid, union ib_gid *dgid)
+{
+       struct sockaddr_in  src_in;
+       struct sockaddr_in  dst_in;
+       __be32 src_saddr, dst_saddr;
+
+       if (!sgid || !dgid)
+               return -EINVAL;
+
+       if (net_type == RDMA_NETWORK_IPV4) {
+               memcpy(&src_in.sin_addr.s_addr,
+                      &hdr->roce4grh.saddr, 4);
+               memcpy(&dst_in.sin_addr.s_addr,
+                      &hdr->roce4grh.daddr, 4);
+               src_saddr = src_in.sin_addr.s_addr;
+               dst_saddr = dst_in.sin_addr.s_addr;
+               ipv6_addr_set_v4mapped(src_saddr,
+                                      (struct in6_addr *)sgid);
+               ipv6_addr_set_v4mapped(dst_saddr,
+                                      (struct in6_addr *)dgid);
+               return 0;
+       } else if (net_type == RDMA_NETWORK_IPV6 ||
+                  net_type == RDMA_NETWORK_IB) {
+               *dgid = hdr->ibgrh.dgid;
+               *sgid = hdr->ibgrh.sgid;
+               return 0;
+       } else {
+               return -EINVAL;
+       }
+}
+
 int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
                       const struct ib_wc *wc, const struct ib_grh *grh,
                       struct ib_ah_attr *ah_attr)
@@ -347,33 +432,72 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
        u32 flow_class;
        u16 gid_index;
        int ret;
+       enum rdma_network_type net_type = RDMA_NETWORK_IB;
+       enum ib_gid_type gid_type = IB_GID_TYPE_IB;
+       int hoplimit = 0xff;
+       union ib_gid dgid;
+       union ib_gid sgid;
 
        memset(ah_attr, 0, sizeof *ah_attr);
        if (rdma_cap_eth_ah(device, port_num)) {
+               if (wc->wc_flags & IB_WC_WITH_NETWORK_HDR_TYPE)
+                       net_type = wc->network_hdr_type;
+               else
+                       net_type = ib_get_net_type_by_grh(device, port_num, grh);
+               gid_type = ib_network_to_gid_type(net_type);
+       }
+       ret = get_gids_from_rdma_hdr((union rdma_network_hdr *)grh, net_type,
+                                    &sgid, &dgid);
+       if (ret)
+               return ret;
+
+       if (rdma_protocol_roce(device, port_num)) {
+               int if_index = 0;
                u16 vlan_id = wc->wc_flags & IB_WC_WITH_VLAN ?
                                wc->vlan_id : 0xffff;
+               struct net_device *idev;
+               struct net_device *resolved_dev;
 
                if (!(wc->wc_flags & IB_WC_GRH))
                        return -EPROTOTYPE;
 
-               if (!(wc->wc_flags & IB_WC_WITH_SMAC) ||
-                   !(wc->wc_flags & IB_WC_WITH_VLAN)) {
-                       ret = rdma_addr_find_dmac_by_grh(&grh->dgid, &grh->sgid,
-                                                        ah_attr->dmac,
-                                                        wc->wc_flags & IB_WC_WITH_VLAN ?
-                                                        NULL : &vlan_id,
-                                                        0);
-                       if (ret)
-                               return ret;
+               if (!device->get_netdev)
+                       return -EOPNOTSUPP;
+
+               idev = device->get_netdev(device, port_num);
+               if (!idev)
+                       return -ENODEV;
+
+               ret = rdma_addr_find_l2_eth_by_grh(&dgid, &sgid,
+                                                  ah_attr->dmac,
+                                                  wc->wc_flags & IB_WC_WITH_VLAN ?
+                                                  NULL : &vlan_id,
+                                                  &if_index, &hoplimit);
+               if (ret) {
+                       dev_put(idev);
+                       return ret;
                }
 
-               ret = get_sgid_index_from_eth(device, port_num, vlan_id,
-                                             &grh->dgid, &gid_index);
+               resolved_dev = dev_get_by_index(&init_net, if_index);
+               if (resolved_dev->flags & IFF_LOOPBACK) {
+                       dev_put(resolved_dev);
+                       resolved_dev = idev;
+                       dev_hold(resolved_dev);
+               }
+               rcu_read_lock();
+               if (resolved_dev != idev && !rdma_is_upper_dev_rcu(idev,
+                                                                  resolved_dev))
+                       ret = -EHOSTUNREACH;
+               rcu_read_unlock();
+               dev_put(idev);
+               dev_put(resolved_dev);
                if (ret)
                        return ret;
 
-               if (wc->wc_flags & IB_WC_WITH_SMAC)
-                       memcpy(ah_attr->dmac, wc->smac, ETH_ALEN);
+               ret = get_sgid_index_from_eth(device, port_num, vlan_id,
+                                             &dgid, gid_type, &gid_index);
+               if (ret)
+                       return ret;
        }
 
        ah_attr->dlid = wc->slid;
@@ -383,10 +507,11 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
 
        if (wc->wc_flags & IB_WC_GRH) {
                ah_attr->ah_flags = IB_AH_GRH;
-               ah_attr->grh.dgid = grh->sgid;
+               ah_attr->grh.dgid = sgid;
 
                if (!rdma_cap_eth_ah(device, port_num)) {
-                       ret = ib_find_cached_gid_by_port(device, &grh->dgid,
+                       ret = ib_find_cached_gid_by_port(device, &dgid,
+                                                        IB_GID_TYPE_IB,
                                                         port_num, NULL,
                                                         &gid_index);
                        if (ret)
@@ -396,7 +521,7 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
                ah_attr->grh.sgid_index = (u8) gid_index;
                flow_class = be32_to_cpu(grh->version_tclass_flow);
                ah_attr->grh.flow_label = flow_class & 0xFFFFF;
-               ah_attr->grh.hop_limit = 0xFF;
+               ah_attr->grh.hop_limit = hoplimit;
                ah_attr->grh.traffic_class = (flow_class >> 20) & 0xFF;
        }
        return 0;
@@ -1014,6 +1139,7 @@ int ib_resolve_eth_dmac(struct ib_qp *qp,
                        union ib_gid            sgid;
                        struct ib_gid_attr      sgid_attr;
                        int                     ifindex;
+                       int                     hop_limit;
 
                        ret = ib_query_gid(qp->device,
                                           qp_attr->ah_attr.port_num,
@@ -1028,12 +1154,14 @@ int ib_resolve_eth_dmac(struct ib_qp *qp,
 
                        ifindex = sgid_attr.ndev->ifindex;
 
-                       ret = rdma_addr_find_dmac_by_grh(&sgid,
-                                                        &qp_attr->ah_attr.grh.dgid,
-                                                        qp_attr->ah_attr.dmac,
-                                                        NULL, ifindex);
+                       ret = rdma_addr_find_l2_eth_by_grh(&sgid,
+                                                          &qp_attr->ah_attr.grh.dgid,
+                                                          qp_attr->ah_attr.dmac,
+                                                          NULL, &ifindex, &hop_limit);
 
                        dev_put(sgid_attr.ndev);
+
+                       qp_attr->ah_attr.grh.hop_limit = hop_limit;
                }
        }
 out:
@@ -1215,29 +1343,17 @@ struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
                mr->pd      = pd;
                mr->uobject = NULL;
                atomic_inc(&pd->usecnt);
-               atomic_set(&mr->usecnt, 0);
        }
 
        return mr;
 }
 EXPORT_SYMBOL(ib_get_dma_mr);
 
-int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr)
-{
-       return mr->device->query_mr ?
-               mr->device->query_mr(mr, mr_attr) : -ENOSYS;
-}
-EXPORT_SYMBOL(ib_query_mr);
-
 int ib_dereg_mr(struct ib_mr *mr)
 {
-       struct ib_pd *pd;
+       struct ib_pd *pd = mr->pd;
        int ret;
 
-       if (atomic_read(&mr->usecnt))
-               return -EBUSY;
-
-       pd = mr->pd;
        ret = mr->device->dereg_mr(mr);
        if (!ret)
                atomic_dec(&pd->usecnt);
@@ -1273,49 +1389,12 @@ struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
                mr->pd      = pd;
                mr->uobject = NULL;
                atomic_inc(&pd->usecnt);
-               atomic_set(&mr->usecnt, 0);
        }
 
        return mr;
 }
 EXPORT_SYMBOL(ib_alloc_mr);
 
-/* Memory windows */
-
-struct ib_mw *ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
-{
-       struct ib_mw *mw;
-
-       if (!pd->device->alloc_mw)
-               return ERR_PTR(-ENOSYS);
-
-       mw = pd->device->alloc_mw(pd, type);
-       if (!IS_ERR(mw)) {
-               mw->device  = pd->device;
-               mw->pd      = pd;
-               mw->uobject = NULL;
-               mw->type    = type;
-               atomic_inc(&pd->usecnt);
-       }
-
-       return mw;
-}
-EXPORT_SYMBOL(ib_alloc_mw);
-
-int ib_dealloc_mw(struct ib_mw *mw)
-{
-       struct ib_pd *pd;
-       int ret;
-
-       pd = mw->pd;
-       ret = mw->device->dealloc_mw(mw);
-       if (!ret)
-               atomic_dec(&pd->usecnt);
-
-       return ret;
-}
-EXPORT_SYMBOL(ib_dealloc_mw);
-
 /* "Fast" memory regions */
 
 struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd,
@@ -1530,7 +1609,7 @@ int ib_sg_to_pages(struct ib_mr *mr,
                   int (*set_page)(struct ib_mr *, u64))
 {
        struct scatterlist *sg;
-       u64 last_end_dma_addr = 0, last_page_addr = 0;
+       u64 last_end_dma_addr = 0;
        unsigned int last_page_off = 0;
        u64 page_mask = ~((u64)mr->page_size - 1);
        int i, ret;
@@ -1572,7 +1651,6 @@ next_page:
 
                mr->length += dma_len;
                last_end_dma_addr = end_dma_addr;
-               last_page_addr = end_dma_addr & page_mask;
                last_page_off = end_dma_addr & ~page_mask;
        }
 
index cb78b1e9bcd9ec3035884e3c0c6504bf317dbfc8..f504ba73e5dc27200c988f342ee3d6423072ee9e 100644 (file)
@@ -149,7 +149,7 @@ static int iwch_l2t_send(struct t3cdev *tdev, struct sk_buff *skb, struct l2t_en
        error = l2t_send(tdev, skb, l2e);
        if (error < 0)
                kfree_skb(skb);
-       return error;
+       return error < 0 ? error : 0;
 }
 
 int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb)
@@ -165,7 +165,7 @@ int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb)
        error = cxgb3_ofld_send(tdev, skb);
        if (error < 0)
                kfree_skb(skb);
-       return error;
+       return error < 0 ? error : 0;
 }
 
 static void release_tid(struct t3cdev *tdev, u32 hwtid, struct sk_buff *skb)
index cfe404925a399f2be0cd1bdbc23c262ad1b24ca9..97fbfd2c298ecc431ba7049e28f13ea33ba8a520 100644 (file)
@@ -115,10 +115,6 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
                case T3_SEND_WITH_SE_INV:
                        wc->opcode = IB_WC_SEND;
                        break;
-               case T3_BIND_MW:
-                       wc->opcode = IB_WC_BIND_MW;
-                       break;
-
                case T3_LOCAL_INV:
                        wc->opcode = IB_WC_LOCAL_INV;
                        break;
index 5c36ee2809acf52f8f212483dfb81e5649357baf..1d04c872c9d58c69a47dbb1b1c38f2ae69d80cde 100644 (file)
@@ -75,37 +75,6 @@ int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
        return ret;
 }
 
-int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
-                                       struct iwch_mr *mhp,
-                                       int shift,
-                                       int npages)
-{
-       u32 stag;
-       int ret;
-
-       /* We could support this... */
-       if (npages > mhp->attr.pbl_size)
-               return -ENOMEM;
-
-       stag = mhp->attr.stag;
-       if (cxio_reregister_phys_mem(&rhp->rdev,
-                                  &stag, mhp->attr.pdid,
-                                  mhp->attr.perms,
-                                  mhp->attr.zbva,
-                                  mhp->attr.va_fbo,
-                                  mhp->attr.len,
-                                  shift - 12,
-                                  mhp->attr.pbl_size, mhp->attr.pbl_addr))
-               return -ENOMEM;
-
-       ret = iwch_finish_mem_reg(mhp, stag);
-       if (ret)
-               cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
-                      mhp->attr.pbl_addr);
-
-       return ret;
-}
-
 int iwch_alloc_pbl(struct iwch_mr *mhp, int npages)
 {
        mhp->attr.pbl_addr = cxio_hal_pblpool_alloc(&mhp->rhp->rdev,
@@ -130,74 +99,3 @@ int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset)
        return cxio_write_pbl(&mhp->rhp->rdev, pages,
                              mhp->attr.pbl_addr + (offset << 3), npages);
 }
-
-int build_phys_page_list(struct ib_phys_buf *buffer_list,
-                                       int num_phys_buf,
-                                       u64 *iova_start,
-                                       u64 *total_size,
-                                       int *npages,
-                                       int *shift,
-                                       __be64 **page_list)
-{
-       u64 mask;
-       int i, j, n;
-
-       mask = 0;
-       *total_size = 0;
-       for (i = 0; i < num_phys_buf; ++i) {
-               if (i != 0 && buffer_list[i].addr & ~PAGE_MASK)
-                       return -EINVAL;
-               if (i != 0 && i != num_phys_buf - 1 &&
-                   (buffer_list[i].size & ~PAGE_MASK))
-                       return -EINVAL;
-               *total_size += buffer_list[i].size;
-               if (i > 0)
-                       mask |= buffer_list[i].addr;
-               else
-                       mask |= buffer_list[i].addr & PAGE_MASK;
-               if (i != num_phys_buf - 1)
-                       mask |= buffer_list[i].addr + buffer_list[i].size;
-               else
-                       mask |= (buffer_list[i].addr + buffer_list[i].size +
-                               PAGE_SIZE - 1) & PAGE_MASK;
-       }
-
-       if (*total_size > 0xFFFFFFFFULL)
-               return -ENOMEM;
-
-       /* Find largest page shift we can use to cover buffers */
-       for (*shift = PAGE_SHIFT; *shift < 27; ++(*shift))
-               if ((1ULL << *shift) & mask)
-                       break;
-
-       buffer_list[0].size += buffer_list[0].addr & ((1ULL << *shift) - 1);
-       buffer_list[0].addr &= ~0ull << *shift;
-
-       *npages = 0;
-       for (i = 0; i < num_phys_buf; ++i)
-               *npages += (buffer_list[i].size +
-                       (1ULL << *shift) - 1) >> *shift;
-
-       if (!*npages)
-               return -EINVAL;
-
-       *page_list = kmalloc(sizeof(u64) * *npages, GFP_KERNEL);
-       if (!*page_list)
-               return -ENOMEM;
-
-       n = 0;
-       for (i = 0; i < num_phys_buf; ++i)
-               for (j = 0;
-                    j < (buffer_list[i].size + (1ULL << *shift) - 1) >> *shift;
-                    ++j)
-                       (*page_list)[n++] = cpu_to_be64(buffer_list[i].addr +
-                           ((u64) j << *shift));
-
-       PDBG("%s va 0x%llx mask 0x%llx shift %d len %lld pbl_size %d\n",
-            __func__, (unsigned long long) *iova_start,
-            (unsigned long long) mask, *shift, (unsigned long long) *total_size,
-            *npages);
-
-       return 0;
-
-}
index c34725ca0bb426606e16708fafab2995490d7187..2734820d291b02387b8e925bb5f09ab6e23418dd 100644 (file)
@@ -458,9 +458,6 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr)
        u32 mmid;
 
        PDBG("%s ib_mr %p\n", __func__, ib_mr);
-       /* There can be no memory windows */
-       if (atomic_read(&ib_mr->usecnt))
-               return -EINVAL;
 
        mhp = to_iwch_mr(ib_mr);
        kfree(mhp->pages);
@@ -479,24 +476,25 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr)
        return 0;
 }
 
-static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
-                                       struct ib_phys_buf *buffer_list,
-                                       int num_phys_buf,
-                                       int acc,
-                                       u64 *iova_start)
+static struct ib_mr *iwch_get_dma_mr(struct ib_pd *pd, int acc)
 {
-       __be64 *page_list;
-       int shift;
-       u64 total_size;
-       int npages;
-       struct iwch_dev *rhp;
-       struct iwch_pd *php;
+       const u64 total_size = 0xffffffff;
+       const u64 mask = (total_size + PAGE_SIZE - 1) & PAGE_MASK;
+       struct iwch_pd *php = to_iwch_pd(pd);
+       struct iwch_dev *rhp = php->rhp;
        struct iwch_mr *mhp;
-       int ret;
+       __be64 *page_list;
+       int shift = 26, npages, ret, i;
 
        PDBG("%s ib_pd %p\n", __func__, pd);
-       php = to_iwch_pd(pd);
-       rhp = php->rhp;
+
+       /*
+        * T3 only supports 32 bits of size.
+        */
+       if (sizeof(phys_addr_t) > 4) {
+               pr_warn_once(MOD "Cannot support dma_mrs on this platform.\n");
+               return ERR_PTR(-ENOTSUPP);
+       }
 
        mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
        if (!mhp)
@@ -504,22 +502,23 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
 
        mhp->rhp = rhp;
 
-       /* First check that we have enough alignment */
-       if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK)) {
+       npages = (total_size + (1ULL << shift) - 1) >> shift;
+       if (!npages) {
                ret = -EINVAL;
                goto err;
        }
 
-       if (num_phys_buf > 1 &&
-           ((buffer_list[0].addr + buffer_list[0].size) & ~PAGE_MASK)) {
-               ret = -EINVAL;
+       page_list = kmalloc_array(npages, sizeof(u64), GFP_KERNEL);
+       if (!page_list) {
+               ret = -ENOMEM;
                goto err;
        }
 
-       ret = build_phys_page_list(buffer_list, num_phys_buf, iova_start,
-                                  &total_size, &npages, &shift, &page_list);
-       if (ret)
-               goto err;
+       for (i = 0; i < npages; i++)
+               page_list[i] = cpu_to_be64((u64)i << shift);
+
+       PDBG("%s mask 0x%llx shift %d len %lld pbl_size %d\n",
+               __func__, mask, shift, total_size, npages);
 
        ret = iwch_alloc_pbl(mhp, npages);
        if (ret) {
@@ -536,7 +535,7 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
        mhp->attr.zbva = 0;
 
        mhp->attr.perms = iwch_ib_to_tpt_access(acc);
-       mhp->attr.va_fbo = *iova_start;
+       mhp->attr.va_fbo = 0;
        mhp->attr.page_size = shift - 12;
 
        mhp->attr.len = (u32) total_size;
@@ -553,76 +552,8 @@ err_pbl:
 err:
        kfree(mhp);
        return ERR_PTR(ret);
-
-}
-
-static int iwch_reregister_phys_mem(struct ib_mr *mr,
-                                    int mr_rereg_mask,
-                                    struct ib_pd *pd,
-                                    struct ib_phys_buf *buffer_list,
-                                    int num_phys_buf,
-                                    int acc, u64 * iova_start)
-{
-
-       struct iwch_mr mh, *mhp;
-       struct iwch_pd *php;
-       struct iwch_dev *rhp;
-       __be64 *page_list = NULL;
-       int shift = 0;
-       u64 total_size;
-       int npages = 0;
-       int ret;
-
-       PDBG("%s ib_mr %p ib_pd %p\n", __func__, mr, pd);
-
-       /* There can be no memory windows */
-       if (atomic_read(&mr->usecnt))
-               return -EINVAL;
-
-       mhp = to_iwch_mr(mr);
-       rhp = mhp->rhp;
-       php = to_iwch_pd(mr->pd);
-
-       /* make sure we are on the same adapter */
-       if (rhp != php->rhp)
-               return -EINVAL;
-
-       memcpy(&mh, mhp, sizeof *mhp);
-
-       if (mr_rereg_mask & IB_MR_REREG_PD)
-               php = to_iwch_pd(pd);
-       if (mr_rereg_mask & IB_MR_REREG_ACCESS)
-               mh.attr.perms = iwch_ib_to_tpt_access(acc);
-       if (mr_rereg_mask & IB_MR_REREG_TRANS) {
-               ret = build_phys_page_list(buffer_list, num_phys_buf,
-                                          iova_start,
-                                          &total_size, &npages,
-                                          &shift, &page_list);
-               if (ret)
-                       return ret;
-       }
-
-       ret = iwch_reregister_mem(rhp, php, &mh, shift, npages);
-       kfree(page_list);
-       if (ret) {
-               return ret;
-       }
-       if (mr_rereg_mask & IB_MR_REREG_PD)
-               mhp->attr.pdid = php->pdid;
-       if (mr_rereg_mask & IB_MR_REREG_ACCESS)
-               mhp->attr.perms = iwch_ib_to_tpt_access(acc);
-       if (mr_rereg_mask & IB_MR_REREG_TRANS) {
-               mhp->attr.zbva = 0;
-               mhp->attr.va_fbo = *iova_start;
-               mhp->attr.page_size = shift - 12;
-               mhp->attr.len = (u32) total_size;
-               mhp->attr.pbl_size = npages;
-       }
-
-       return 0;
 }
 
-
 static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                                      u64 virt, int acc, struct ib_udata *udata)
 {
@@ -726,28 +657,6 @@ err:
        return ERR_PTR(err);
 }
 
-static struct ib_mr *iwch_get_dma_mr(struct ib_pd *pd, int acc)
-{
-       struct ib_phys_buf bl;
-       u64 kva;
-       struct ib_mr *ibmr;
-
-       PDBG("%s ib_pd %p\n", __func__, pd);
-
-       /*
-        * T3 only supports 32 bits of size.
-        */
-       if (sizeof(phys_addr_t) > 4) {
-               pr_warn_once(MOD "Cannot support dma_mrs on this platform.\n");
-               return ERR_PTR(-ENOTSUPP);
-       }
-       bl.size = 0xffffffff;
-       bl.addr = 0;
-       kva = 0;
-       ibmr = iwch_register_phys_mem(pd, &bl, 1, acc, &kva);
-       return ibmr;
-}
-
 static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
 {
        struct iwch_dev *rhp;
@@ -1452,12 +1361,9 @@ int iwch_register_device(struct iwch_dev *dev)
        dev->ibdev.resize_cq = iwch_resize_cq;
        dev->ibdev.poll_cq = iwch_poll_cq;
        dev->ibdev.get_dma_mr = iwch_get_dma_mr;
-       dev->ibdev.reg_phys_mr = iwch_register_phys_mem;
-       dev->ibdev.rereg_phys_mr = iwch_reregister_phys_mem;
        dev->ibdev.reg_user_mr = iwch_reg_user_mr;
        dev->ibdev.dereg_mr = iwch_dereg_mr;
        dev->ibdev.alloc_mw = iwch_alloc_mw;
-       dev->ibdev.bind_mw = iwch_bind_mw;
        dev->ibdev.dealloc_mw = iwch_dealloc_mw;
        dev->ibdev.alloc_mr = iwch_alloc_mr;
        dev->ibdev.map_mr_sg = iwch_map_mr_sg;
index 2ac85b86a680dea89becaf76c75956cda42a1644..252c464a09f6a7ef6c4360a7449ba973e36369c0 100644 (file)
@@ -330,9 +330,6 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                      struct ib_send_wr **bad_wr);
 int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
                      struct ib_recv_wr **bad_wr);
-int iwch_bind_mw(struct ib_qp *qp,
-                            struct ib_mw *mw,
-                            struct ib_mw_bind *mw_bind);
 int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
 int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg);
 int iwch_post_zb_read(struct iwch_ep *ep);
@@ -341,21 +338,9 @@ void iwch_unregister_device(struct iwch_dev *dev);
 void stop_read_rep_timer(struct iwch_qp *qhp);
 int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
                      struct iwch_mr *mhp, int shift);
-int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
-                                       struct iwch_mr *mhp,
-                                       int shift,
-                                       int npages);
 int iwch_alloc_pbl(struct iwch_mr *mhp, int npages);
 void iwch_free_pbl(struct iwch_mr *mhp);
 int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset);
-int build_phys_page_list(struct ib_phys_buf *buffer_list,
-                                       int num_phys_buf,
-                                       u64 *iova_start,
-                                       u64 *total_size,
-                                       int *npages,
-                                       int *shift,
-                                       __be64 **page_list);
-
 
 #define IWCH_NODE_DESC "cxgb3 Chelsio Communications"
 
index d0548fc6395eac2343775ea4a4b85d3c83bb98b0..d939980a708fdcf1daa6a7a53686cb6509ef2bb5 100644 (file)
@@ -526,88 +526,6 @@ out:
        return err;
 }
 
-int iwch_bind_mw(struct ib_qp *qp,
-                            struct ib_mw *mw,
-                            struct ib_mw_bind *mw_bind)
-{
-       struct iwch_dev *rhp;
-       struct iwch_mw *mhp;
-       struct iwch_qp *qhp;
-       union t3_wr *wqe;
-       u32 pbl_addr;
-       u8 page_size;
-       u32 num_wrs;
-       unsigned long flag;
-       struct ib_sge sgl;
-       int err=0;
-       enum t3_wr_flags t3_wr_flags;
-       u32 idx;
-       struct t3_swsq *sqp;
-
-       qhp = to_iwch_qp(qp);
-       mhp = to_iwch_mw(mw);
-       rhp = qhp->rhp;
-
-       spin_lock_irqsave(&qhp->lock, flag);
-       if (qhp->attr.state > IWCH_QP_STATE_RTS) {
-               spin_unlock_irqrestore(&qhp->lock, flag);
-               return -EINVAL;
-       }
-       num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr,
-                           qhp->wq.sq_size_log2);
-       if (num_wrs == 0) {
-               spin_unlock_irqrestore(&qhp->lock, flag);
-               return -ENOMEM;
-       }
-       idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
-       PDBG("%s: idx 0x%0x, mw 0x%p, mw_bind 0x%p\n", __func__, idx,
-            mw, mw_bind);
-       wqe = (union t3_wr *) (qhp->wq.queue + idx);
-
-       t3_wr_flags = 0;
-       if (mw_bind->send_flags & IB_SEND_SIGNALED)
-               t3_wr_flags = T3_COMPLETION_FLAG;
-
-       sgl.addr = mw_bind->bind_info.addr;
-       sgl.lkey = mw_bind->bind_info.mr->lkey;
-       sgl.length = mw_bind->bind_info.length;
-       wqe->bind.reserved = 0;
-       wqe->bind.type = TPT_VATO;
-
-       /* TBD: check perms */
-       wqe->bind.perms = iwch_ib_to_tpt_bind_access(
-               mw_bind->bind_info.mw_access_flags);
-       wqe->bind.mr_stag = cpu_to_be32(mw_bind->bind_info.mr->lkey);
-       wqe->bind.mw_stag = cpu_to_be32(mw->rkey);
-       wqe->bind.mw_len = cpu_to_be32(mw_bind->bind_info.length);
-       wqe->bind.mw_va = cpu_to_be64(mw_bind->bind_info.addr);
-       err = iwch_sgl2pbl_map(rhp, &sgl, 1, &pbl_addr, &page_size);
-       if (err) {
-               spin_unlock_irqrestore(&qhp->lock, flag);
-               return err;
-       }
-       wqe->send.wrid.id0.hi = qhp->wq.sq_wptr;
-       sqp = qhp->wq.sq + Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2);
-       sqp->wr_id = mw_bind->wr_id;
-       sqp->opcode = T3_BIND_MW;
-       sqp->sq_wptr = qhp->wq.sq_wptr;
-       sqp->complete = 0;
-       sqp->signaled = (mw_bind->send_flags & IB_SEND_SIGNALED);
-       wqe->bind.mr_pbl_addr = cpu_to_be32(pbl_addr);
-       wqe->bind.mr_pagesz = page_size;
-       build_fw_riwrh((void *)wqe, T3_WR_BIND, t3_wr_flags,
-                      Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), 0,
-                      sizeof(struct t3_bind_mw_wr) >> 3, T3_SOPEOP);
-       ++(qhp->wq.wptr);
-       ++(qhp->wq.sq_wptr);
-       spin_unlock_irqrestore(&qhp->lock, flag);
-
-       if (cxio_wq_db_enabled(&qhp->wq))
-               ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
-
-       return err;
-}
-
 static inline void build_term_codes(struct respQ_msg_t *rsp_msg,
                                    u8 *layer_type, u8 *ecode)
 {
index 326d07d823a5e62f0ee206b8c5f5865f7f97c08d..cd2ff5f9518a2b7143d53821672a5d98b9773b17 100644 (file)
@@ -3271,6 +3271,12 @@ static int create_server6(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
        struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)
                                    &ep->com.mapped_local_addr;
 
+       if (ipv6_addr_type(&sin6->sin6_addr) != IPV6_ADDR_ANY) {
+               err = cxgb4_clip_get(ep->com.dev->rdev.lldi.ports[0],
+                                    (const u32 *)&sin6->sin6_addr.s6_addr, 1);
+               if (err)
+                       return err;
+       }
        c4iw_init_wr_wait(&ep->com.wr_wait);
        err = cxgb4_create_server6(ep->com.dev->rdev.lldi.ports[0],
                                   ep->stid, &sin6->sin6_addr,
@@ -3282,13 +3288,13 @@ static int create_server6(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
                                          0, 0, __func__);
        else if (err > 0)
                err = net_xmit_errno(err);
-       if (err)
+       if (err) {
+               cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
+                                  (const u32 *)&sin6->sin6_addr.s6_addr, 1);
                pr_err("cxgb4_create_server6/filter failed err %d stid %d laddr %pI6 lport %d\n",
                       err, ep->stid,
                       sin6->sin6_addr.s6_addr, ntohs(sin6->sin6_port));
-       else
-               cxgb4_clip_get(ep->com.dev->rdev.lldi.ports[0],
-                              (const u32 *)&sin6->sin6_addr.s6_addr, 1);
+       }
        return err;
 }
 
index de9cd6901752fc1e3da38d64f62bfce7cb853501..cf21df4a8bf5b80da00f6f82003980650df9c522 100644 (file)
@@ -744,9 +744,6 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
                case FW_RI_SEND_WITH_SE:
                        wc->opcode = IB_WC_SEND;
                        break;
-               case FW_RI_BIND_MW:
-                       wc->opcode = IB_WC_BIND_MW;
-                       break;
 
                case FW_RI_LOCAL_INV:
                        wc->opcode = IB_WC_LOCAL_INV;
index 58fce1742b8d8c33b91dfa89075617a9c5cf34f7..8024ea4417b8735b93e77dc9310314db44b8833e 100644 (file)
@@ -315,14 +315,12 @@ static int qp_release(struct inode *inode, struct file *file)
 static int qp_open(struct inode *inode, struct file *file)
 {
        struct c4iw_debugfs_data *qpd;
-       int ret = 0;
        int count = 1;
 
        qpd = kmalloc(sizeof *qpd, GFP_KERNEL);
-       if (!qpd) {
-               ret = -ENOMEM;
-               goto out;
-       }
+       if (!qpd)
+               return -ENOMEM;
+
        qpd->devp = inode->i_private;
        qpd->pos = 0;
 
@@ -333,8 +331,8 @@ static int qp_open(struct inode *inode, struct file *file)
        qpd->bufsize = count * 128;
        qpd->buf = vmalloc(qpd->bufsize);
        if (!qpd->buf) {
-               ret = -ENOMEM;
-               goto err1;
+               kfree(qpd);
+               return -ENOMEM;
        }
 
        spin_lock_irq(&qpd->devp->lock);
@@ -343,11 +341,7 @@ static int qp_open(struct inode *inode, struct file *file)
 
        qpd->buf[qpd->pos++] = 0;
        file->private_data = qpd;
-       goto out;
-err1:
-       kfree(qpd);
-out:
-       return ret;
+       return 0;
 }
 
 static const struct file_operations qp_debugfs_fops = {
@@ -781,8 +775,7 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
                pr_err(MOD "%s: unsupported udb/ucq densities %u/%u\n",
                       pci_name(rdev->lldi.pdev), rdev->lldi.udb_density,
                       rdev->lldi.ucq_density);
-               err = -EINVAL;
-               goto err1;
+               return -EINVAL;
        }
        if (rdev->lldi.vr->qp.start != rdev->lldi.vr->cq.start ||
            rdev->lldi.vr->qp.size != rdev->lldi.vr->cq.size) {
@@ -791,8 +784,7 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
                       pci_name(rdev->lldi.pdev), rdev->lldi.vr->qp.start,
                       rdev->lldi.vr->qp.size, rdev->lldi.vr->cq.size,
                       rdev->lldi.vr->cq.size);
-               err = -EINVAL;
-               goto err1;
+               return -EINVAL;
        }
 
        rdev->qpmask = rdev->lldi.udb_density - 1;
@@ -816,10 +808,8 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
             rdev->lldi.db_reg, rdev->lldi.gts_reg,
             rdev->qpmask, rdev->cqmask);
 
-       if (c4iw_num_stags(rdev) == 0) {
-               err = -EINVAL;
-               goto err1;
-       }
+       if (c4iw_num_stags(rdev) == 0)
+               return -EINVAL;
 
        rdev->stats.pd.total = T4_MAX_NUM_PD;
        rdev->stats.stag.total = rdev->lldi.vr->stag.size;
@@ -831,29 +821,31 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
        err = c4iw_init_resource(rdev, c4iw_num_stags(rdev), T4_MAX_NUM_PD);
        if (err) {
                printk(KERN_ERR MOD "error %d initializing resources\n", err);
-               goto err1;
+               return err;
        }
        err = c4iw_pblpool_create(rdev);
        if (err) {
                printk(KERN_ERR MOD "error %d initializing pbl pool\n", err);
-               goto err2;
+               goto destroy_resource;
        }
        err = c4iw_rqtpool_create(rdev);
        if (err) {
                printk(KERN_ERR MOD "error %d initializing rqt pool\n", err);
-               goto err3;
+               goto destroy_pblpool;
        }
        err = c4iw_ocqp_pool_create(rdev);
        if (err) {
                printk(KERN_ERR MOD "error %d initializing ocqp pool\n", err);
-               goto err4;
+               goto destroy_rqtpool;
        }
        rdev->status_page = (struct t4_dev_status_page *)
                            __get_free_page(GFP_KERNEL);
-       if (!rdev->status_page) {
-               pr_err(MOD "error allocating status page\n");
-               goto err4;
-       }
+       if (!rdev->status_page)
+               goto destroy_ocqp_pool;
+       rdev->status_page->qp_start = rdev->lldi.vr->qp.start;
+       rdev->status_page->qp_size = rdev->lldi.vr->qp.size;
+       rdev->status_page->cq_start = rdev->lldi.vr->cq.start;
+       rdev->status_page->cq_size = rdev->lldi.vr->cq.size;
 
        if (c4iw_wr_log) {
                rdev->wr_log = kzalloc((1 << c4iw_wr_log_size_order) *
@@ -869,13 +861,14 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
        rdev->status_page->db_off = 0;
 
        return 0;
-err4:
+destroy_ocqp_pool:
+       c4iw_ocqp_pool_destroy(rdev);
+destroy_rqtpool:
        c4iw_rqtpool_destroy(rdev);
-err3:
+destroy_pblpool:
        c4iw_pblpool_destroy(rdev);
-err2:
+destroy_resource:
        c4iw_destroy_resource(&rdev->resource);
-err1:
        return err;
 }
 
index 00e55faa086aa2a30259c5c8b96cc0ae129a4ef6..fb2de75a039216e2f732a8f61ba387de121f1ce7 100644 (file)
@@ -947,8 +947,6 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                      struct ib_send_wr **bad_wr);
 int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
                      struct ib_recv_wr **bad_wr);
-int c4iw_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
-                struct ib_mw_bind *mw_bind);
 int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
 int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog);
 int c4iw_destroy_listen(struct iw_cm_id *cm_id);
@@ -968,17 +966,6 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start,
                                           u64 length, u64 virt, int acc,
                                           struct ib_udata *udata);
 struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc);
-struct ib_mr *c4iw_register_phys_mem(struct ib_pd *pd,
-                                       struct ib_phys_buf *buffer_list,
-                                       int num_phys_buf,
-                                       int acc,
-                                       u64 *iova_start);
-int c4iw_reregister_phys_mem(struct ib_mr *mr,
-                                    int mr_rereg_mask,
-                                    struct ib_pd *pd,
-                                    struct ib_phys_buf *buffer_list,
-                                    int num_phys_buf,
-                                    int acc, u64 *iova_start);
 int c4iw_dereg_mr(struct ib_mr *ib_mr);
 int c4iw_destroy_cq(struct ib_cq *ib_cq);
 struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
index e1629ab58db7873a3d9a6c044ba7bc65eb4512c6..7849890c478141e57136127c84f4cad9fd46227d 100644 (file)
@@ -392,32 +392,6 @@ static int register_mem(struct c4iw_dev *rhp, struct c4iw_pd *php,
        return ret;
 }
 
-static int reregister_mem(struct c4iw_dev *rhp, struct c4iw_pd *php,
-                         struct c4iw_mr *mhp, int shift, int npages)
-{
-       u32 stag;
-       int ret;
-
-       if (npages > mhp->attr.pbl_size)
-               return -ENOMEM;
-
-       stag = mhp->attr.stag;
-       ret = write_tpt_entry(&rhp->rdev, 0, &stag, 1, mhp->attr.pdid,
-                             FW_RI_STAG_NSMR, mhp->attr.perms,
-                             mhp->attr.mw_bind_enable, mhp->attr.zbva,
-                             mhp->attr.va_fbo, mhp->attr.len, shift - 12,
-                             mhp->attr.pbl_size, mhp->attr.pbl_addr);
-       if (ret)
-               return ret;
-
-       ret = finish_mem_reg(mhp, stag);
-       if (ret)
-               dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
-                      mhp->attr.pbl_addr);
-
-       return ret;
-}
-
 static int alloc_pbl(struct c4iw_mr *mhp, int npages)
 {
        mhp->attr.pbl_addr = c4iw_pblpool_alloc(&mhp->rhp->rdev,
@@ -431,228 +405,6 @@ static int alloc_pbl(struct c4iw_mr *mhp, int npages)
        return 0;
 }
 
-static int build_phys_page_list(struct ib_phys_buf *buffer_list,
-                               int num_phys_buf, u64 *iova_start,
-                               u64 *total_size, int *npages,
-                               int *shift, __be64 **page_list)
-{
-       u64 mask;
-       int i, j, n;
-
-       mask = 0;
-       *total_size = 0;
-       for (i = 0; i < num_phys_buf; ++i) {
-               if (i != 0 && buffer_list[i].addr & ~PAGE_MASK)
-                       return -EINVAL;
-               if (i != 0 && i != num_phys_buf - 1 &&
-                   (buffer_list[i].size & ~PAGE_MASK))
-                       return -EINVAL;
-               *total_size += buffer_list[i].size;
-               if (i > 0)
-                       mask |= buffer_list[i].addr;
-               else
-                       mask |= buffer_list[i].addr & PAGE_MASK;
-               if (i != num_phys_buf - 1)
-                       mask |= buffer_list[i].addr + buffer_list[i].size;
-               else
-                       mask |= (buffer_list[i].addr + buffer_list[i].size +
-                               PAGE_SIZE - 1) & PAGE_MASK;
-       }
-
-       if (*total_size > 0xFFFFFFFFULL)
-               return -ENOMEM;
-
-       /* Find largest page shift we can use to cover buffers */
-       for (*shift = PAGE_SHIFT; *shift < 27; ++(*shift))
-               if ((1ULL << *shift) & mask)
-                       break;
-
-       buffer_list[0].size += buffer_list[0].addr & ((1ULL << *shift) - 1);
-       buffer_list[0].addr &= ~0ull << *shift;
-
-       *npages = 0;
-       for (i = 0; i < num_phys_buf; ++i)
-               *npages += (buffer_list[i].size +
-                       (1ULL << *shift) - 1) >> *shift;
-
-       if (!*npages)
-               return -EINVAL;
-
-       *page_list = kmalloc(sizeof(u64) * *npages, GFP_KERNEL);
-       if (!*page_list)
-               return -ENOMEM;
-
-       n = 0;
-       for (i = 0; i < num_phys_buf; ++i)
-               for (j = 0;
-                    j < (buffer_list[i].size + (1ULL << *shift) - 1) >> *shift;
-                    ++j)
-                       (*page_list)[n++] = cpu_to_be64(buffer_list[i].addr +
-                           ((u64) j << *shift));
-
-       PDBG("%s va 0x%llx mask 0x%llx shift %d len %lld pbl_size %d\n",
-            __func__, (unsigned long long)*iova_start,
-            (unsigned long long)mask, *shift, (unsigned long long)*total_size,
-            *npages);
-
-       return 0;
-
-}
-
-int c4iw_reregister_phys_mem(struct ib_mr *mr, int mr_rereg_mask,
-                            struct ib_pd *pd, struct ib_phys_buf *buffer_list,
-                            int num_phys_buf, int acc, u64 *iova_start)
-{
-
-       struct c4iw_mr mh, *mhp;
-       struct c4iw_pd *php;
-       struct c4iw_dev *rhp;
-       __be64 *page_list = NULL;
-       int shift = 0;
-       u64 total_size;
-       int npages;
-       int ret;
-
-       PDBG("%s ib_mr %p ib_pd %p\n", __func__, mr, pd);
-
-       /* There can be no memory windows */
-       if (atomic_read(&mr->usecnt))
-               return -EINVAL;
-
-       mhp = to_c4iw_mr(mr);
-       rhp = mhp->rhp;
-       php = to_c4iw_pd(mr->pd);
-
-       /* make sure we are on the same adapter */
-       if (rhp != php->rhp)
-               return -EINVAL;
-
-       memcpy(&mh, mhp, sizeof *mhp);
-
-       if (mr_rereg_mask & IB_MR_REREG_PD)
-               php = to_c4iw_pd(pd);
-       if (mr_rereg_mask & IB_MR_REREG_ACCESS) {
-               mh.attr.perms = c4iw_ib_to_tpt_access(acc);
-               mh.attr.mw_bind_enable = (acc & IB_ACCESS_MW_BIND) ==
-                                        IB_ACCESS_MW_BIND;
-       }
-       if (mr_rereg_mask & IB_MR_REREG_TRANS) {
-               ret = build_phys_page_list(buffer_list, num_phys_buf,
-                                               iova_start,
-                                               &total_size, &npages,
-                                               &shift, &page_list);
-               if (ret)
-                       return ret;
-       }
-
-       if (mr_exceeds_hw_limits(rhp, total_size)) {
-               kfree(page_list);
-               return -EINVAL;
-       }
-
-       ret = reregister_mem(rhp, php, &mh, shift, npages);
-       kfree(page_list);
-       if (ret)
-               return ret;
-       if (mr_rereg_mask & IB_MR_REREG_PD)
-               mhp->attr.pdid = php->pdid;
-       if (mr_rereg_mask & IB_MR_REREG_ACCESS)
-               mhp->attr.perms = c4iw_ib_to_tpt_access(acc);
-       if (mr_rereg_mask & IB_MR_REREG_TRANS) {
-               mhp->attr.zbva = 0;
-               mhp->attr.va_fbo = *iova_start;
-               mhp->attr.page_size = shift - 12;
-               mhp->attr.len = (u32) total_size;
-               mhp->attr.pbl_size = npages;
-       }
-
-       return 0;
-}
-
-struct ib_mr *c4iw_register_phys_mem(struct ib_pd *pd,
-                                    struct ib_phys_buf *buffer_list,
-                                    int num_phys_buf, int acc, u64 *iova_start)
-{
-       __be64 *page_list;
-       int shift;
-       u64 total_size;
-       int npages;
-       struct c4iw_dev *rhp;
-       struct c4iw_pd *php;
-       struct c4iw_mr *mhp;
-       int ret;
-
-       PDBG("%s ib_pd %p\n", __func__, pd);
-       php = to_c4iw_pd(pd);
-       rhp = php->rhp;
-
-       mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
-       if (!mhp)
-               return ERR_PTR(-ENOMEM);
-
-       mhp->rhp = rhp;
-
-       /* First check that we have enough alignment */
-       if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK)) {
-               ret = -EINVAL;
-               goto err;
-       }
-
-       if (num_phys_buf > 1 &&
-           ((buffer_list[0].addr + buffer_list[0].size) & ~PAGE_MASK)) {
-               ret = -EINVAL;
-               goto err;
-       }
-
-       ret = build_phys_page_list(buffer_list, num_phys_buf, iova_start,
-                                       &total_size, &npages, &shift,
-                                       &page_list);
-       if (ret)
-               goto err;
-
-       if (mr_exceeds_hw_limits(rhp, total_size)) {
-               kfree(page_list);
-               ret = -EINVAL;
-               goto err;
-       }
-
-       ret = alloc_pbl(mhp, npages);
-       if (ret) {
-               kfree(page_list);
-               goto err;
-       }
-
-       ret = write_pbl(&mhp->rhp->rdev, page_list, mhp->attr.pbl_addr,
-                            npages);
-       kfree(page_list);
-       if (ret)
-               goto err_pbl;
-
-       mhp->attr.pdid = php->pdid;
-       mhp->attr.zbva = 0;
-
-       mhp->attr.perms = c4iw_ib_to_tpt_access(acc);
-       mhp->attr.va_fbo = *iova_start;
-       mhp->attr.page_size = shift - 12;
-
-       mhp->attr.len = (u32) total_size;
-       mhp->attr.pbl_size = npages;
-       ret = register_mem(rhp, php, mhp, shift);
-       if (ret)
-               goto err_pbl;
-
-       return &mhp->ibmr;
-
-err_pbl:
-       c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr,
-                             mhp->attr.pbl_size << 3);
-
-err:
-       kfree(mhp);
-       return ERR_PTR(ret);
-
-}
-
 struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc)
 {
        struct c4iw_dev *rhp;
@@ -952,9 +704,6 @@ int c4iw_dereg_mr(struct ib_mr *ib_mr)
        u32 mmid;
 
        PDBG("%s ib_mr %p\n", __func__, ib_mr);
-       /* There can be no memory windows */
-       if (atomic_read(&ib_mr->usecnt))
-               return -EINVAL;
 
        mhp = to_c4iw_mr(ib_mr);
        rhp = mhp->rhp;
index 0a7d99818b17d13384e32cb81446bd250647f85f..ec04272fbdc2ffbf882318678332a506c1f1d4ec 100644 (file)
@@ -549,12 +549,9 @@ int c4iw_register_device(struct c4iw_dev *dev)
        dev->ibdev.resize_cq = c4iw_resize_cq;
        dev->ibdev.poll_cq = c4iw_poll_cq;
        dev->ibdev.get_dma_mr = c4iw_get_dma_mr;
-       dev->ibdev.reg_phys_mr = c4iw_register_phys_mem;
-       dev->ibdev.rereg_phys_mr = c4iw_reregister_phys_mem;
        dev->ibdev.reg_user_mr = c4iw_reg_user_mr;
        dev->ibdev.dereg_mr = c4iw_dereg_mr;
        dev->ibdev.alloc_mw = c4iw_alloc_mw;
-       dev->ibdev.bind_mw = c4iw_bind_mw;
        dev->ibdev.dealloc_mw = c4iw_dealloc_mw;
        dev->ibdev.alloc_mr = c4iw_alloc_mr;
        dev->ibdev.map_mr_sg = c4iw_map_mr_sg;
index aa515afee7248823428cb2b725bf10c70f4fd82a..e99345eb875aa286a8b962696eba9742189e660c 100644 (file)
@@ -933,11 +933,6 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
        return err;
 }
 
-int c4iw_bind_mw(struct ib_qp *qp, struct ib_mw *mw, struct ib_mw_bind *mw_bind)
-{
-       return -ENOSYS;
-}
-
 static inline void build_term_codes(struct t4_cqe *err_cqe, u8 *layer_type,
                                    u8 *ecode)
 {
index 1092a2d1f607464152fbd2d7e836e180d2131312..6126bbe36095c21021f9cc5cdca3b75da299dc85 100644 (file)
@@ -699,4 +699,11 @@ static inline void t4_set_cq_in_error(struct t4_cq *cq)
 
 struct t4_dev_status_page {
        u8 db_off;
+       u8 pad1;
+       u16 pad2;
+       u32 pad3;
+       u64 qp_start;
+       u64 qp_size;
+       u64 cq_start;
+       u64 cq_size;
 };
index cbd0ce1707282a63fe1acb10c223c4af22e28ad1..295f422b9a3ab4caa829182712dcab6bc19a504d 100644 (file)
@@ -32,7 +32,7 @@
 #ifndef __C4IW_USER_H__
 #define __C4IW_USER_H__
 
-#define C4IW_UVERBS_ABI_VERSION        2
+#define C4IW_UVERBS_ABI_VERSION        3
 
 /*
  * Make sure that all structs defined in this file remain laid out so
index 86af71351d9a5be6cbdd87f3d22ccb7078c82b23..105246fba2e7c381958c2e5e74b3a4af7b1441a8 100644 (file)
@@ -92,7 +92,7 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr
                                ah_attr->grh.sgid_index, &sgid, &gid_attr);
        if (ret)
                return ERR_PTR(ret);
-       memset(ah->av.eth.s_mac, 0, ETH_ALEN);
+       eth_zero_addr(ah->av.eth.s_mac);
        if (gid_attr.ndev) {
                if (is_vlan_dev(gid_attr.ndev))
                        vlan_tag = vlan_dev_vlan_id(gid_attr.ndev);
@@ -104,6 +104,7 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr
        ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
        ah->av.eth.gid_index = mlx4_ib_gid_index_to_real_index(ibdev, ah_attr->port_num, ah_attr->grh.sgid_index);
        ah->av.eth.vlan = cpu_to_be16(vlan_tag);
+       ah->av.eth.hop_limit = ah_attr->grh.hop_limit;
        if (ah_attr->static_rate) {
                ah->av.eth.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET;
                while (ah->av.eth.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
index b88fc8f5ab180c717e827f582187b817f92a0e33..9f8b516eb2b0f2412452685d21df360075245f0c 100644 (file)
@@ -811,9 +811,6 @@ repoll:
                        wc->opcode    = IB_WC_MASKED_FETCH_ADD;
                        wc->byte_len  = 8;
                        break;
-               case MLX4_OPCODE_BIND_MW:
-                       wc->opcode    = IB_WC_BIND_MW;
-                       break;
                case MLX4_OPCODE_LSO:
                        wc->opcode    = IB_WC_LSO;
                        break;
index 97d6878f993828085a0f26f941572674ce68b967..1c7ab6cabbb86989fba8952b8f5e6be645e313dc 100644 (file)
@@ -154,9 +154,9 @@ static struct net_device *mlx4_ib_get_netdev(struct ib_device *device, u8 port_n
        return dev;
 }
 
-static int mlx4_ib_update_gids(struct gid_entry *gids,
-                              struct mlx4_ib_dev *ibdev,
-                              u8 port_num)
+static int mlx4_ib_update_gids_v1(struct gid_entry *gids,
+                                 struct mlx4_ib_dev *ibdev,
+                                 u8 port_num)
 {
        struct mlx4_cmd_mailbox *mailbox;
        int err;
@@ -187,6 +187,63 @@ static int mlx4_ib_update_gids(struct gid_entry *gids,
        return err;
 }
 
+static int mlx4_ib_update_gids_v1_v2(struct gid_entry *gids,
+                                    struct mlx4_ib_dev *ibdev,
+                                    u8 port_num)
+{
+       struct mlx4_cmd_mailbox *mailbox;
+       int err;
+       struct mlx4_dev *dev = ibdev->dev;
+       int i;
+       struct {
+               union ib_gid    gid;
+               __be32          rsrvd1[2];
+               __be16          rsrvd2;
+               u8              type;
+               u8              version;
+               __be32          rsrvd3;
+       } *gid_tbl;
+
+       mailbox = mlx4_alloc_cmd_mailbox(dev);
+       if (IS_ERR(mailbox))
+               return -ENOMEM;
+
+       gid_tbl = mailbox->buf;
+       for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) {
+               memcpy(&gid_tbl[i].gid, &gids[i].gid, sizeof(union ib_gid));
+               if (gids[i].gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
+                       gid_tbl[i].version = 2;
+                       if (!ipv6_addr_v4mapped((struct in6_addr *)&gids[i].gid))
+                               gid_tbl[i].type = 1;
+                       else
+                               memset(&gid_tbl[i].gid, 0, 12);
+               }
+       }
+
+       err = mlx4_cmd(dev, mailbox->dma,
+                      MLX4_SET_PORT_ROCE_ADDR << 8 | port_num,
+                      1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
+                      MLX4_CMD_WRAPPED);
+       if (mlx4_is_bonded(dev))
+               err += mlx4_cmd(dev, mailbox->dma,
+                               MLX4_SET_PORT_ROCE_ADDR << 8 | 2,
+                               1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
+                               MLX4_CMD_WRAPPED);
+
+       mlx4_free_cmd_mailbox(dev, mailbox);
+       return err;
+}
+
+static int mlx4_ib_update_gids(struct gid_entry *gids,
+                              struct mlx4_ib_dev *ibdev,
+                              u8 port_num)
+{
+       if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
+               return mlx4_ib_update_gids_v1_v2(gids, ibdev, port_num);
+
+       return mlx4_ib_update_gids_v1(gids, ibdev, port_num);
+}
+
 static int mlx4_ib_add_gid(struct ib_device *device,
                           u8 port_num,
                           unsigned int index,
@@ -215,7 +272,8 @@ static int mlx4_ib_add_gid(struct ib_device *device,
        port_gid_table = &iboe->gids[port_num - 1];
        spin_lock_bh(&iboe->lock);
        for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) {
-               if (!memcmp(&port_gid_table->gids[i].gid, gid, sizeof(*gid))) {
+               if (!memcmp(&port_gid_table->gids[i].gid, gid, sizeof(*gid)) &&
+                   (port_gid_table->gids[i].gid_type == attr->gid_type))  {
                        found = i;
                        break;
                }
@@ -233,6 +291,7 @@ static int mlx4_ib_add_gid(struct ib_device *device,
                        } else {
                                *context = port_gid_table->gids[free].ctx;
                                memcpy(&port_gid_table->gids[free].gid, gid, sizeof(*gid));
+                               port_gid_table->gids[free].gid_type = attr->gid_type;
                                port_gid_table->gids[free].ctx->real_index = free;
                                port_gid_table->gids[free].ctx->refcount = 1;
                                hw_update = 1;
@@ -248,8 +307,10 @@ static int mlx4_ib_add_gid(struct ib_device *device,
                if (!gids) {
                        ret = -ENOMEM;
                } else {
-                       for (i = 0; i < MLX4_MAX_PORT_GIDS; i++)
+                       for (i = 0; i < MLX4_MAX_PORT_GIDS; i++) {
                                memcpy(&gids[i].gid, &port_gid_table->gids[i].gid, sizeof(union ib_gid));
+                               gids[i].gid_type = port_gid_table->gids[i].gid_type;
+                       }
                }
        }
        spin_unlock_bh(&iboe->lock);
@@ -325,6 +386,7 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
        int i;
        int ret;
        unsigned long flags;
+       struct ib_gid_attr attr;
 
        if (port_num > MLX4_MAX_PORTS)
                return -EINVAL;
@@ -335,10 +397,13 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
        if (!rdma_cap_roce_gid_table(&ibdev->ib_dev, port_num))
                return index;
 
-       ret = ib_get_cached_gid(&ibdev->ib_dev, port_num, index, &gid, NULL);
+       ret = ib_get_cached_gid(&ibdev->ib_dev, port_num, index, &gid, &attr);
        if (ret)
                return ret;
 
+       if (attr.ndev)
+               dev_put(attr.ndev);
+
        if (!memcmp(&gid, &zgid, sizeof(gid)))
                return -EINVAL;
 
@@ -346,7 +411,8 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
        port_gid_table = &iboe->gids[port_num - 1];
 
        for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i)
-               if (!memcmp(&port_gid_table->gids[i].gid, &gid, sizeof(gid))) {
+               if (!memcmp(&port_gid_table->gids[i].gid, &gid, sizeof(gid)) &&
+                   attr.gid_type == port_gid_table->gids[i].gid_type) {
                        ctx = port_gid_table->gids[i].ctx;
                        break;
                }
@@ -2119,6 +2185,7 @@ static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num,
                               struct ib_port_immutable *immutable)
 {
        struct ib_port_attr attr;
+       struct mlx4_ib_dev *mdev = to_mdev(ibdev);
        int err;
 
        err = mlx4_ib_query_port(ibdev, port_num, &attr);
@@ -2128,10 +2195,15 @@ static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num,
        immutable->pkey_tbl_len = attr.pkey_tbl_len;
        immutable->gid_tbl_len = attr.gid_tbl_len;
 
-       if (mlx4_ib_port_link_layer(ibdev, port_num) == IB_LINK_LAYER_INFINIBAND)
+       if (mlx4_ib_port_link_layer(ibdev, port_num) == IB_LINK_LAYER_INFINIBAND) {
                immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
-       else
-               immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
+       } else {
+               if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)
+                       immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
+               if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
+                       immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
+                               RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+       }
 
        immutable->max_mad_size = IB_MGMT_MAD_SIZE;
 
@@ -2283,7 +2355,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
        if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW ||
            dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) {
                ibdev->ib_dev.alloc_mw = mlx4_ib_alloc_mw;
-               ibdev->ib_dev.bind_mw = mlx4_ib_bind_mw;
                ibdev->ib_dev.dealloc_mw = mlx4_ib_dealloc_mw;
 
                ibdev->ib_dev.uverbs_cmd_mask |=
@@ -2423,7 +2494,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
        if (mlx4_ib_init_sriov(ibdev))
                goto err_mad;
 
-       if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE) {
+       if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE ||
+           dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
                if (!iboe->nb.notifier_call) {
                        iboe->nb.notifier_call = mlx4_ib_netdev_event;
                        err = register_netdevice_notifier(&iboe->nb);
@@ -2432,6 +2504,12 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
                                goto err_notif;
                        }
                }
+               if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
+                       err = mlx4_config_roce_v2_port(dev, ROCE_V2_UDP_DPORT);
+                       if (err) {
+                               goto err_notif;
+                       }
+               }
        }
 
        for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
index 1caa11edac03347a246467436daa72dd18d5c505..52ce7b000044f6e0814b59b08e1ea27a696e77d8 100644 (file)
@@ -177,11 +177,18 @@ struct mlx4_ib_wq {
        unsigned                tail;
 };
 
+enum {
+       MLX4_IB_QP_CREATE_ROCE_V2_GSI = IB_QP_CREATE_RESERVED_START
+};
+
 enum mlx4_ib_qp_flags {
        MLX4_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO,
        MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
        MLX4_IB_QP_NETIF = IB_QP_CREATE_NETIF_QP,
        MLX4_IB_QP_CREATE_USE_GFP_NOIO = IB_QP_CREATE_USE_GFP_NOIO,
+
+       /* Mellanox specific flags start from IB_QP_CREATE_RESERVED_START */
+       MLX4_IB_ROCE_V2_GSI_QP = MLX4_IB_QP_CREATE_ROCE_V2_GSI,
        MLX4_IB_SRIOV_TUNNEL_QP = 1 << 30,
        MLX4_IB_SRIOV_SQP = 1 << 31,
 };
@@ -478,6 +485,7 @@ struct gid_cache_context {
 
 struct gid_entry {
        union ib_gid    gid;
+       enum ib_gid_type gid_type;
        struct gid_cache_context *ctx;
 };
 
@@ -704,8 +712,6 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                                  struct ib_udata *udata);
 int mlx4_ib_dereg_mr(struct ib_mr *mr);
 struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
-int mlx4_ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
-                   struct ib_mw_bind *mw_bind);
 int mlx4_ib_dealloc_mw(struct ib_mw *mw);
 struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd,
                               enum ib_mr_type mr_type,
index 4d1e1c632603a7b81e6685cd5ad6020f98a02a47..242b94ec105babe092ba3fee42c931ee24518adc 100644 (file)
@@ -366,28 +366,6 @@ err_free:
        return ERR_PTR(err);
 }
 
-int mlx4_ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
-                   struct ib_mw_bind *mw_bind)
-{
-       struct ib_bind_mw_wr  wr;
-       struct ib_send_wr *bad_wr;
-       int ret;
-
-       memset(&wr, 0, sizeof(wr));
-       wr.wr.opcode            = IB_WR_BIND_MW;
-       wr.wr.wr_id             = mw_bind->wr_id;
-       wr.wr.send_flags        = mw_bind->send_flags;
-       wr.mw                   = mw;
-       wr.bind_info            = mw_bind->bind_info;
-       wr.rkey                 = ib_inc_rkey(mw->rkey);
-
-       ret = mlx4_ib_post_send(qp, &wr.wr, &bad_wr);
-       if (!ret)
-               mw->rkey = wr.rkey;
-
-       return ret;
-}
-
 int mlx4_ib_dealloc_mw(struct ib_mw *ibmw)
 {
        struct mlx4_ib_mw *mw = to_mmw(ibmw);
index 13eaaf45288f80d4bb6658d5a18985a978e8c98d..bc5536f00b6cd4cc148f65b2c6131d7e627544eb 100644 (file)
@@ -32,6 +32,8 @@
  */
 
 #include <linux/log2.h>
+#include <linux/etherdevice.h>
+#include <net/ip.h>
 #include <linux/slab.h>
 #include <linux/netdevice.h>
 #include <linux/vmalloc.h>
@@ -85,6 +87,7 @@ struct mlx4_ib_sqp {
        u32                     send_psn;
        struct ib_ud_header     ud_header;
        u8                      header_buf[MLX4_IB_UD_HEADER_SIZE];
+       struct ib_qp            *roce_v2_gsi;
 };
 
 enum {
@@ -115,7 +118,6 @@ static const __be32 mlx4_ib_opcode[] = {
        [IB_WR_REG_MR]                          = cpu_to_be32(MLX4_OPCODE_FMR),
        [IB_WR_MASKED_ATOMIC_CMP_AND_SWP]       = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_CS),
        [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD]     = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_FA),
-       [IB_WR_BIND_MW]                         = cpu_to_be32(MLX4_OPCODE_BIND_MW),
 };
 
 static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
@@ -154,7 +156,10 @@ static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
                        }
                }
        }
-       return proxy_sqp;
+       if (proxy_sqp)
+               return 1;
+
+       return !!(qp->flags & MLX4_IB_ROCE_V2_GSI_QP);
 }
 
 /* used for INIT/CLOSE port logic */
@@ -796,11 +801,13 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
                if (err)
                        goto err_mtt;
 
-               qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof(u64), gfp);
+               qp->sq.wrid = kmalloc_array(qp->sq.wqe_cnt, sizeof(u64),
+                                       gfp | __GFP_NOWARN);
                if (!qp->sq.wrid)
                        qp->sq.wrid = __vmalloc(qp->sq.wqe_cnt * sizeof(u64),
                                                gfp, PAGE_KERNEL);
-               qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof(u64), gfp);
+               qp->rq.wrid = kmalloc_array(qp->rq.wqe_cnt, sizeof(u64),
+                                       gfp | __GFP_NOWARN);
                if (!qp->rq.wrid)
                        qp->rq.wrid = __vmalloc(qp->rq.wqe_cnt * sizeof(u64),
                                                gfp, PAGE_KERNEL);
@@ -1099,9 +1106,9 @@ static u32 get_sqp_num(struct mlx4_ib_dev *dev, struct ib_qp_init_attr *attr)
                return dev->dev->caps.qp1_proxy[attr->port_num - 1];
 }
 
-struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
-                               struct ib_qp_init_attr *init_attr,
-                               struct ib_udata *udata)
+static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
+                                       struct ib_qp_init_attr *init_attr,
+                                       struct ib_udata *udata)
 {
        struct mlx4_ib_qp *qp = NULL;
        int err;
@@ -1120,6 +1127,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
                                        MLX4_IB_SRIOV_TUNNEL_QP |
                                        MLX4_IB_SRIOV_SQP |
                                        MLX4_IB_QP_NETIF |
+                                       MLX4_IB_QP_CREATE_ROCE_V2_GSI |
                                        MLX4_IB_QP_CREATE_USE_GFP_NOIO))
                return ERR_PTR(-EINVAL);
 
@@ -1128,15 +1136,21 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
                        return ERR_PTR(-EINVAL);
        }
 
-       if (init_attr->create_flags &&
-           ((udata && init_attr->create_flags & ~(sup_u_create_flags)) ||
-            ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP |
-                                          MLX4_IB_QP_CREATE_USE_GFP_NOIO |
-                                          MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK)) &&
-             init_attr->qp_type != IB_QPT_UD) ||
-            ((init_attr->create_flags & MLX4_IB_SRIOV_SQP) &&
-             init_attr->qp_type > IB_QPT_GSI)))
-               return ERR_PTR(-EINVAL);
+       if (init_attr->create_flags) {
+               if (udata && init_attr->create_flags & ~(sup_u_create_flags))
+                       return ERR_PTR(-EINVAL);
+
+               if ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP |
+                                                MLX4_IB_QP_CREATE_USE_GFP_NOIO |
+                                                MLX4_IB_QP_CREATE_ROCE_V2_GSI  |
+                                                MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK) &&
+                    init_attr->qp_type != IB_QPT_UD) ||
+                   (init_attr->create_flags & MLX4_IB_SRIOV_SQP &&
+                    init_attr->qp_type > IB_QPT_GSI) ||
+                   (init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI &&
+                    init_attr->qp_type != IB_QPT_GSI))
+                       return ERR_PTR(-EINVAL);
+       }
 
        switch (init_attr->qp_type) {
        case IB_QPT_XRC_TGT:
@@ -1173,19 +1187,29 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
        case IB_QPT_SMI:
        case IB_QPT_GSI:
        {
+               int sqpn;
+
                /* Userspace is not allowed to create special QPs: */
                if (udata)
                        return ERR_PTR(-EINVAL);
+               if (init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI) {
+                       int res = mlx4_qp_reserve_range(to_mdev(pd->device)->dev, 1, 1, &sqpn, 0);
+
+                       if (res)
+                               return ERR_PTR(res);
+               } else {
+                       sqpn = get_sqp_num(to_mdev(pd->device), init_attr);
+               }
 
                err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata,
-                                      get_sqp_num(to_mdev(pd->device), init_attr),
+                                      sqpn,
                                       &qp, gfp);
                if (err)
                        return ERR_PTR(err);
 
                qp->port        = init_attr->port_num;
-               qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1;
-
+               qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 :
+                       init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI ? sqpn : 1;
                break;
        }
        default:
@@ -1196,7 +1220,41 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
        return &qp->ibqp;
 }
 
-int mlx4_ib_destroy_qp(struct ib_qp *qp)
+struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
+                               struct ib_qp_init_attr *init_attr,
+                               struct ib_udata *udata) {
+       struct ib_device *device = pd ? pd->device : init_attr->xrcd->device;
+       struct ib_qp *ibqp;
+       struct mlx4_ib_dev *dev = to_mdev(device);
+
+       ibqp = _mlx4_ib_create_qp(pd, init_attr, udata);
+
+       if (!IS_ERR(ibqp) &&
+           (init_attr->qp_type == IB_QPT_GSI) &&
+           !(init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI)) {
+               struct mlx4_ib_sqp *sqp = to_msqp((to_mqp(ibqp)));
+               int is_eth = rdma_cap_eth_ah(&dev->ib_dev, init_attr->port_num);
+
+               if (is_eth &&
+                   dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
+                       init_attr->create_flags |= MLX4_IB_QP_CREATE_ROCE_V2_GSI;
+                       sqp->roce_v2_gsi = ib_create_qp(pd, init_attr);
+
+                       if (IS_ERR(sqp->roce_v2_gsi)) {
+                               pr_err("Failed to create GSI QP for RoCEv2 (%ld)\n", PTR_ERR(sqp->roce_v2_gsi));
+                               sqp->roce_v2_gsi = NULL;
+                       } else {
+                               sqp = to_msqp(to_mqp(sqp->roce_v2_gsi));
+                               sqp->qp.flags |= MLX4_IB_ROCE_V2_GSI_QP;
+                       }
+
+                       init_attr->create_flags &= ~MLX4_IB_QP_CREATE_ROCE_V2_GSI;
+               }
+       }
+       return ibqp;
+}
+
+static int _mlx4_ib_destroy_qp(struct ib_qp *qp)
 {
        struct mlx4_ib_dev *dev = to_mdev(qp->device);
        struct mlx4_ib_qp *mqp = to_mqp(qp);
@@ -1225,6 +1283,20 @@ int mlx4_ib_destroy_qp(struct ib_qp *qp)
        return 0;
 }
 
+int mlx4_ib_destroy_qp(struct ib_qp *qp)
+{
+       struct mlx4_ib_qp *mqp = to_mqp(qp);
+
+       if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) {
+               struct mlx4_ib_sqp *sqp = to_msqp(mqp);
+
+               if (sqp->roce_v2_gsi)
+                       ib_destroy_qp(sqp->roce_v2_gsi);
+       }
+
+       return _mlx4_ib_destroy_qp(qp);
+}
+
 static int to_mlx4_st(struct mlx4_ib_dev *dev, enum mlx4_ib_qp_type type)
 {
        switch (type) {
@@ -1507,6 +1579,24 @@ static int create_qp_lb_counter(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
        return 0;
 }
 
+enum {
+       MLX4_QPC_ROCE_MODE_1 = 0,
+       MLX4_QPC_ROCE_MODE_2 = 2,
+       MLX4_QPC_ROCE_MODE_UNDEFINED = 0xff
+};
+
+static u8 gid_type_to_qpc(enum ib_gid_type gid_type)
+{
+       switch (gid_type) {
+       case IB_GID_TYPE_ROCE:
+               return MLX4_QPC_ROCE_MODE_1;
+       case IB_GID_TYPE_ROCE_UDP_ENCAP:
+               return MLX4_QPC_ROCE_MODE_2;
+       default:
+               return MLX4_QPC_ROCE_MODE_UNDEFINED;
+       }
+}
+
 static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
                               const struct ib_qp_attr *attr, int attr_mask,
                               enum ib_qp_state cur_state, enum ib_qp_state new_state)
@@ -1633,6 +1723,14 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
                        mlx4_ib_steer_qp_reg(dev, qp, 1);
                        steer_qp = 1;
                }
+
+               if (ibqp->qp_type == IB_QPT_GSI) {
+                       enum ib_gid_type gid_type = qp->flags & MLX4_IB_ROCE_V2_GSI_QP ?
+                               IB_GID_TYPE_ROCE_UDP_ENCAP : IB_GID_TYPE_ROCE;
+                       u8 qpc_roce_mode = gid_type_to_qpc(gid_type);
+
+                       context->rlkey_roce_mode |= (qpc_roce_mode << 6);
+               }
        }
 
        if (attr_mask & IB_QP_PKEY_INDEX) {
@@ -1650,9 +1748,10 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
                u16 vlan = 0xffff;
                u8 smac[ETH_ALEN];
                int status = 0;
+               int is_eth = rdma_cap_eth_ah(&dev->ib_dev, port_num) &&
+                       attr->ah_attr.ah_flags & IB_AH_GRH;
 
-               if (rdma_cap_eth_ah(&dev->ib_dev, port_num) &&
-                   attr->ah_attr.ah_flags & IB_AH_GRH) {
+               if (is_eth) {
                        int index = attr->ah_attr.grh.sgid_index;
 
                        status = ib_get_cached_gid(ibqp->device, port_num,
@@ -1674,6 +1773,18 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
 
                optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH |
                           MLX4_QP_OPTPAR_SCHED_QUEUE);
+
+               if (is_eth &&
+                   (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR)) {
+                       u8 qpc_roce_mode = gid_type_to_qpc(gid_attr.gid_type);
+
+                       if (qpc_roce_mode == MLX4_QPC_ROCE_MODE_UNDEFINED) {
+                               err = -EINVAL;
+                               goto out;
+                       }
+                       context->rlkey_roce_mode |= (qpc_roce_mode << 6);
+               }
+
        }
 
        if (attr_mask & IB_QP_TIMEOUT) {
@@ -1845,7 +1956,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
                sqd_event = 0;
 
        if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
-               context->rlkey |= (1 << 4);
+               context->rlkey_roce_mode |= (1 << 4);
 
        /*
         * Before passing a kernel QP to the HW, make sure that the
@@ -2022,8 +2133,8 @@ out:
        return err;
 }
 
-int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-                     int attr_mask, struct ib_udata *udata)
+static int _mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+                             int attr_mask, struct ib_udata *udata)
 {
        struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
        struct mlx4_ib_qp *qp = to_mqp(ibqp);
@@ -2126,6 +2237,27 @@ out:
        return err;
 }
 
+int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+                     int attr_mask, struct ib_udata *udata)
+{
+       struct mlx4_ib_qp *mqp = to_mqp(ibqp);
+       int ret;
+
+       ret = _mlx4_ib_modify_qp(ibqp, attr, attr_mask, udata);
+
+       if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) {
+               struct mlx4_ib_sqp *sqp = to_msqp(mqp);
+               int err = 0;
+
+               if (sqp->roce_v2_gsi)
+                       err = ib_modify_qp(sqp->roce_v2_gsi, attr, attr_mask);
+               if (err)
+                       pr_err("Failed to modify GSI QP for RoCEv2 (%d)\n",
+                              err);
+       }
+       return ret;
+}
+
 static int vf_get_qp0_qkey(struct mlx4_dev *dev, int qpn, u32 *qkey)
 {
        int i;
@@ -2168,7 +2300,7 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
        if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER)
                send_size += sizeof (struct mlx4_ib_tunnel_header);
 
-       ib_ud_header_init(send_size, 1, 0, 0, 0, 0, &sqp->ud_header);
+       ib_ud_header_init(send_size, 1, 0, 0, 0, 0, 0, 0, &sqp->ud_header);
 
        if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) {
                sqp->ud_header.lrh.service_level =
@@ -2252,16 +2384,7 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
        return 0;
 }
 
-static void mlx4_u64_to_smac(u8 *dst_mac, u64 src_mac)
-{
-       int i;
-
-       for (i = ETH_ALEN; i; i--) {
-               dst_mac[i - 1] = src_mac & 0xff;
-               src_mac >>= 8;
-       }
-}
-
+#define MLX4_ROCEV2_QP1_SPORT 0xC000
 static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
                            void *wqe, unsigned *mlx_seg_len)
 {
@@ -2281,6 +2404,8 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
        bool is_eth;
        bool is_vlan = false;
        bool is_grh;
+       bool is_udp = false;
+       int ip_version = 0;
 
        send_size = 0;
        for (i = 0; i < wr->wr.num_sge; ++i)
@@ -2289,6 +2414,8 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
        is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET;
        is_grh = mlx4_ib_ah_grh_present(ah);
        if (is_eth) {
+               struct ib_gid_attr gid_attr;
+
                if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
                        /* When multi-function is enabled, the ib_core gid
                         * indexes don't necessarily match the hw ones, so
@@ -2302,19 +2429,35 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
                        err = ib_get_cached_gid(ib_dev,
                                                be32_to_cpu(ah->av.ib.port_pd) >> 24,
                                                ah->av.ib.gid_index, &sgid,
-                                               NULL);
-                       if (!err && !memcmp(&sgid, &zgid, sizeof(sgid)))
-                               err = -ENOENT;
-                       if (err)
+                                               &gid_attr);
+                       if (!err) {
+                               if (gid_attr.ndev)
+                                       dev_put(gid_attr.ndev);
+                               if (!memcmp(&sgid, &zgid, sizeof(sgid)))
+                                       err = -ENOENT;
+                       }
+                       if (!err) {
+                               is_udp = gid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP;
+                               if (is_udp) {
+                                       if (ipv6_addr_v4mapped((struct in6_addr *)&sgid))
+                                               ip_version = 4;
+                                       else
+                                               ip_version = 6;
+                                       is_grh = false;
+                               }
+                       } else {
                                return err;
+                       }
                }
-
                if (ah->av.eth.vlan != cpu_to_be16(0xffff)) {
                        vlan = be16_to_cpu(ah->av.eth.vlan) & 0x0fff;
                        is_vlan = 1;
                }
        }
-       ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh, 0, &sqp->ud_header);
+       err = ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh,
+                         ip_version, is_udp, 0, &sqp->ud_header);
+       if (err)
+               return err;
 
        if (!is_eth) {
                sqp->ud_header.lrh.service_level =
@@ -2323,7 +2466,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
                sqp->ud_header.lrh.source_lid = cpu_to_be16(ah->av.ib.g_slid & 0x7f);
        }
 
-       if (is_grh) {
+       if (is_grh || (ip_version == 6)) {
                sqp->ud_header.grh.traffic_class =
                        (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20) & 0xff;
                sqp->ud_header.grh.flow_label    =
@@ -2352,6 +2495,25 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
                       ah->av.ib.dgid, 16);
        }
 
+       if (ip_version == 4) {
+               sqp->ud_header.ip4.tos =
+                       (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20) & 0xff;
+               sqp->ud_header.ip4.id = 0;
+               sqp->ud_header.ip4.frag_off = htons(IP_DF);
+               sqp->ud_header.ip4.ttl = ah->av.eth.hop_limit;
+
+               memcpy(&sqp->ud_header.ip4.saddr,
+                      sgid.raw + 12, 4);
+               memcpy(&sqp->ud_header.ip4.daddr, ah->av.ib.dgid + 12, 4);
+               sqp->ud_header.ip4.check = ib_ud_ip4_csum(&sqp->ud_header);
+       }
+
+       if (is_udp) {
+               sqp->ud_header.udp.dport = htons(ROCE_V2_UDP_DPORT);
+               sqp->ud_header.udp.sport = htons(MLX4_ROCEV2_QP1_SPORT);
+               sqp->ud_header.udp.csum = 0;
+       }
+
        mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
 
        if (!is_eth) {
@@ -2380,34 +2542,27 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
 
        if (is_eth) {
                struct in6_addr in6;
-
+               u16 ether_type;
                u16 pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 29) << 13;
 
+               ether_type = (!is_udp) ? MLX4_IB_IBOE_ETHERTYPE :
+                       (ip_version == 4 ? ETH_P_IP : ETH_P_IPV6);
+
                mlx->sched_prio = cpu_to_be16(pcp);
 
+               ether_addr_copy(sqp->ud_header.eth.smac_h, ah->av.eth.s_mac);
                memcpy(sqp->ud_header.eth.dmac_h, ah->av.eth.mac, 6);
-               /* FIXME: cache smac value? */
                memcpy(&ctrl->srcrb_flags16[0], ah->av.eth.mac, 2);
                memcpy(&ctrl->imm, ah->av.eth.mac + 2, 4);
                memcpy(&in6, sgid.raw, sizeof(in6));
 
-               if (!mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
-                       u64 mac = atomic64_read(&to_mdev(ib_dev)->iboe.mac[sqp->qp.port - 1]);
-                       u8 smac[ETH_ALEN];
-
-                       mlx4_u64_to_smac(smac, mac);
-                       memcpy(sqp->ud_header.eth.smac_h, smac, ETH_ALEN);
-               } else {
-                       /* use the src mac of the tunnel */
-                       memcpy(sqp->ud_header.eth.smac_h, ah->av.eth.s_mac, ETH_ALEN);
-               }
 
                if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6))
                        mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK);
                if (!is_vlan) {
-                       sqp->ud_header.eth.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE);
+                       sqp->ud_header.eth.type = cpu_to_be16(ether_type);
                } else {
-                       sqp->ud_header.vlan.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE);
+                       sqp->ud_header.vlan.type = cpu_to_be16(ether_type);
                        sqp->ud_header.vlan.tag = cpu_to_be16(vlan | pcp);
                }
        } else {
@@ -2528,25 +2683,6 @@ static void set_reg_seg(struct mlx4_wqe_fmr_seg *fseg,
        fseg->reserved[1]       = 0;
 }
 
-static void set_bind_seg(struct mlx4_wqe_bind_seg *bseg,
-               struct ib_bind_mw_wr *wr)
-{
-       bseg->flags1 =
-               convert_access(wr->bind_info.mw_access_flags) &
-               cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ  |
-                           MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE |
-                           MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC);
-       bseg->flags2 = 0;
-       if (wr->mw->type == IB_MW_TYPE_2)
-               bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_TYPE_2);
-       if (wr->bind_info.mw_access_flags & IB_ZERO_BASED)
-               bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_ZERO_BASED);
-       bseg->new_rkey = cpu_to_be32(wr->rkey);
-       bseg->lkey = cpu_to_be32(wr->bind_info.mr->lkey);
-       bseg->addr = cpu_to_be64(wr->bind_info.addr);
-       bseg->length = cpu_to_be64(wr->bind_info.length);
-}
-
 static void set_local_inv_seg(struct mlx4_wqe_local_inval_seg *iseg, u32 rkey)
 {
        memset(iseg, 0, sizeof(*iseg));
@@ -2766,6 +2902,29 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
        int i;
        struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
 
+       if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) {
+               struct mlx4_ib_sqp *sqp = to_msqp(qp);
+
+               if (sqp->roce_v2_gsi) {
+                       struct mlx4_ib_ah *ah = to_mah(ud_wr(wr)->ah);
+                       struct ib_gid_attr gid_attr;
+                       union ib_gid gid;
+
+                       if (!ib_get_cached_gid(ibqp->device,
+                                              be32_to_cpu(ah->av.ib.port_pd) >> 24,
+                                              ah->av.ib.gid_index, &gid,
+                                              &gid_attr)) {
+                               if (gid_attr.ndev)
+                                       dev_put(gid_attr.ndev);
+                               qp = (gid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ?
+                                       to_mqp(sqp->roce_v2_gsi) : qp;
+                       } else {
+                               pr_err("Failed to get gid at index %d. RoCEv2 will not work properly\n",
+                                      ah->av.ib.gid_index);
+                       }
+               }
+       }
+
        spin_lock_irqsave(&qp->sq.lock, flags);
        if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) {
                err = -EIO;
@@ -2867,13 +3026,6 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                                size += sizeof(struct mlx4_wqe_fmr_seg) / 16;
                                break;
 
-                       case IB_WR_BIND_MW:
-                               ctrl->srcrb_flags |=
-                                       cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER);
-                               set_bind_seg(wqe, bind_mw_wr(wr));
-                               wqe  += sizeof(struct mlx4_wqe_bind_seg);
-                               size += sizeof(struct mlx4_wqe_bind_seg) / 16;
-                               break;
                        default:
                                /* No extra segments required for sends */
                                break;
index c394376ebe06f159aebca07ef899ef5933386713..0597f3eef5d03ce4729c0fc0f5f0ffd076ae1176 100644 (file)
@@ -171,7 +171,8 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
                if (err)
                        goto err_mtt;
 
-               srq->wrid = kmalloc(srq->msrq.max * sizeof (u64), GFP_KERNEL);
+               srq->wrid = kmalloc_array(srq->msrq.max, sizeof(u64),
+                                       GFP_KERNEL | __GFP_NOWARN);
                if (!srq->wrid) {
                        srq->wrid = __vmalloc(srq->msrq.max * sizeof(u64),
                                              GFP_KERNEL, PAGE_KERNEL);
index 66080580e24db3e90c887bcea7132e6687bc3d33..745efa4cfc718eec9b749508f54b091ed13332d3 100644 (file)
 
 #include "mlx5_ib.h"
 
-struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr,
-                          struct mlx5_ib_ah *ah)
+static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev,
+                                 struct mlx5_ib_ah *ah,
+                                 struct ib_ah_attr *ah_attr,
+                                 enum rdma_link_layer ll)
 {
        if (ah_attr->ah_flags & IB_AH_GRH) {
                memcpy(ah->av.rgid, &ah_attr->grh.dgid, 16);
@@ -44,9 +46,20 @@ struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr,
                ah->av.tclass = ah_attr->grh.traffic_class;
        }
 
-       ah->av.rlid = cpu_to_be16(ah_attr->dlid);
-       ah->av.fl_mlid = ah_attr->src_path_bits & 0x7f;
-       ah->av.stat_rate_sl = (ah_attr->static_rate << 4) | (ah_attr->sl & 0xf);
+       ah->av.stat_rate_sl = (ah_attr->static_rate << 4);
+
+       if (ll == IB_LINK_LAYER_ETHERNET) {
+               memcpy(ah->av.rmac, ah_attr->dmac, sizeof(ah_attr->dmac));
+               ah->av.udp_sport =
+                       mlx5_get_roce_udp_sport(dev,
+                                               ah_attr->port_num,
+                                               ah_attr->grh.sgid_index);
+               ah->av.stat_rate_sl |= (ah_attr->sl & 0x7) << 1;
+       } else {
+               ah->av.rlid = cpu_to_be16(ah_attr->dlid);
+               ah->av.fl_mlid = ah_attr->src_path_bits & 0x7f;
+               ah->av.stat_rate_sl |= (ah_attr->sl & 0xf);
+       }
 
        return &ah->ibah;
 }
@@ -54,12 +67,19 @@ struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr,
 struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
 {
        struct mlx5_ib_ah *ah;
+       struct mlx5_ib_dev *dev = to_mdev(pd->device);
+       enum rdma_link_layer ll;
+
+       ll = pd->device->get_link_layer(pd->device, ah_attr->port_num);
+
+       if (ll == IB_LINK_LAYER_ETHERNET && !(ah_attr->ah_flags & IB_AH_GRH))
+               return ERR_PTR(-EINVAL);
 
        ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
        if (!ah)
                return ERR_PTR(-ENOMEM);
 
-       return create_ib_ah(ah_attr, ah); /* never fails */
+       return create_ib_ah(dev, ah, ah_attr, ll); /* never fails */
 }
 
 int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
index 92ddae101ecc7dc6032705e56a2c75d46fd0bf71..fd1de31e0611cbbd3d8691e595677d8e182f3198 100644 (file)
@@ -154,9 +154,6 @@ static void handle_good_req(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
                wc->opcode    = IB_WC_MASKED_FETCH_ADD;
                wc->byte_len  = 8;
                break;
-       case MLX5_OPCODE_BIND_MW:
-               wc->opcode    = IB_WC_BIND_MW;
-               break;
        case MLX5_OPCODE_UMR:
                wc->opcode = get_umr_comp(wq, idx);
                break;
@@ -171,6 +168,7 @@ enum {
 static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
                             struct mlx5_ib_qp *qp)
 {
+       enum rdma_link_layer ll = rdma_port_get_link_layer(qp->ibqp.device, 1);
        struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device);
        struct mlx5_ib_srq *srq;
        struct mlx5_ib_wq *wq;
@@ -236,6 +234,22 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
        } else {
                wc->pkey_index = 0;
        }
+
+       if (ll != IB_LINK_LAYER_ETHERNET)
+               return;
+
+       switch (wc->sl & 0x3) {
+       case MLX5_CQE_ROCE_L3_HEADER_TYPE_GRH:
+               wc->network_hdr_type = RDMA_NETWORK_IB;
+               break;
+       case MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV6:
+               wc->network_hdr_type = RDMA_NETWORK_IPV6;
+               break;
+       case MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV4:
+               wc->network_hdr_type = RDMA_NETWORK_IPV4;
+               break;
+       }
+       wc->wc_flags |= IB_WC_WITH_NETWORK_HDR_TYPE;
 }
 
 static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe)
@@ -760,12 +774,12 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
        int eqn;
        int err;
 
-       if (attr->flags)
-               return ERR_PTR(-EINVAL);
-
        if (entries < 0)
                return ERR_PTR(-EINVAL);
 
+       if (check_cq_create_flags(attr->flags))
+               return ERR_PTR(-EOPNOTSUPP);
+
        entries = roundup_pow_of_two(entries + 1);
        if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)))
                return ERR_PTR(-EINVAL);
@@ -779,6 +793,7 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
        spin_lock_init(&cq->lock);
        cq->resize_buf = NULL;
        cq->resize_umem = NULL;
+       cq->create_flags = attr->flags;
 
        if (context) {
                err = create_cq_user(dev, udata, context, cq, entries,
@@ -796,6 +811,10 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
 
        cq->cqe_size = cqe_size;
        cqb->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5;
+
+       if (cq->create_flags & IB_CQ_FLAGS_IGNORE_OVERRUN)
+               cqb->ctx.cqe_sz_flags |= (1 << 1);
+
        cqb->ctx.log_sz_usr_page = cpu_to_be32((ilog2(entries) << 24) | index);
        err = mlx5_vector2eqn(dev->mdev, vector, &eqn, &irqn);
        if (err)
index b0ec175cc6ba3b4c63ec8c5e22082a3603b5b473..03c418ccbc982e4114681044798c075a65357b28 100644 (file)
@@ -40,6 +40,8 @@
 #include <linux/io-mapping.h>
 #include <linux/sched.h>
 #include <rdma/ib_user_verbs.h>
+#include <rdma/ib_addr.h>
+#include <rdma/ib_cache.h>
 #include <linux/mlx5/vport.h>
 #include <rdma/ib_smi.h>
 #include <rdma/ib_umem.h>
@@ -66,12 +68,14 @@ static char mlx5_version[] =
        DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v"
        DRIVER_VERSION " (" DRIVER_RELDATE ")\n";
 
+enum {
+       MLX5_ATOMIC_SIZE_QP_8BYTES = 1 << 3,
+};
+
 static enum rdma_link_layer
-mlx5_ib_port_link_layer(struct ib_device *device)
+mlx5_port_type_cap_to_rdma_ll(int port_type_cap)
 {
-       struct mlx5_ib_dev *dev = to_mdev(device);
-
-       switch (MLX5_CAP_GEN(dev->mdev, port_type)) {
+       switch (port_type_cap) {
        case MLX5_CAP_PORT_TYPE_IB:
                return IB_LINK_LAYER_INFINIBAND;
        case MLX5_CAP_PORT_TYPE_ETH:
@@ -81,6 +85,202 @@ mlx5_ib_port_link_layer(struct ib_device *device)
        }
 }
 
+static enum rdma_link_layer
+mlx5_ib_port_link_layer(struct ib_device *device, u8 port_num)
+{
+       struct mlx5_ib_dev *dev = to_mdev(device);
+       int port_type_cap = MLX5_CAP_GEN(dev->mdev, port_type);
+
+       return mlx5_port_type_cap_to_rdma_ll(port_type_cap);
+}
+
+static int mlx5_netdev_event(struct notifier_block *this,
+                            unsigned long event, void *ptr)
+{
+       struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
+       struct mlx5_ib_dev *ibdev = container_of(this, struct mlx5_ib_dev,
+                                                roce.nb);
+
+       if ((event != NETDEV_UNREGISTER) && (event != NETDEV_REGISTER))
+               return NOTIFY_DONE;
+
+       write_lock(&ibdev->roce.netdev_lock);
+       if (ndev->dev.parent == &ibdev->mdev->pdev->dev)
+               ibdev->roce.netdev = (event == NETDEV_UNREGISTER) ? NULL : ndev;
+       write_unlock(&ibdev->roce.netdev_lock);
+
+       return NOTIFY_DONE;
+}
+
+static struct net_device *mlx5_ib_get_netdev(struct ib_device *device,
+                                            u8 port_num)
+{
+       struct mlx5_ib_dev *ibdev = to_mdev(device);
+       struct net_device *ndev;
+
+       /* Ensure ndev does not disappear before we invoke dev_hold()
+        */
+       read_lock(&ibdev->roce.netdev_lock);
+       ndev = ibdev->roce.netdev;
+       if (ndev)
+               dev_hold(ndev);
+       read_unlock(&ibdev->roce.netdev_lock);
+
+       return ndev;
+}
+
+static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
+                               struct ib_port_attr *props)
+{
+       struct mlx5_ib_dev *dev = to_mdev(device);
+       struct net_device *ndev;
+       enum ib_mtu ndev_ib_mtu;
+       u16 qkey_viol_cntr;
+
+       memset(props, 0, sizeof(*props));
+
+       props->port_cap_flags  |= IB_PORT_CM_SUP;
+       props->port_cap_flags  |= IB_PORT_IP_BASED_GIDS;
+
+       props->gid_tbl_len      = MLX5_CAP_ROCE(dev->mdev,
+                                               roce_address_table_size);
+       props->max_mtu          = IB_MTU_4096;
+       props->max_msg_sz       = 1 << MLX5_CAP_GEN(dev->mdev, log_max_msg);
+       props->pkey_tbl_len     = 1;
+       props->state            = IB_PORT_DOWN;
+       props->phys_state       = 3;
+
+       mlx5_query_nic_vport_qkey_viol_cntr(dev->mdev, &qkey_viol_cntr);
+       props->qkey_viol_cntr = qkey_viol_cntr;
+
+       ndev = mlx5_ib_get_netdev(device, port_num);
+       if (!ndev)
+               return 0;
+
+       if (netif_running(ndev) && netif_carrier_ok(ndev)) {
+               props->state      = IB_PORT_ACTIVE;
+               props->phys_state = 5;
+       }
+
+       ndev_ib_mtu = iboe_get_mtu(ndev->mtu);
+
+       dev_put(ndev);
+
+       props->active_mtu       = min(props->max_mtu, ndev_ib_mtu);
+
+       props->active_width     = IB_WIDTH_4X;  /* TODO */
+       props->active_speed     = IB_SPEED_QDR; /* TODO */
+
+       return 0;
+}
+
+static void ib_gid_to_mlx5_roce_addr(const union ib_gid *gid,
+                                    const struct ib_gid_attr *attr,
+                                    void *mlx5_addr)
+{
+#define MLX5_SET_RA(p, f, v) MLX5_SET(roce_addr_layout, p, f, v)
+       char *mlx5_addr_l3_addr = MLX5_ADDR_OF(roce_addr_layout, mlx5_addr,
+                                              source_l3_address);
+       void *mlx5_addr_mac     = MLX5_ADDR_OF(roce_addr_layout, mlx5_addr,
+                                              source_mac_47_32);
+
+       if (!gid)
+               return;
+
+       ether_addr_copy(mlx5_addr_mac, attr->ndev->dev_addr);
+
+       if (is_vlan_dev(attr->ndev)) {
+               MLX5_SET_RA(mlx5_addr, vlan_valid, 1);
+               MLX5_SET_RA(mlx5_addr, vlan_id, vlan_dev_vlan_id(attr->ndev));
+       }
+
+       switch (attr->gid_type) {
+       case IB_GID_TYPE_IB:
+               MLX5_SET_RA(mlx5_addr, roce_version, MLX5_ROCE_VERSION_1);
+               break;
+       case IB_GID_TYPE_ROCE_UDP_ENCAP:
+               MLX5_SET_RA(mlx5_addr, roce_version, MLX5_ROCE_VERSION_2);
+               break;
+
+       default:
+               WARN_ON(true);
+       }
+
+       if (attr->gid_type != IB_GID_TYPE_IB) {
+               if (ipv6_addr_v4mapped((void *)gid))
+                       MLX5_SET_RA(mlx5_addr, roce_l3_type,
+                                   MLX5_ROCE_L3_TYPE_IPV4);
+               else
+                       MLX5_SET_RA(mlx5_addr, roce_l3_type,
+                                   MLX5_ROCE_L3_TYPE_IPV6);
+       }
+
+       if ((attr->gid_type == IB_GID_TYPE_IB) ||
+           !ipv6_addr_v4mapped((void *)gid))
+               memcpy(mlx5_addr_l3_addr, gid, sizeof(*gid));
+       else
+               memcpy(&mlx5_addr_l3_addr[12], &gid->raw[12], 4);
+}
+
+static int set_roce_addr(struct ib_device *device, u8 port_num,
+                        unsigned int index,
+                        const union ib_gid *gid,
+                        const struct ib_gid_attr *attr)
+{
+       struct mlx5_ib_dev *dev = to_mdev(device);
+       u32  in[MLX5_ST_SZ_DW(set_roce_address_in)];
+       u32 out[MLX5_ST_SZ_DW(set_roce_address_out)];
+       void *in_addr = MLX5_ADDR_OF(set_roce_address_in, in, roce_address);
+       enum rdma_link_layer ll = mlx5_ib_port_link_layer(device, port_num);
+
+       if (ll != IB_LINK_LAYER_ETHERNET)
+               return -EINVAL;
+
+       memset(in, 0, sizeof(in));
+
+       ib_gid_to_mlx5_roce_addr(gid, attr, in_addr);
+
+       MLX5_SET(set_roce_address_in, in, roce_address_index, index);
+       MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS);
+
+       memset(out, 0, sizeof(out));
+       return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
+}
+
+static int mlx5_ib_add_gid(struct ib_device *device, u8 port_num,
+                          unsigned int index, const union ib_gid *gid,
+                          const struct ib_gid_attr *attr,
+                          __always_unused void **context)
+{
+       return set_roce_addr(device, port_num, index, gid, attr);
+}
+
+static int mlx5_ib_del_gid(struct ib_device *device, u8 port_num,
+                          unsigned int index, __always_unused void **context)
+{
+       return set_roce_addr(device, port_num, index, NULL, NULL);
+}
+
+__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
+                              int index)
+{
+       struct ib_gid_attr attr;
+       union ib_gid gid;
+
+       if (ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr))
+               return 0;
+
+       if (!attr.ndev)
+               return 0;
+
+       dev_put(attr.ndev);
+
+       if (attr.gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP)
+               return 0;
+
+       return cpu_to_be16(MLX5_CAP_ROCE(dev->mdev, r_roce_min_src_udp_port));
+}
+
 static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
 {
        return !dev->mdev->issi;
@@ -97,13 +297,35 @@ static int mlx5_get_vport_access_method(struct ib_device *ibdev)
        if (mlx5_use_mad_ifc(to_mdev(ibdev)))
                return MLX5_VPORT_ACCESS_METHOD_MAD;
 
-       if (mlx5_ib_port_link_layer(ibdev) ==
+       if (mlx5_ib_port_link_layer(ibdev, 1) ==
            IB_LINK_LAYER_ETHERNET)
                return MLX5_VPORT_ACCESS_METHOD_NIC;
 
        return MLX5_VPORT_ACCESS_METHOD_HCA;
 }
 
+static void get_atomic_caps(struct mlx5_ib_dev *dev,
+                           struct ib_device_attr *props)
+{
+       u8 tmp;
+       u8 atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations);
+       u8 atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp);
+       u8 atomic_req_8B_endianness_mode =
+               MLX5_CAP_ATOMIC(dev->mdev, atomic_req_8B_endianess_mode);
+
+       /* Check if HW supports 8 bytes standard atomic operations and capable
+        * of host endianness respond
+        */
+       tmp = MLX5_ATOMIC_OPS_CMP_SWAP | MLX5_ATOMIC_OPS_FETCH_ADD;
+       if (((atomic_operations & tmp) == tmp) &&
+           (atomic_size_qp & MLX5_ATOMIC_SIZE_QP_8BYTES) &&
+           (atomic_req_8B_endianness_mode)) {
+               props->atomic_cap = IB_ATOMIC_HCA;
+       } else {
+               props->atomic_cap = IB_ATOMIC_NONE;
+       }
+}
+
 static int mlx5_query_system_image_guid(struct ib_device *ibdev,
                                        __be64 *sys_image_guid)
 {
@@ -119,13 +341,21 @@ static int mlx5_query_system_image_guid(struct ib_device *ibdev,
 
        case MLX5_VPORT_ACCESS_METHOD_HCA:
                err = mlx5_query_hca_vport_system_image_guid(mdev, &tmp);
-               if (!err)
-                       *sys_image_guid = cpu_to_be64(tmp);
-               return err;
+               break;
+
+       case MLX5_VPORT_ACCESS_METHOD_NIC:
+               err = mlx5_query_nic_vport_system_image_guid(mdev, &tmp);
+               break;
 
        default:
                return -EINVAL;
        }
+
+       if (!err)
+               *sys_image_guid = cpu_to_be64(tmp);
+
+       return err;
+
 }
 
 static int mlx5_query_max_pkeys(struct ib_device *ibdev,
@@ -179,13 +409,20 @@ static int mlx5_query_node_guid(struct mlx5_ib_dev *dev,
 
        case MLX5_VPORT_ACCESS_METHOD_HCA:
                err = mlx5_query_hca_vport_node_guid(dev->mdev, &tmp);
-               if (!err)
-                       *node_guid = cpu_to_be64(tmp);
-               return err;
+               break;
+
+       case MLX5_VPORT_ACCESS_METHOD_NIC:
+               err = mlx5_query_nic_vport_node_guid(dev->mdev, &tmp);
+               break;
 
        default:
                return -EINVAL;
        }
+
+       if (!err)
+               *node_guid = cpu_to_be64(tmp);
+
+       return err;
 }
 
 struct mlx5_reg_node_desc {
@@ -263,6 +500,10 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
        if (MLX5_CAP_GEN(mdev, block_lb_mc))
                props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
 
+       if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
+           (MLX5_CAP_ETH(dev->mdev, csum_cap)))
+                       props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM;
+
        props->vendor_part_id      = mdev->pdev->device;
        props->hw_ver              = mdev->pdev->revision;
 
@@ -278,7 +519,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
        props->max_sge = min(max_rq_sg, max_sq_sg);
        props->max_sge_rd = props->max_sge;
        props->max_cq              = 1 << MLX5_CAP_GEN(mdev, log_max_cq);
-       props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_eq_sz)) - 1;
+       props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1;
        props->max_mr              = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
        props->max_pd              = 1 << MLX5_CAP_GEN(mdev, log_max_pd);
        props->max_qp_rd_atom      = 1 << MLX5_CAP_GEN(mdev, log_max_ra_req_qp);
@@ -289,13 +530,15 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
        props->max_res_rd_atom     = props->max_qp_rd_atom * props->max_qp;
        props->max_srq_sge         = max_rq_sg - 1;
        props->max_fast_reg_page_list_len = (unsigned int)-1;
-       props->atomic_cap          = IB_ATOMIC_NONE;
+       get_atomic_caps(dev, props);
        props->masked_atomic_cap   = IB_ATOMIC_NONE;
        props->max_mcast_grp       = 1 << MLX5_CAP_GEN(mdev, log_max_mcg);
        props->max_mcast_qp_attach = MLX5_CAP_GEN(mdev, max_qp_mcg);
        props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
                                           props->max_mcast_grp;
        props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */
+       props->hca_core_clock = MLX5_CAP_GEN(mdev, device_frequency_khz);
+       props->timestamp_mask = 0x7FFFFFFFFFFFFFFFULL;
 
 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
        if (MLX5_CAP_GEN(mdev, pg))
@@ -303,6 +546,9 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
        props->odp_caps = dev->odp_caps;
 #endif
 
+       if (MLX5_CAP_GEN(mdev, cd))
+               props->device_cap_flags |= IB_DEVICE_CROSS_CHANNEL;
+
        return 0;
 }
 
@@ -483,6 +729,9 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
        case MLX5_VPORT_ACCESS_METHOD_HCA:
                return mlx5_query_hca_port(ibdev, port, props);
 
+       case MLX5_VPORT_ACCESS_METHOD_NIC:
+               return mlx5_query_port_roce(ibdev, port, props);
+
        default:
                return -EINVAL;
        }
@@ -583,8 +832,8 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
                                                  struct ib_udata *udata)
 {
        struct mlx5_ib_dev *dev = to_mdev(ibdev);
-       struct mlx5_ib_alloc_ucontext_req_v2 req;
-       struct mlx5_ib_alloc_ucontext_resp resp;
+       struct mlx5_ib_alloc_ucontext_req_v2 req = {};
+       struct mlx5_ib_alloc_ucontext_resp resp = {};
        struct mlx5_ib_ucontext *context;
        struct mlx5_uuar_info *uuari;
        struct mlx5_uar *uars;
@@ -595,24 +844,28 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
        int err;
        int i;
        size_t reqlen;
+       size_t min_req_v2 = offsetof(struct mlx5_ib_alloc_ucontext_req_v2,
+                                    max_cqe_version);
 
        if (!dev->ib_active)
                return ERR_PTR(-EAGAIN);
 
-       memset(&req, 0, sizeof(req));
+       if (udata->inlen < sizeof(struct ib_uverbs_cmd_hdr))
+               return ERR_PTR(-EINVAL);
+
        reqlen = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr);
        if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req))
                ver = 0;
-       else if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req_v2))
+       else if (reqlen >= min_req_v2)
                ver = 2;
        else
                return ERR_PTR(-EINVAL);
 
-       err = ib_copy_from_udata(&req, udata, reqlen);
+       err = ib_copy_from_udata(&req, udata, min(reqlen, sizeof(req)));
        if (err)
                return ERR_PTR(err);
 
-       if (req.flags || req.reserved)
+       if (req.flags)
                return ERR_PTR(-EINVAL);
 
        if (req.total_num_uuars > MLX5_MAX_UUARS)
@@ -621,6 +874,14 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
        if (req.total_num_uuars == 0)
                return ERR_PTR(-EINVAL);
 
+       if (req.comp_mask || req.reserved0 || req.reserved1 || req.reserved2)
+               return ERR_PTR(-EOPNOTSUPP);
+
+       if (reqlen > sizeof(req) &&
+           !ib_is_udata_cleared(udata, sizeof(req),
+                                reqlen - sizeof(req)))
+               return ERR_PTR(-EOPNOTSUPP);
+
        req.total_num_uuars = ALIGN(req.total_num_uuars,
                                    MLX5_NON_FP_BF_REGS_PER_PAGE);
        if (req.num_low_latency_uuars > req.total_num_uuars - 1)
@@ -636,6 +897,11 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
        resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
        resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
        resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
+       resp.cqe_version = min_t(__u8,
+                                (__u8)MLX5_CAP_GEN(dev->mdev, cqe_version),
+                                req.max_cqe_version);
+       resp.response_length = min(offsetof(typeof(resp), response_length) +
+                                  sizeof(resp.response_length), udata->outlen);
 
        context = kzalloc(sizeof(*context), GFP_KERNEL);
        if (!context)
@@ -681,22 +947,49 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
        context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range;
 #endif
 
+       if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) {
+               err = mlx5_core_alloc_transport_domain(dev->mdev,
+                                                      &context->tdn);
+               if (err)
+                       goto out_uars;
+       }
+
        INIT_LIST_HEAD(&context->db_page_list);
        mutex_init(&context->db_page_mutex);
 
        resp.tot_uuars = req.total_num_uuars;
        resp.num_ports = MLX5_CAP_GEN(dev->mdev, num_ports);
-       err = ib_copy_to_udata(udata, &resp,
-                              sizeof(resp) - sizeof(resp.reserved));
+
+       if (field_avail(typeof(resp), cqe_version, udata->outlen))
+               resp.response_length += sizeof(resp.cqe_version);
+
+       if (field_avail(typeof(resp), hca_core_clock_offset, udata->outlen)) {
+               resp.comp_mask |=
+                       MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET;
+               resp.hca_core_clock_offset =
+                       offsetof(struct mlx5_init_seg, internal_timer_h) %
+                       PAGE_SIZE;
+               resp.response_length += sizeof(resp.hca_core_clock_offset) +
+                                       sizeof(resp.reserved2) +
+                                       sizeof(resp.reserved3);
+       }
+
+       err = ib_copy_to_udata(udata, &resp, resp.response_length);
        if (err)
-               goto out_uars;
+               goto out_td;
 
        uuari->ver = ver;
        uuari->num_low_latency_uuars = req.num_low_latency_uuars;
        uuari->uars = uars;
        uuari->num_uars = num_uars;
+       context->cqe_version = resp.cqe_version;
+
        return &context->ibucontext;
 
+out_td:
+       if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
+               mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn);
+
 out_uars:
        for (i--; i >= 0; i--)
                mlx5_cmd_free_uar(dev->mdev, uars[i].index);
@@ -721,6 +1014,9 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
        struct mlx5_uuar_info *uuari = &context->uuari;
        int i;
 
+       if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
+               mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn);
+
        for (i = 0; i < uuari->num_uars; i++) {
                if (mlx5_cmd_free_uar(dev->mdev, uuari->uars[i].index))
                        mlx5_ib_warn(dev, "failed to free UAR 0x%x\n", uuari->uars[i].index);
@@ -790,6 +1086,30 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
        case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES:
                return -ENOSYS;
 
+       case MLX5_IB_MMAP_CORE_CLOCK:
+               if (vma->vm_end - vma->vm_start != PAGE_SIZE)
+                       return -EINVAL;
+
+               if (vma->vm_flags & (VM_WRITE | VM_EXEC))
+                       return -EPERM;
+
+               /* Don't expose to user-space information it shouldn't have */
+               if (PAGE_SIZE > 4096)
+                       return -EOPNOTSUPP;
+
+               vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+               pfn = (dev->mdev->iseg_base +
+                      offsetof(struct mlx5_init_seg, internal_timer_h)) >>
+                       PAGE_SHIFT;
+               if (io_remap_pfn_range(vma, vma->vm_start, pfn,
+                                      PAGE_SIZE, vma->vm_page_prot))
+                       return -EAGAIN;
+
+               mlx5_ib_dbg(dev, "mapped internal timer at 0x%lx, PA 0x%llx\n",
+                           vma->vm_start,
+                           (unsigned long long)pfn << PAGE_SHIFT);
+               break;
+
        default:
                return -EINVAL;
        }
@@ -1758,6 +2078,32 @@ static void destroy_dev_resources(struct mlx5_ib_resources *devr)
        mlx5_ib_dealloc_pd(devr->p0);
 }
 
+static u32 get_core_cap_flags(struct ib_device *ibdev)
+{
+       struct mlx5_ib_dev *dev = to_mdev(ibdev);
+       enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, 1);
+       u8 l3_type_cap = MLX5_CAP_ROCE(dev->mdev, l3_type);
+       u8 roce_version_cap = MLX5_CAP_ROCE(dev->mdev, roce_version);
+       u32 ret = 0;
+
+       if (ll == IB_LINK_LAYER_INFINIBAND)
+               return RDMA_CORE_PORT_IBA_IB;
+
+       if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV4_CAP))
+               return 0;
+
+       if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV6_CAP))
+               return 0;
+
+       if (roce_version_cap & MLX5_ROCE_VERSION_1_CAP)
+               ret |= RDMA_CORE_PORT_IBA_ROCE;
+
+       if (roce_version_cap & MLX5_ROCE_VERSION_2_CAP)
+               ret |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+
+       return ret;
+}
+
 static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
                               struct ib_port_immutable *immutable)
 {
@@ -1770,20 +2116,50 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
 
        immutable->pkey_tbl_len = attr.pkey_tbl_len;
        immutable->gid_tbl_len = attr.gid_tbl_len;
-       immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
+       immutable->core_cap_flags = get_core_cap_flags(ibdev);
        immutable->max_mad_size = IB_MGMT_MAD_SIZE;
 
        return 0;
 }
 
+static int mlx5_enable_roce(struct mlx5_ib_dev *dev)
+{
+       int err;
+
+       dev->roce.nb.notifier_call = mlx5_netdev_event;
+       err = register_netdevice_notifier(&dev->roce.nb);
+       if (err)
+               return err;
+
+       err = mlx5_nic_vport_enable_roce(dev->mdev);
+       if (err)
+               goto err_unregister_netdevice_notifier;
+
+       return 0;
+
+err_unregister_netdevice_notifier:
+       unregister_netdevice_notifier(&dev->roce.nb);
+       return err;
+}
+
+static void mlx5_disable_roce(struct mlx5_ib_dev *dev)
+{
+       mlx5_nic_vport_disable_roce(dev->mdev);
+       unregister_netdevice_notifier(&dev->roce.nb);
+}
+
 static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 {
        struct mlx5_ib_dev *dev;
+       enum rdma_link_layer ll;
+       int port_type_cap;
        int err;
        int i;
 
-       /* don't create IB instance over Eth ports, no RoCE yet! */
-       if (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH)
+       port_type_cap = MLX5_CAP_GEN(mdev, port_type);
+       ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
+
+       if ((ll == IB_LINK_LAYER_ETHERNET) && !MLX5_CAP_GEN(mdev, roce))
                return NULL;
 
        printk_once(KERN_INFO "%s", mlx5_version);
@@ -1794,6 +2170,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 
        dev->mdev = mdev;
 
+       rwlock_init(&dev->roce.netdev_lock);
        err = get_port_caps(dev);
        if (err)
                goto err_dealloc;
@@ -1839,11 +2216,18 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
                (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ)         |
                (1ull << IB_USER_VERBS_CMD_OPEN_QP);
        dev->ib_dev.uverbs_ex_cmd_mask =
-               (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE);
+               (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE)     |
+               (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ)        |
+               (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP);
 
        dev->ib_dev.query_device        = mlx5_ib_query_device;
        dev->ib_dev.query_port          = mlx5_ib_query_port;
+       dev->ib_dev.get_link_layer      = mlx5_ib_port_link_layer;
+       if (ll == IB_LINK_LAYER_ETHERNET)
+               dev->ib_dev.get_netdev  = mlx5_ib_get_netdev;
        dev->ib_dev.query_gid           = mlx5_ib_query_gid;
+       dev->ib_dev.add_gid             = mlx5_ib_add_gid;
+       dev->ib_dev.del_gid             = mlx5_ib_del_gid;
        dev->ib_dev.query_pkey          = mlx5_ib_query_pkey;
        dev->ib_dev.modify_device       = mlx5_ib_modify_device;
        dev->ib_dev.modify_port         = mlx5_ib_modify_port;
@@ -1893,7 +2277,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
                        (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
        }
 
-       if (mlx5_ib_port_link_layer(&dev->ib_dev) ==
+       if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
            IB_LINK_LAYER_ETHERNET) {
                dev->ib_dev.create_flow = mlx5_ib_create_flow;
                dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow;
@@ -1908,9 +2292,15 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
        mutex_init(&dev->flow_db.lock);
        mutex_init(&dev->cap_mask_mutex);
 
+       if (ll == IB_LINK_LAYER_ETHERNET) {
+               err = mlx5_enable_roce(dev);
+               if (err)
+                       goto err_dealloc;
+       }
+
        err = create_dev_resources(&dev->devr);
        if (err)
-               goto err_dealloc;
+               goto err_disable_roce;
 
        err = mlx5_ib_odp_init_one(dev);
        if (err)
@@ -1947,6 +2337,10 @@ err_odp:
 err_rsrc:
        destroy_dev_resources(&dev->devr);
 
+err_disable_roce:
+       if (ll == IB_LINK_LAYER_ETHERNET)
+               mlx5_disable_roce(dev);
+
 err_dealloc:
        ib_dealloc_device((struct ib_device *)dev);
 
@@ -1956,11 +2350,14 @@ err_dealloc:
 static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
 {
        struct mlx5_ib_dev *dev = context;
+       enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 1);
 
        ib_unregister_device(&dev->ib_dev);
        destroy_umrc_res(dev);
        mlx5_ib_odp_remove_one(dev);
        destroy_dev_resources(&dev->devr);
+       if (ll == IB_LINK_LAYER_ETHERNET)
+               mlx5_disable_roce(dev);
        ib_dealloc_device(&dev->ib_dev);
 }
 
index 1474cccd1e0f3c4b5e9bc5b6575dc32fc8535349..d2b9737baa3675b1dc9ced794767f1790ada35ee 100644 (file)
@@ -42,6 +42,7 @@
 #include <linux/mlx5/qp.h>
 #include <linux/mlx5/srq.h>
 #include <linux/types.h>
+#include <linux/mlx5/transobj.h>
 
 #define mlx5_ib_dbg(dev, format, arg...)                               \
 pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__,   \
@@ -55,6 +56,11 @@ pr_err("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__,   \
 pr_warn("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__,    \
        __LINE__, current->pid, ##arg)
 
+#define field_avail(type, fld, sz) (offsetof(type, fld) +              \
+                                   sizeof(((type *)0)->fld) <= (sz))
+#define MLX5_IB_DEFAULT_UIDX 0xffffff
+#define MLX5_USER_ASSIGNED_UIDX_MASK __mlx5_mask(qpc, user_index)
+
 enum {
        MLX5_IB_MMAP_CMD_SHIFT  = 8,
        MLX5_IB_MMAP_CMD_MASK   = 0xff,
@@ -62,7 +68,9 @@ enum {
 
 enum mlx5_ib_mmap_cmd {
        MLX5_IB_MMAP_REGULAR_PAGE               = 0,
-       MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES       = 1, /* always last */
+       MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES       = 1,
+       /* 5 is chosen in order to be compatible with old versions of libmlx5 */
+       MLX5_IB_MMAP_CORE_CLOCK                 = 5,
 };
 
 enum {
@@ -85,6 +93,15 @@ enum mlx5_ib_mad_ifc_flags {
        MLX5_MAD_IFC_NET_VIEW           = 4,
 };
 
+enum {
+       MLX5_CROSS_CHANNEL_UUAR         = 0,
+};
+
+enum {
+       MLX5_CQE_VERSION_V0,
+       MLX5_CQE_VERSION_V1,
+};
+
 struct mlx5_ib_ucontext {
        struct ib_ucontext      ibucontext;
        struct list_head        db_page_list;
@@ -93,6 +110,9 @@ struct mlx5_ib_ucontext {
         */
        struct mutex            db_page_mutex;
        struct mlx5_uuar_info   uuari;
+       u8                      cqe_version;
+       /* Transport Domain number */
+       u32                     tdn;
 };
 
 static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
@@ -201,47 +221,70 @@ struct mlx5_ib_pfault {
        struct mlx5_pagefault   mpfault;
 };
 
+struct mlx5_ib_ubuffer {
+       struct ib_umem         *umem;
+       int                     buf_size;
+       u64                     buf_addr;
+};
+
+struct mlx5_ib_qp_base {
+       struct mlx5_ib_qp       *container_mibqp;
+       struct mlx5_core_qp     mqp;
+       struct mlx5_ib_ubuffer  ubuffer;
+};
+
+struct mlx5_ib_qp_trans {
+       struct mlx5_ib_qp_base  base;
+       u16                     xrcdn;
+       u8                      alt_port;
+       u8                      atomic_rd_en;
+       u8                      resp_depth;
+};
+
 struct mlx5_ib_rq {
+       struct mlx5_ib_qp_base base;
+       struct mlx5_ib_wq       *rq;
+       struct mlx5_ib_ubuffer  ubuffer;
+       struct mlx5_db          *doorbell;
        u32                     tirn;
+       u8                      state;
+};
+
+struct mlx5_ib_sq {
+       struct mlx5_ib_qp_base base;
+       struct mlx5_ib_wq       *sq;
+       struct mlx5_ib_ubuffer  ubuffer;
+       struct mlx5_db          *doorbell;
+       u32                     tisn;
+       u8                      state;
 };
 
 struct mlx5_ib_raw_packet_qp {
+       struct mlx5_ib_sq sq;
        struct mlx5_ib_rq rq;
 };
 
 struct mlx5_ib_qp {
        struct ib_qp            ibqp;
        union {
-               struct mlx5_core_qp             mqp;
-               struct mlx5_ib_raw_packet_qp    raw_packet_qp;
+               struct mlx5_ib_qp_trans trans_qp;
+               struct mlx5_ib_raw_packet_qp raw_packet_qp;
        };
-
        struct mlx5_buf         buf;
 
        struct mlx5_db          db;
        struct mlx5_ib_wq       rq;
 
-       u32                     doorbell_qpn;
        u8                      sq_signal_bits;
        u8                      fm_cache;
-       int                     sq_max_wqes_per_wr;
-       int                     sq_spare_wqes;
        struct mlx5_ib_wq       sq;
 
-       struct ib_umem         *umem;
-       int                     buf_size;
-
        /* serialize qp state modifications
         */
        struct mutex            mutex;
-       u16                     xrcdn;
        u32                     flags;
        u8                      port;
-       u8                      alt_port;
-       u8                      atomic_rd_en;
-       u8                      resp_depth;
        u8                      state;
-       int                     mlx_type;
        int                     wq_sig;
        int                     scat_cqe;
        int                     max_inline_data;
@@ -284,6 +327,9 @@ struct mlx5_ib_cq_buf {
 enum mlx5_ib_qp_flags {
        MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK     = 1 << 0,
        MLX5_IB_QP_SIGNATURE_HANDLING           = 1 << 1,
+       MLX5_IB_QP_CROSS_CHANNEL                = 1 << 2,
+       MLX5_IB_QP_MANAGED_SEND                 = 1 << 3,
+       MLX5_IB_QP_MANAGED_RECV                 = 1 << 4,
 };
 
 struct mlx5_umr_wr {
@@ -326,6 +372,7 @@ struct mlx5_ib_cq {
        struct mlx5_ib_cq_buf  *resize_buf;
        struct ib_umem         *resize_umem;
        int                     cqe_size;
+       u32                     create_flags;
 };
 
 struct mlx5_ib_srq {
@@ -449,9 +496,19 @@ struct mlx5_ib_resources {
        struct ib_srq   *s1;
 };
 
+struct mlx5_roce {
+       /* Protect mlx5_ib_get_netdev from invoking dev_hold() with a NULL
+        * netdev pointer
+        */
+       rwlock_t                netdev_lock;
+       struct net_device       *netdev;
+       struct notifier_block   nb;
+};
+
 struct mlx5_ib_dev {
        struct ib_device                ib_dev;
        struct mlx5_core_dev            *mdev;
+       struct mlx5_roce                roce;
        MLX5_DECLARE_DOORBELL_LOCK(uar_lock);
        int                             num_ports;
        /* serialize update of capability mask
@@ -498,7 +555,7 @@ static inline struct mlx5_ib_cq *to_mcq(struct ib_cq *ibcq)
 
 static inline struct mlx5_ib_qp *to_mibqp(struct mlx5_core_qp *mqp)
 {
-       return container_of(mqp, struct mlx5_ib_qp, mqp);
+       return container_of(mqp, struct mlx5_ib_qp_base, mqp)->container_mibqp;
 }
 
 static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mr *mmr)
@@ -550,8 +607,6 @@ void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index);
 int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey,
                 u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh,
                 const void *in_mad, void *response_mad);
-struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr,
-                          struct mlx5_ib_ah *ah);
 struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
 int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
 int mlx5_ib_destroy_ah(struct ib_ah *ah);
@@ -578,7 +633,8 @@ int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
                      struct ib_recv_wr **bad_wr);
 void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n);
 int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index,
-                         void *buffer, u32 length);
+                         void *buffer, u32 length,
+                         struct mlx5_ib_qp_base *base);
 struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
                                const struct ib_cq_init_attr *attr,
                                struct ib_ucontext *context,
@@ -680,6 +736,9 @@ static inline void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp)  {}
 
 #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
 
+__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
+                              int index);
+
 static inline void init_query_mad(struct ib_smp *mad)
 {
        mad->base_version  = 1;
@@ -705,4 +764,28 @@ static inline int is_qp1(enum ib_qp_type qp_type)
 #define MLX5_MAX_UMR_SHIFT 16
 #define MLX5_MAX_UMR_PAGES (1 << MLX5_MAX_UMR_SHIFT)
 
+static inline u32 check_cq_create_flags(u32 flags)
+{
+       /*
+        * It returns non-zero value for unsupported CQ
+        * create flags, otherwise it returns zero.
+        */
+       return (flags & ~(IB_CQ_FLAGS_IGNORE_OVERRUN |
+                         IB_CQ_FLAGS_TIMESTAMP_COMPLETION));
+}
+
+static inline int verify_assign_uidx(u8 cqe_version, u32 cmd_uidx,
+                                    u32 *user_index)
+{
+       if (cqe_version) {
+               if ((cmd_uidx == MLX5_IB_DEFAULT_UIDX) ||
+                   (cmd_uidx & ~MLX5_USER_ASSIGNED_UIDX_MASK))
+                       return -EINVAL;
+               *user_index = cmd_uidx;
+       } else {
+               *user_index = MLX5_IB_DEFAULT_UIDX;
+       }
+
+       return 0;
+}
 #endif /* MLX5_IB_H */
index aa8391e75385016bb11a2526f8660331193b7c4e..b8d76361a48dcfe29914ff13011b09d2893c851d 100644 (file)
@@ -153,14 +153,16 @@ static struct mlx5_ib_mr *mlx5_ib_odp_find_mr_lkey(struct mlx5_ib_dev *dev,
 
 static void mlx5_ib_page_fault_resume(struct mlx5_ib_qp *qp,
                                      struct mlx5_ib_pfault *pfault,
-                                     int error) {
+                                     int error)
+{
        struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.pd->device);
-       int ret = mlx5_core_page_fault_resume(dev->mdev, qp->mqp.qpn,
+       u32 qpn = qp->trans_qp.base.mqp.qpn;
+       int ret = mlx5_core_page_fault_resume(dev->mdev,
+                                             qpn,
                                              pfault->mpfault.flags,
                                              error);
        if (ret)
-               pr_err("Failed to resolve the page fault on QP 0x%x\n",
-                      qp->mqp.qpn);
+               pr_err("Failed to resolve the page fault on QP 0x%x\n", qpn);
 }
 
 /*
@@ -391,6 +393,7 @@ static int mlx5_ib_mr_initiator_pfault_handler(
 #if defined(DEBUG)
        u32 ctrl_wqe_index, ctrl_qpn;
 #endif
+       u32 qpn = qp->trans_qp.base.mqp.qpn;
 
        ds = be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_DS_MASK;
        if (ds * MLX5_WQE_DS_UNITS > wqe_length) {
@@ -401,7 +404,7 @@ static int mlx5_ib_mr_initiator_pfault_handler(
 
        if (ds == 0) {
                mlx5_ib_err(dev, "Got WQE with zero DS. wqe_index=%x, qpn=%x\n",
-                           wqe_index, qp->mqp.qpn);
+                           wqe_index, qpn);
                return -EFAULT;
        }
 
@@ -411,16 +414,16 @@ static int mlx5_ib_mr_initiator_pfault_handler(
                        MLX5_WQE_CTRL_WQE_INDEX_SHIFT;
        if (wqe_index != ctrl_wqe_index) {
                mlx5_ib_err(dev, "Got WQE with invalid wqe_index. wqe_index=0x%x, qpn=0x%x ctrl->wqe_index=0x%x\n",
-                           wqe_index, qp->mqp.qpn,
+                           wqe_index, qpn,
                            ctrl_wqe_index);
                return -EFAULT;
        }
 
        ctrl_qpn = (be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_QPN_MASK) >>
                MLX5_WQE_CTRL_QPN_SHIFT;
-       if (qp->mqp.qpn != ctrl_qpn) {
+       if (qpn != ctrl_qpn) {
                mlx5_ib_err(dev, "Got WQE with incorrect QP number. wqe_index=0x%x, qpn=0x%x ctrl->qpn=0x%x\n",
-                           wqe_index, qp->mqp.qpn,
+                           wqe_index, qpn,
                            ctrl_qpn);
                return -EFAULT;
        }
@@ -537,6 +540,7 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_qp *qp,
        int resume_with_error = 0;
        u16 wqe_index = pfault->mpfault.wqe.wqe_index;
        int requestor = pfault->mpfault.flags & MLX5_PFAULT_REQUESTOR;
+       u32 qpn = qp->trans_qp.base.mqp.qpn;
 
        buffer = (char *)__get_free_page(GFP_KERNEL);
        if (!buffer) {
@@ -546,10 +550,10 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_qp *qp,
        }
 
        ret = mlx5_ib_read_user_wqe(qp, requestor, wqe_index, buffer,
-                                   PAGE_SIZE);
+                                   PAGE_SIZE, &qp->trans_qp.base);
        if (ret < 0) {
                mlx5_ib_err(dev, "Failed reading a WQE following page fault, error=%x, wqe_index=%x, qpn=%x\n",
-                           -ret, wqe_index, qp->mqp.qpn);
+                           -ret, wqe_index, qpn);
                resume_with_error = 1;
                goto resolve_page_fault;
        }
@@ -586,7 +590,8 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_qp *qp,
 resolve_page_fault:
        mlx5_ib_page_fault_resume(qp, pfault, resume_with_error);
        mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%x resume_with_error=%d, flags: 0x%x\n",
-                   qp->mqp.qpn, resume_with_error, pfault->mpfault.flags);
+                   qpn, resume_with_error,
+                   pfault->mpfault.flags);
 
        free_page((unsigned long)buffer);
 }
@@ -753,7 +758,7 @@ void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp)
        qp->disable_page_faults = 1;
        spin_lock_init(&qp->disable_page_faults_lock);
 
-       qp->mqp.pfault_handler  = mlx5_ib_pfault_handler;
+       qp->trans_qp.base.mqp.pfault_handler = mlx5_ib_pfault_handler;
 
        for (i = 0; i < MLX5_IB_PAGEFAULT_CONTEXTS; ++i)
                INIT_WORK(&qp->pagefaults[i].work, mlx5_ib_qp_pfault_action);
index 307bdbca8938e1d87493d7048b93126bd9261e26..9116bc3988a6f9338709f6120a4597794d2cf36d 100644 (file)
@@ -32,6 +32,8 @@
 
 #include <linux/module.h>
 #include <rdma/ib_umem.h>
+#include <rdma/ib_cache.h>
+#include <rdma/ib_user_verbs.h>
 #include "mlx5_ib.h"
 #include "user.h"
 
@@ -114,14 +116,15 @@ void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n)
  * Return: the number of bytes copied, or an error code.
  */
 int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index,
-                         void *buffer, u32 length)
+                         void *buffer, u32 length,
+                         struct mlx5_ib_qp_base *base)
 {
        struct ib_device *ibdev = qp->ibqp.device;
        struct mlx5_ib_dev *dev = to_mdev(ibdev);
        struct mlx5_ib_wq *wq = send ? &qp->sq : &qp->rq;
        size_t offset;
        size_t wq_end;
-       struct ib_umem *umem = qp->umem;
+       struct ib_umem *umem = base->ubuffer.umem;
        u32 first_copy_length;
        int wqe_length;
        int ret;
@@ -172,8 +175,10 @@ static void mlx5_ib_qp_event(struct mlx5_core_qp *qp, int type)
        struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;
        struct ib_event event;
 
-       if (type == MLX5_EVENT_TYPE_PATH_MIG)
-               to_mibqp(qp)->port = to_mibqp(qp)->alt_port;
+       if (type == MLX5_EVENT_TYPE_PATH_MIG) {
+               /* This event is only valid for trans_qps */
+               to_mibqp(qp)->port = to_mibqp(qp)->trans_qp.alt_port;
+       }
 
        if (ibqp->event_handler) {
                event.device     = ibqp->device;
@@ -366,7 +371,9 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
 
 static int set_user_buf_size(struct mlx5_ib_dev *dev,
                            struct mlx5_ib_qp *qp,
-                           struct mlx5_ib_create_qp *ucmd)
+                           struct mlx5_ib_create_qp *ucmd,
+                           struct mlx5_ib_qp_base *base,
+                           struct ib_qp_init_attr *attr)
 {
        int desc_sz = 1 << qp->sq.wqe_shift;
 
@@ -391,8 +398,13 @@ static int set_user_buf_size(struct mlx5_ib_dev *dev,
                return -EINVAL;
        }
 
-       qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
-               (qp->sq.wqe_cnt << 6);
+       if (attr->qp_type == IB_QPT_RAW_PACKET) {
+               base->ubuffer.buf_size = qp->rq.wqe_cnt << qp->rq.wqe_shift;
+               qp->raw_packet_qp.sq.ubuffer.buf_size = qp->sq.wqe_cnt << 6;
+       } else {
+               base->ubuffer.buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
+                                        (qp->sq.wqe_cnt << 6);
+       }
 
        return 0;
 }
@@ -578,8 +590,8 @@ static int to_mlx5_st(enum ib_qp_type type)
        case IB_QPT_SMI:                return MLX5_QP_ST_QP0;
        case IB_QPT_GSI:                return MLX5_QP_ST_QP1;
        case IB_QPT_RAW_IPV6:           return MLX5_QP_ST_RAW_IPV6;
-       case IB_QPT_RAW_ETHERTYPE:      return MLX5_QP_ST_RAW_ETHERTYPE;
        case IB_QPT_RAW_PACKET:
+       case IB_QPT_RAW_ETHERTYPE:      return MLX5_QP_ST_RAW_ETHERTYPE;
        case IB_QPT_MAX:
        default:                return -EINVAL;
        }
@@ -590,13 +602,51 @@ static int uuarn_to_uar_index(struct mlx5_uuar_info *uuari, int uuarn)
        return uuari->uars[uuarn / MLX5_BF_REGS_PER_PAGE].index;
 }
 
+static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev,
+                           struct ib_pd *pd,
+                           unsigned long addr, size_t size,
+                           struct ib_umem **umem,
+                           int *npages, int *page_shift, int *ncont,
+                           u32 *offset)
+{
+       int err;
+
+       *umem = ib_umem_get(pd->uobject->context, addr, size, 0, 0);
+       if (IS_ERR(*umem)) {
+               mlx5_ib_dbg(dev, "umem_get failed\n");
+               return PTR_ERR(*umem);
+       }
+
+       mlx5_ib_cont_pages(*umem, addr, npages, page_shift, ncont, NULL);
+
+       err = mlx5_ib_get_buf_offset(addr, *page_shift, offset);
+       if (err) {
+               mlx5_ib_warn(dev, "bad offset\n");
+               goto err_umem;
+       }
+
+       mlx5_ib_dbg(dev, "addr 0x%lx, size %zu, npages %d, page_shift %d, ncont %d, offset %d\n",
+                   addr, size, *npages, *page_shift, *ncont, *offset);
+
+       return 0;
+
+err_umem:
+       ib_umem_release(*umem);
+       *umem = NULL;
+
+       return err;
+}
+
 static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
                          struct mlx5_ib_qp *qp, struct ib_udata *udata,
+                         struct ib_qp_init_attr *attr,
                          struct mlx5_create_qp_mbox_in **in,
-                         struct mlx5_ib_create_qp_resp *resp, int *inlen)
+                         struct mlx5_ib_create_qp_resp *resp, int *inlen,
+                         struct mlx5_ib_qp_base *base)
 {
        struct mlx5_ib_ucontext *context;
        struct mlx5_ib_create_qp ucmd;
+       struct mlx5_ib_ubuffer *ubuffer = &base->ubuffer;
        int page_shift = 0;
        int uar_index;
        int npages;
@@ -615,18 +665,23 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
        /*
         * TBD: should come from the verbs when we have the API
         */
-       uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_HIGH);
-       if (uuarn < 0) {
-               mlx5_ib_dbg(dev, "failed to allocate low latency UUAR\n");
-               mlx5_ib_dbg(dev, "reverting to medium latency\n");
-               uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_MEDIUM);
+       if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL)
+               /* In CROSS_CHANNEL CQ and QP must use the same UAR */
+               uuarn = MLX5_CROSS_CHANNEL_UUAR;
+       else {
+               uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_HIGH);
                if (uuarn < 0) {
-                       mlx5_ib_dbg(dev, "failed to allocate medium latency UUAR\n");
-                       mlx5_ib_dbg(dev, "reverting to high latency\n");
-                       uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW);
+                       mlx5_ib_dbg(dev, "failed to allocate low latency UUAR\n");
+                       mlx5_ib_dbg(dev, "reverting to medium latency\n");
+                       uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_MEDIUM);
                        if (uuarn < 0) {
-                               mlx5_ib_warn(dev, "uuar allocation failed\n");
-                               return uuarn;
+                               mlx5_ib_dbg(dev, "failed to allocate medium latency UUAR\n");
+                               mlx5_ib_dbg(dev, "reverting to high latency\n");
+                               uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW);
+                               if (uuarn < 0) {
+                                       mlx5_ib_warn(dev, "uuar allocation failed\n");
+                                       return uuarn;
+                               }
                        }
                }
        }
@@ -638,32 +693,20 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
        qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB);
        qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
 
-       err = set_user_buf_size(dev, qp, &ucmd);
+       err = set_user_buf_size(dev, qp, &ucmd, base, attr);
        if (err)
                goto err_uuar;
 
-       if (ucmd.buf_addr && qp->buf_size) {
-               qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
-                                      qp->buf_size, 0, 0);
-               if (IS_ERR(qp->umem)) {
-                       mlx5_ib_dbg(dev, "umem_get failed\n");
-                       err = PTR_ERR(qp->umem);
+       if (ucmd.buf_addr && ubuffer->buf_size) {
+               ubuffer->buf_addr = ucmd.buf_addr;
+               err = mlx5_ib_umem_get(dev, pd, ubuffer->buf_addr,
+                                      ubuffer->buf_size,
+                                      &ubuffer->umem, &npages, &page_shift,
+                                      &ncont, &offset);
+               if (err)
                        goto err_uuar;
-               }
        } else {
-               qp->umem = NULL;
-       }
-
-       if (qp->umem) {
-               mlx5_ib_cont_pages(qp->umem, ucmd.buf_addr, &npages, &page_shift,
-                                  &ncont, NULL);
-               err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, &offset);
-               if (err) {
-                       mlx5_ib_warn(dev, "bad offset\n");
-                       goto err_umem;
-               }
-               mlx5_ib_dbg(dev, "addr 0x%llx, size %d, npages %d, page_shift %d, ncont %d, offset %d\n",
-                           ucmd.buf_addr, qp->buf_size, npages, page_shift, ncont, offset);
+               ubuffer->umem = NULL;
        }
 
        *inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont;
@@ -672,8 +715,9 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
                err = -ENOMEM;
                goto err_umem;
        }
-       if (qp->umem)
-               mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0);
+       if (ubuffer->umem)
+               mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift,
+                                    (*in)->pas, 0);
        (*in)->ctx.log_pg_sz_remote_qpn =
                cpu_to_be32((page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24);
        (*in)->ctx.params2 = cpu_to_be32(offset << 6);
@@ -704,29 +748,31 @@ err_free:
        kvfree(*in);
 
 err_umem:
-       if (qp->umem)
-               ib_umem_release(qp->umem);
+       if (ubuffer->umem)
+               ib_umem_release(ubuffer->umem);
 
 err_uuar:
        free_uuar(&context->uuari, uuarn);
        return err;
 }
 
-static void destroy_qp_user(struct ib_pd *pd, struct mlx5_ib_qp *qp)
+static void destroy_qp_user(struct ib_pd *pd, struct mlx5_ib_qp *qp,
+                           struct mlx5_ib_qp_base *base)
 {
        struct mlx5_ib_ucontext *context;
 
        context = to_mucontext(pd->uobject->context);
        mlx5_ib_db_unmap_user(context, &qp->db);
-       if (qp->umem)
-               ib_umem_release(qp->umem);
+       if (base->ubuffer.umem)
+               ib_umem_release(base->ubuffer.umem);
        free_uuar(&context->uuari, qp->uuarn);
 }
 
 static int create_kernel_qp(struct mlx5_ib_dev *dev,
                            struct ib_qp_init_attr *init_attr,
                            struct mlx5_ib_qp *qp,
-                           struct mlx5_create_qp_mbox_in **in, int *inlen)
+                           struct mlx5_create_qp_mbox_in **in, int *inlen,
+                           struct mlx5_ib_qp_base *base)
 {
        enum mlx5_ib_latency_class lc = MLX5_IB_LATENCY_CLASS_LOW;
        struct mlx5_uuar_info *uuari;
@@ -758,9 +804,9 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
 
        qp->rq.offset = 0;
        qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
-       qp->buf_size = err + (qp->rq.wqe_cnt << qp->rq.wqe_shift);
+       base->ubuffer.buf_size = err + (qp->rq.wqe_cnt << qp->rq.wqe_shift);
 
-       err = mlx5_buf_alloc(dev->mdev, qp->buf_size, &qp->buf);
+       err = mlx5_buf_alloc(dev->mdev, base->ubuffer.buf_size, &qp->buf);
        if (err) {
                mlx5_ib_dbg(dev, "err %d\n", err);
                goto err_uuar;
@@ -853,19 +899,304 @@ static int is_connected(enum ib_qp_type qp_type)
        return 0;
 }
 
+static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
+                                   struct mlx5_ib_sq *sq, u32 tdn)
+{
+       u32 in[MLX5_ST_SZ_DW(create_tis_in)];
+       void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
+
+       memset(in, 0, sizeof(in));
+
+       MLX5_SET(tisc, tisc, transport_domain, tdn);
+
+       return mlx5_core_create_tis(dev->mdev, in, sizeof(in), &sq->tisn);
+}
+
+static void destroy_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
+                                     struct mlx5_ib_sq *sq)
+{
+       mlx5_core_destroy_tis(dev->mdev, sq->tisn);
+}
+
+static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
+                                  struct mlx5_ib_sq *sq, void *qpin,
+                                  struct ib_pd *pd)
+{
+       struct mlx5_ib_ubuffer *ubuffer = &sq->ubuffer;
+       __be64 *pas;
+       void *in;
+       void *sqc;
+       void *qpc = MLX5_ADDR_OF(create_qp_in, qpin, qpc);
+       void *wq;
+       int inlen;
+       int err;
+       int page_shift = 0;
+       int npages;
+       int ncont = 0;
+       u32 offset = 0;
+
+       err = mlx5_ib_umem_get(dev, pd, ubuffer->buf_addr, ubuffer->buf_size,
+                              &sq->ubuffer.umem, &npages, &page_shift,
+                              &ncont, &offset);
+       if (err)
+               return err;
+
+       inlen = MLX5_ST_SZ_BYTES(create_sq_in) + sizeof(u64) * ncont;
+       in = mlx5_vzalloc(inlen);
+       if (!in) {
+               err = -ENOMEM;
+               goto err_umem;
+       }
+
+       sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
+       MLX5_SET(sqc, sqc, flush_in_error_en, 1);
+       MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
+       MLX5_SET(sqc, sqc, user_index, MLX5_GET(qpc, qpc, user_index));
+       MLX5_SET(sqc, sqc, cqn, MLX5_GET(qpc, qpc, cqn_snd));
+       MLX5_SET(sqc, sqc, tis_lst_sz, 1);
+       MLX5_SET(sqc, sqc, tis_num_0, sq->tisn);
+
+       wq = MLX5_ADDR_OF(sqc, sqc, wq);
+       MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
+       MLX5_SET(wq, wq, pd, MLX5_GET(qpc, qpc, pd));
+       MLX5_SET(wq, wq, uar_page, MLX5_GET(qpc, qpc, uar_page));
+       MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(qpc, qpc, dbr_addr));
+       MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
+       MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(qpc, qpc, log_sq_size));
+       MLX5_SET(wq, wq, log_wq_pg_sz,  page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+       MLX5_SET(wq, wq, page_offset, offset);
+
+       pas = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
+       mlx5_ib_populate_pas(dev, sq->ubuffer.umem, page_shift, pas, 0);
+
+       err = mlx5_core_create_sq_tracked(dev->mdev, in, inlen, &sq->base.mqp);
+
+       kvfree(in);
+
+       if (err)
+               goto err_umem;
+
+       return 0;
+
+err_umem:
+       ib_umem_release(sq->ubuffer.umem);
+       sq->ubuffer.umem = NULL;
+
+       return err;
+}
+
+static void destroy_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
+                                    struct mlx5_ib_sq *sq)
+{
+       mlx5_core_destroy_sq_tracked(dev->mdev, &sq->base.mqp);
+       ib_umem_release(sq->ubuffer.umem);
+}
+
+static int get_rq_pas_size(void *qpc)
+{
+       u32 log_page_size = MLX5_GET(qpc, qpc, log_page_size) + 12;
+       u32 log_rq_stride = MLX5_GET(qpc, qpc, log_rq_stride);
+       u32 log_rq_size   = MLX5_GET(qpc, qpc, log_rq_size);
+       u32 page_offset   = MLX5_GET(qpc, qpc, page_offset);
+       u32 po_quanta     = 1 << (log_page_size - 6);
+       u32 rq_sz         = 1 << (log_rq_size + 4 + log_rq_stride);
+       u32 page_size     = 1 << log_page_size;
+       u32 rq_sz_po      = rq_sz + (page_offset * po_quanta);
+       u32 rq_num_pas    = (rq_sz_po + page_size - 1) / page_size;
+
+       return rq_num_pas * sizeof(u64);
+}
+
+static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
+                                  struct mlx5_ib_rq *rq, void *qpin)
+{
+       __be64 *pas;
+       __be64 *qp_pas;
+       void *in;
+       void *rqc;
+       void *wq;
+       void *qpc = MLX5_ADDR_OF(create_qp_in, qpin, qpc);
+       int inlen;
+       int err;
+       u32 rq_pas_size = get_rq_pas_size(qpc);
+
+       inlen = MLX5_ST_SZ_BYTES(create_rq_in) + rq_pas_size;
+       in = mlx5_vzalloc(inlen);
+       if (!in)
+               return -ENOMEM;
+
+       rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
+       MLX5_SET(rqc, rqc, vsd, 1);
+       MLX5_SET(rqc, rqc, mem_rq_type, MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE);
+       MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
+       MLX5_SET(rqc, rqc, flush_in_error_en, 1);
+       MLX5_SET(rqc, rqc, user_index, MLX5_GET(qpc, qpc, user_index));
+       MLX5_SET(rqc, rqc, cqn, MLX5_GET(qpc, qpc, cqn_rcv));
+
+       wq = MLX5_ADDR_OF(rqc, rqc, wq);
+       MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
+       MLX5_SET(wq, wq, end_padding_mode,
+                MLX5_GET(qpc, qpc, end_padding_mode));
+       MLX5_SET(wq, wq, page_offset, MLX5_GET(qpc, qpc, page_offset));
+       MLX5_SET(wq, wq, pd, MLX5_GET(qpc, qpc, pd));
+       MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(qpc, qpc, dbr_addr));
+       MLX5_SET(wq, wq, log_wq_stride, MLX5_GET(qpc, qpc, log_rq_stride) + 4);
+       MLX5_SET(wq, wq, log_wq_pg_sz, MLX5_GET(qpc, qpc, log_page_size));
+       MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(qpc, qpc, log_rq_size));
+
+       pas = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
+       qp_pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, qpin, pas);
+       memcpy(pas, qp_pas, rq_pas_size);
+
+       err = mlx5_core_create_rq_tracked(dev->mdev, in, inlen, &rq->base.mqp);
+
+       kvfree(in);
+
+       return err;
+}
+
+static void destroy_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
+                                    struct mlx5_ib_rq *rq)
+{
+       mlx5_core_destroy_rq_tracked(dev->mdev, &rq->base.mqp);
+}
+
+static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
+                                   struct mlx5_ib_rq *rq, u32 tdn)
+{
+       u32 *in;
+       void *tirc;
+       int inlen;
+       int err;
+
+       inlen = MLX5_ST_SZ_BYTES(create_tir_in);
+       in = mlx5_vzalloc(inlen);
+       if (!in)
+               return -ENOMEM;
+
+       tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
+       MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT);
+       MLX5_SET(tirc, tirc, inline_rqn, rq->base.mqp.qpn);
+       MLX5_SET(tirc, tirc, transport_domain, tdn);
+
+       err = mlx5_core_create_tir(dev->mdev, in, inlen, &rq->tirn);
+
+       kvfree(in);
+
+       return err;
+}
+
+static void destroy_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
+                                     struct mlx5_ib_rq *rq)
+{
+       mlx5_core_destroy_tir(dev->mdev, rq->tirn);
+}
+
+static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+                               struct mlx5_create_qp_mbox_in *in,
+                               struct ib_pd *pd)
+{
+       struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
+       struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
+       struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
+       struct ib_uobject *uobj = pd->uobject;
+       struct ib_ucontext *ucontext = uobj->context;
+       struct mlx5_ib_ucontext *mucontext = to_mucontext(ucontext);
+       int err;
+       u32 tdn = mucontext->tdn;
+
+       if (qp->sq.wqe_cnt) {
+               err = create_raw_packet_qp_tis(dev, sq, tdn);
+               if (err)
+                       return err;
+
+               err = create_raw_packet_qp_sq(dev, sq, in, pd);
+               if (err)
+                       goto err_destroy_tis;
+
+               sq->base.container_mibqp = qp;
+       }
+
+       if (qp->rq.wqe_cnt) {
+               err = create_raw_packet_qp_rq(dev, rq, in);
+               if (err)
+                       goto err_destroy_sq;
+
+               rq->base.container_mibqp = qp;
+
+               err = create_raw_packet_qp_tir(dev, rq, tdn);
+               if (err)
+                       goto err_destroy_rq;
+       }
+
+       qp->trans_qp.base.mqp.qpn = qp->sq.wqe_cnt ? sq->base.mqp.qpn :
+                                                    rq->base.mqp.qpn;
+
+       return 0;
+
+err_destroy_rq:
+       destroy_raw_packet_qp_rq(dev, rq);
+err_destroy_sq:
+       if (!qp->sq.wqe_cnt)
+               return err;
+       destroy_raw_packet_qp_sq(dev, sq);
+err_destroy_tis:
+       destroy_raw_packet_qp_tis(dev, sq);
+
+       return err;
+}
+
+static void destroy_raw_packet_qp(struct mlx5_ib_dev *dev,
+                                 struct mlx5_ib_qp *qp)
+{
+       struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
+       struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
+       struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
+
+       if (qp->rq.wqe_cnt) {
+               destroy_raw_packet_qp_tir(dev, rq);
+               destroy_raw_packet_qp_rq(dev, rq);
+       }
+
+       if (qp->sq.wqe_cnt) {
+               destroy_raw_packet_qp_sq(dev, sq);
+               destroy_raw_packet_qp_tis(dev, sq);
+       }
+}
+
+static void raw_packet_qp_copy_info(struct mlx5_ib_qp *qp,
+                                   struct mlx5_ib_raw_packet_qp *raw_packet_qp)
+{
+       struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
+       struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
+
+       sq->sq = &qp->sq;
+       rq->rq = &qp->rq;
+       sq->doorbell = &qp->db;
+       rq->doorbell = &qp->db;
+}
+
 static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
                            struct ib_qp_init_attr *init_attr,
                            struct ib_udata *udata, struct mlx5_ib_qp *qp)
 {
        struct mlx5_ib_resources *devr = &dev->devr;
        struct mlx5_core_dev *mdev = dev->mdev;
+       struct mlx5_ib_qp_base *base;
        struct mlx5_ib_create_qp_resp resp;
        struct mlx5_create_qp_mbox_in *in;
        struct mlx5_ib_create_qp ucmd;
        int inlen = sizeof(*in);
        int err;
+       u32 uidx = MLX5_IB_DEFAULT_UIDX;
+       void *qpc;
+
+       base = init_attr->qp_type == IB_QPT_RAW_PACKET ?
+              &qp->raw_packet_qp.rq.base :
+              &qp->trans_qp.base;
 
-       mlx5_ib_odp_create_qp(qp);
+       if (init_attr->qp_type != IB_QPT_RAW_PACKET)
+               mlx5_ib_odp_create_qp(qp);
 
        mutex_init(&qp->mutex);
        spin_lock_init(&qp->sq.lock);
@@ -880,6 +1211,21 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
                }
        }
 
+       if (init_attr->create_flags &
+                       (IB_QP_CREATE_CROSS_CHANNEL |
+                        IB_QP_CREATE_MANAGED_SEND |
+                        IB_QP_CREATE_MANAGED_RECV)) {
+               if (!MLX5_CAP_GEN(mdev, cd)) {
+                       mlx5_ib_dbg(dev, "cross-channel isn't supported\n");
+                       return -EINVAL;
+               }
+               if (init_attr->create_flags & IB_QP_CREATE_CROSS_CHANNEL)
+                       qp->flags |= MLX5_IB_QP_CROSS_CHANNEL;
+               if (init_attr->create_flags & IB_QP_CREATE_MANAGED_SEND)
+                       qp->flags |= MLX5_IB_QP_MANAGED_SEND;
+               if (init_attr->create_flags & IB_QP_CREATE_MANAGED_RECV)
+                       qp->flags |= MLX5_IB_QP_MANAGED_RECV;
+       }
        if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
                qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
 
@@ -889,6 +1235,11 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
                        return -EFAULT;
                }
 
+               err = get_qp_user_index(to_mucontext(pd->uobject->context),
+                                       &ucmd, udata->inlen, &uidx);
+               if (err)
+                       return err;
+
                qp->wq_sig = !!(ucmd.flags & MLX5_QP_FLAG_SIGNATURE);
                qp->scat_cqe = !!(ucmd.flags & MLX5_QP_FLAG_SCATTER_CQE);
        } else {
@@ -918,11 +1269,13 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
                                            ucmd.sq_wqe_count, max_wqes);
                                return -EINVAL;
                        }
-                       err = create_user_qp(dev, pd, qp, udata, &in, &resp, &inlen);
+                       err = create_user_qp(dev, pd, qp, udata, init_attr, &in,
+                                            &resp, &inlen, base);
                        if (err)
                                mlx5_ib_dbg(dev, "err %d\n", err);
                } else {
-                       err = create_kernel_qp(dev, init_attr, qp, &in, &inlen);
+                       err = create_kernel_qp(dev, init_attr, qp, &in, &inlen,
+                                              base);
                        if (err)
                                mlx5_ib_dbg(dev, "err %d\n", err);
                }
@@ -954,6 +1307,13 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
        if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK)
                in->ctx.flags_pd |= cpu_to_be32(MLX5_QP_BLOCK_MCAST);
 
+       if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL)
+               in->ctx.params2 |= cpu_to_be32(MLX5_QP_BIT_CC_MASTER);
+       if (qp->flags & MLX5_IB_QP_MANAGED_SEND)
+               in->ctx.params2 |= cpu_to_be32(MLX5_QP_BIT_CC_SLAVE_SEND);
+       if (qp->flags & MLX5_IB_QP_MANAGED_RECV)
+               in->ctx.params2 |= cpu_to_be32(MLX5_QP_BIT_CC_SLAVE_RECV);
+
        if (qp->scat_cqe && is_connected(init_attr->qp_type)) {
                int rcqe_sz;
                int scqe_sz;
@@ -1018,26 +1378,35 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 
        in->ctx.db_rec_addr = cpu_to_be64(qp->db.dma);
 
-       err = mlx5_core_create_qp(dev->mdev, &qp->mqp, in, inlen);
+       if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1) {
+               qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
+               /* 0xffffff means we ask to work with cqe version 0 */
+               MLX5_SET(qpc, qpc, user_index, uidx);
+       }
+
+       if (init_attr->qp_type == IB_QPT_RAW_PACKET) {
+               qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd.sq_buf_addr;
+               raw_packet_qp_copy_info(qp, &qp->raw_packet_qp);
+               err = create_raw_packet_qp(dev, qp, in, pd);
+       } else {
+               err = mlx5_core_create_qp(dev->mdev, &base->mqp, in, inlen);
+       }
+
        if (err) {
                mlx5_ib_dbg(dev, "create qp failed\n");
                goto err_create;
        }
 
        kvfree(in);
-       /* Hardware wants QPN written in big-endian order (after
-        * shifting) for send doorbell.  Precompute this value to save
-        * a little bit when posting sends.
-        */
-       qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
 
-       qp->mqp.event = mlx5_ib_qp_event;
+       base->container_mibqp = qp;
+       base->mqp.event = mlx5_ib_qp_event;
 
        return 0;
 
 err_create:
        if (qp->create_type == MLX5_QP_USER)
-               destroy_qp_user(pd, qp);
+               destroy_qp_user(pd, qp, base);
        else if (qp->create_type == MLX5_QP_KERNEL)
                destroy_qp_kernel(dev, qp);
 
@@ -1129,11 +1498,11 @@ static void get_cqs(struct mlx5_ib_qp *qp,
        case IB_QPT_UD:
        case IB_QPT_RAW_IPV6:
        case IB_QPT_RAW_ETHERTYPE:
+       case IB_QPT_RAW_PACKET:
                *send_cq = to_mcq(qp->ibqp.send_cq);
                *recv_cq = to_mcq(qp->ibqp.recv_cq);
                break;
 
-       case IB_QPT_RAW_PACKET:
        case IB_QPT_MAX:
        default:
                *send_cq = NULL;
@@ -1142,45 +1511,66 @@ static void get_cqs(struct mlx5_ib_qp *qp,
        }
 }
 
+static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+                               u16 operation);
+
 static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
 {
        struct mlx5_ib_cq *send_cq, *recv_cq;
+       struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
        struct mlx5_modify_qp_mbox_in *in;
        int err;
 
+       base = qp->ibqp.qp_type == IB_QPT_RAW_PACKET ?
+              &qp->raw_packet_qp.rq.base :
+              &qp->trans_qp.base;
+
        in = kzalloc(sizeof(*in), GFP_KERNEL);
        if (!in)
                return;
 
        if (qp->state != IB_QPS_RESET) {
-               mlx5_ib_qp_disable_pagefaults(qp);
-               if (mlx5_core_qp_modify(dev->mdev, to_mlx5_state(qp->state),
-                                       MLX5_QP_STATE_RST, in, 0, &qp->mqp))
-                       mlx5_ib_warn(dev, "mlx5_ib: modify QP %06x to RESET failed\n",
-                                    qp->mqp.qpn);
+               if (qp->ibqp.qp_type != IB_QPT_RAW_PACKET) {
+                       mlx5_ib_qp_disable_pagefaults(qp);
+                       err = mlx5_core_qp_modify(dev->mdev,
+                                                 MLX5_CMD_OP_2RST_QP, in, 0,
+                                                 &base->mqp);
+               } else {
+                       err = modify_raw_packet_qp(dev, qp,
+                                                  MLX5_CMD_OP_2RST_QP);
+               }
+               if (err)
+                       mlx5_ib_warn(dev, "mlx5_ib: modify QP 0x%06x to RESET failed\n",
+                                    base->mqp.qpn);
        }
 
        get_cqs(qp, &send_cq, &recv_cq);
 
        if (qp->create_type == MLX5_QP_KERNEL) {
                mlx5_ib_lock_cqs(send_cq, recv_cq);
-               __mlx5_ib_cq_clean(recv_cq, qp->mqp.qpn,
+               __mlx5_ib_cq_clean(recv_cq, base->mqp.qpn,
                                   qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
                if (send_cq != recv_cq)
-                       __mlx5_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
+                       __mlx5_ib_cq_clean(send_cq, base->mqp.qpn,
+                                          NULL);
                mlx5_ib_unlock_cqs(send_cq, recv_cq);
        }
 
-       err = mlx5_core_destroy_qp(dev->mdev, &qp->mqp);
-       if (err)
-               mlx5_ib_warn(dev, "failed to destroy QP 0x%x\n", qp->mqp.qpn);
-       kfree(in);
+       if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
+               destroy_raw_packet_qp(dev, qp);
+       } else {
+               err = mlx5_core_destroy_qp(dev->mdev, &base->mqp);
+               if (err)
+                       mlx5_ib_warn(dev, "failed to destroy QP 0x%x\n",
+                                    base->mqp.qpn);
+       }
 
+       kfree(in);
 
        if (qp->create_type == MLX5_QP_KERNEL)
                destroy_qp_kernel(dev, qp);
        else if (qp->create_type == MLX5_QP_USER)
-               destroy_qp_user(&get_pd(qp)->ibpd, qp);
+               destroy_qp_user(&get_pd(qp)->ibpd, qp, base);
 }
 
 static const char *ib_qp_type_str(enum ib_qp_type type)
@@ -1225,6 +1615,16 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
 
        if (pd) {
                dev = to_mdev(pd->device);
+
+               if (init_attr->qp_type == IB_QPT_RAW_PACKET) {
+                       if (!pd->uobject) {
+                               mlx5_ib_dbg(dev, "Raw Packet QP is not supported for kernel consumers\n");
+                               return ERR_PTR(-EINVAL);
+                       } else if (!to_mucontext(pd->uobject->context)->cqe_version) {
+                               mlx5_ib_dbg(dev, "Raw Packet QP is only supported for CQE version > 0\n");
+                               return ERR_PTR(-EINVAL);
+                       }
+               }
        } else {
                /* being cautious here */
                if (init_attr->qp_type != IB_QPT_XRC_TGT &&
@@ -1250,6 +1650,7 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
                }
 
                /* fall through */
+       case IB_QPT_RAW_PACKET:
        case IB_QPT_RC:
        case IB_QPT_UC:
        case IB_QPT_UD:
@@ -1272,19 +1673,19 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
                else if (is_qp1(init_attr->qp_type))
                        qp->ibqp.qp_num = 1;
                else
-                       qp->ibqp.qp_num = qp->mqp.qpn;
+                       qp->ibqp.qp_num = qp->trans_qp.base.mqp.qpn;
 
                mlx5_ib_dbg(dev, "ib qpnum 0x%x, mlx qpn 0x%x, rcqn 0x%x, scqn 0x%x\n",
-                           qp->ibqp.qp_num, qp->mqp.qpn, to_mcq(init_attr->recv_cq)->mcq.cqn,
+                           qp->ibqp.qp_num, qp->trans_qp.base.mqp.qpn,
+                           to_mcq(init_attr->recv_cq)->mcq.cqn,
                            to_mcq(init_attr->send_cq)->mcq.cqn);
 
-               qp->xrcdn = xrcdn;
+               qp->trans_qp.xrcdn = xrcdn;
 
                break;
 
        case IB_QPT_RAW_IPV6:
        case IB_QPT_RAW_ETHERTYPE:
-       case IB_QPT_RAW_PACKET:
        case IB_QPT_MAX:
        default:
                mlx5_ib_dbg(dev, "unsupported qp type %d\n",
@@ -1318,12 +1719,12 @@ static __be32 to_mlx5_access_flags(struct mlx5_ib_qp *qp, const struct ib_qp_att
        if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
                dest_rd_atomic = attr->max_dest_rd_atomic;
        else
-               dest_rd_atomic = qp->resp_depth;
+               dest_rd_atomic = qp->trans_qp.resp_depth;
 
        if (attr_mask & IB_QP_ACCESS_FLAGS)
                access_flags = attr->qp_access_flags;
        else
-               access_flags = qp->atomic_rd_en;
+               access_flags = qp->trans_qp.atomic_rd_en;
 
        if (!dest_rd_atomic)
                access_flags &= IB_ACCESS_REMOTE_WRITE;
@@ -1360,21 +1761,42 @@ static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate)
        return rate + MLX5_STAT_RATE_OFFSET;
 }
 
-static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah,
+static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev,
+                                     struct mlx5_ib_sq *sq, u8 sl)
+{
+       void *in;
+       void *tisc;
+       int inlen;
+       int err;
+
+       inlen = MLX5_ST_SZ_BYTES(modify_tis_in);
+       in = mlx5_vzalloc(inlen);
+       if (!in)
+               return -ENOMEM;
+
+       MLX5_SET(modify_tis_in, in, bitmask.prio, 1);
+
+       tisc = MLX5_ADDR_OF(modify_tis_in, in, ctx);
+       MLX5_SET(tisc, tisc, prio, ((sl & 0x7) << 1));
+
+       err = mlx5_core_modify_tis(dev, sq->tisn, in, inlen);
+
+       kvfree(in);
+
+       return err;
+}
+
+static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+                        const struct ib_ah_attr *ah,
                         struct mlx5_qp_path *path, u8 port, int attr_mask,
                         u32 path_flags, const struct ib_qp_attr *attr)
 {
+       enum rdma_link_layer ll = rdma_port_get_link_layer(&dev->ib_dev, port);
        int err;
 
-       path->fl = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0;
-       path->free_ar = (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x80 : 0;
-
        if (attr_mask & IB_QP_PKEY_INDEX)
                path->pkey_index = attr->pkey_index;
 
-       path->grh_mlid  = ah->src_path_bits & 0x7f;
-       path->rlid      = cpu_to_be16(ah->dlid);
-
        if (ah->ah_flags & IB_AH_GRH) {
                if (ah->grh.sgid_index >=
                    dev->mdev->port_caps[port - 1].gid_table_len) {
@@ -1383,7 +1805,27 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah,
                               dev->mdev->port_caps[port - 1].gid_table_len);
                        return -EINVAL;
                }
-               path->grh_mlid |= 1 << 7;
+       }
+
+       if (ll == IB_LINK_LAYER_ETHERNET) {
+               if (!(ah->ah_flags & IB_AH_GRH))
+                       return -EINVAL;
+               memcpy(path->rmac, ah->dmac, sizeof(ah->dmac));
+               path->udp_sport = mlx5_get_roce_udp_sport(dev, port,
+                                                         ah->grh.sgid_index);
+               path->dci_cfi_prio_sl = (ah->sl & 0x7) << 4;
+       } else {
+               path->fl = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0;
+               path->free_ar = (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x80 :
+                                                                       0;
+               path->rlid = cpu_to_be16(ah->dlid);
+               path->grh_mlid = ah->src_path_bits & 0x7f;
+               if (ah->ah_flags & IB_AH_GRH)
+                       path->grh_mlid  |= 1 << 7;
+               path->dci_cfi_prio_sl = ah->sl & 0xf;
+       }
+
+       if (ah->ah_flags & IB_AH_GRH) {
                path->mgid_index = ah->grh.sgid_index;
                path->hop_limit  = ah->grh.hop_limit;
                path->tclass_flowlabel =
@@ -1401,7 +1843,10 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah,
        if (attr_mask & IB_QP_TIMEOUT)
                path->ackto_lt = attr->timeout << 3;
 
-       path->sl = ah->sl & 0xf;
+       if ((qp->ibqp.qp_type == IB_QPT_RAW_PACKET) && qp->sq.wqe_cnt)
+               return modify_raw_packet_eth_prio(dev->mdev,
+                                                 &qp->raw_packet_qp.sq,
+                                                 ah->sl & 0xf);
 
        return 0;
 }
@@ -1549,12 +1994,154 @@ static int ib_mask_to_mlx5_opt(int ib_mask)
        return result;
 }
 
+static int modify_raw_packet_qp_rq(struct mlx5_core_dev *dev,
+                                  struct mlx5_ib_rq *rq, int new_state)
+{
+       void *in;
+       void *rqc;
+       int inlen;
+       int err;
+
+       inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
+       in = mlx5_vzalloc(inlen);
+       if (!in)
+               return -ENOMEM;
+
+       MLX5_SET(modify_rq_in, in, rq_state, rq->state);
+
+       rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
+       MLX5_SET(rqc, rqc, state, new_state);
+
+       err = mlx5_core_modify_rq(dev, rq->base.mqp.qpn, in, inlen);
+       if (err)
+               goto out;
+
+       rq->state = new_state;
+
+out:
+       kvfree(in);
+       return err;
+}
+
+static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev,
+                                  struct mlx5_ib_sq *sq, int new_state)
+{
+       void *in;
+       void *sqc;
+       int inlen;
+       int err;
+
+       inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
+       in = mlx5_vzalloc(inlen);
+       if (!in)
+               return -ENOMEM;
+
+       MLX5_SET(modify_sq_in, in, sq_state, sq->state);
+
+       sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
+       MLX5_SET(sqc, sqc, state, new_state);
+
+       err = mlx5_core_modify_sq(dev, sq->base.mqp.qpn, in, inlen);
+       if (err)
+               goto out;
+
+       sq->state = new_state;
+
+out:
+       kvfree(in);
+       return err;
+}
+
+static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+                               u16 operation)
+{
+       struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
+       struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
+       struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
+       int rq_state;
+       int sq_state;
+       int err;
+
+       switch (operation) {
+       case MLX5_CMD_OP_RST2INIT_QP:
+               rq_state = MLX5_RQC_STATE_RDY;
+               sq_state = MLX5_SQC_STATE_RDY;
+               break;
+       case MLX5_CMD_OP_2ERR_QP:
+               rq_state = MLX5_RQC_STATE_ERR;
+               sq_state = MLX5_SQC_STATE_ERR;
+               break;
+       case MLX5_CMD_OP_2RST_QP:
+               rq_state = MLX5_RQC_STATE_RST;
+               sq_state = MLX5_SQC_STATE_RST;
+               break;
+       case MLX5_CMD_OP_INIT2INIT_QP:
+       case MLX5_CMD_OP_INIT2RTR_QP:
+       case MLX5_CMD_OP_RTR2RTS_QP:
+       case MLX5_CMD_OP_RTS2RTS_QP:
+               /* Nothing to do here... */
+               return 0;
+       default:
+               WARN_ON(1);
+               return -EINVAL;
+       }
+
+       if (qp->rq.wqe_cnt) {
+               err =  modify_raw_packet_qp_rq(dev->mdev, rq, rq_state);
+               if (err)
+                       return err;
+       }
+
+       if (qp->sq.wqe_cnt)
+               return modify_raw_packet_qp_sq(dev->mdev, sq, sq_state);
+
+       return 0;
+}
+
 static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
                               const struct ib_qp_attr *attr, int attr_mask,
                               enum ib_qp_state cur_state, enum ib_qp_state new_state)
 {
+       static const u16 optab[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE] = {
+               [MLX5_QP_STATE_RST] = {
+                       [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
+                       [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
+                       [MLX5_QP_STATE_INIT]    = MLX5_CMD_OP_RST2INIT_QP,
+               },
+               [MLX5_QP_STATE_INIT]  = {
+                       [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
+                       [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
+                       [MLX5_QP_STATE_INIT]    = MLX5_CMD_OP_INIT2INIT_QP,
+                       [MLX5_QP_STATE_RTR]     = MLX5_CMD_OP_INIT2RTR_QP,
+               },
+               [MLX5_QP_STATE_RTR]   = {
+                       [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
+                       [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
+                       [MLX5_QP_STATE_RTS]     = MLX5_CMD_OP_RTR2RTS_QP,
+               },
+               [MLX5_QP_STATE_RTS]   = {
+                       [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
+                       [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
+                       [MLX5_QP_STATE_RTS]     = MLX5_CMD_OP_RTS2RTS_QP,
+               },
+               [MLX5_QP_STATE_SQD] = {
+                       [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
+                       [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
+               },
+               [MLX5_QP_STATE_SQER] = {
+                       [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
+                       [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
+                       [MLX5_QP_STATE_RTS]     = MLX5_CMD_OP_SQERR2RTS_QP,
+               },
+               [MLX5_QP_STATE_ERR] = {
+                       [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
+                       [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
+               }
+       };
+
        struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
        struct mlx5_ib_qp *qp = to_mqp(ibqp);
+       struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
        struct mlx5_ib_cq *send_cq, *recv_cq;
        struct mlx5_qp_context *context;
        struct mlx5_modify_qp_mbox_in *in;
@@ -1564,6 +2151,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
        int sqd_event;
        int mlx5_st;
        int err;
+       u16 op;
 
        in = kzalloc(sizeof(*in), GFP_KERNEL);
        if (!in)
@@ -1623,7 +2211,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
                context->pri_path.port = attr->port_num;
 
        if (attr_mask & IB_QP_AV) {
-               err = mlx5_set_path(dev, &attr->ah_attr, &context->pri_path,
+               err = mlx5_set_path(dev, qp, &attr->ah_attr, &context->pri_path,
                                    attr_mask & IB_QP_PORT ? attr->port_num : qp->port,
                                    attr_mask, 0, attr);
                if (err)
@@ -1634,7 +2222,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
                context->pri_path.ackto_lt |= attr->timeout << 3;
 
        if (attr_mask & IB_QP_ALT_PATH) {
-               err = mlx5_set_path(dev, &attr->alt_ah_attr, &context->alt_path,
+               err = mlx5_set_path(dev, qp, &attr->alt_ah_attr,
+                                   &context->alt_path,
                                    attr->alt_port_num, attr_mask, 0, attr);
                if (err)
                        goto out;
@@ -1706,41 +2295,51 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
         * again to RTS, and may cause the driver and the device to get out of
         * sync. */
        if (cur_state != IB_QPS_RESET && cur_state != IB_QPS_ERR &&
-           (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR))
+           (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR) &&
+           (qp->ibqp.qp_type != IB_QPT_RAW_PACKET))
                mlx5_ib_qp_disable_pagefaults(qp);
 
+       if (mlx5_cur >= MLX5_QP_NUM_STATE || mlx5_new >= MLX5_QP_NUM_STATE ||
+           !optab[mlx5_cur][mlx5_new])
+               goto out;
+
+       op = optab[mlx5_cur][mlx5_new];
        optpar = ib_mask_to_mlx5_opt(attr_mask);
        optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st];
        in->optparam = cpu_to_be32(optpar);
-       err = mlx5_core_qp_modify(dev->mdev, to_mlx5_state(cur_state),
-                                 to_mlx5_state(new_state), in, sqd_event,
-                                 &qp->mqp);
+
+       if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET)
+               err = modify_raw_packet_qp(dev, qp, op);
+       else
+               err = mlx5_core_qp_modify(dev->mdev, op, in, sqd_event,
+                                         &base->mqp);
        if (err)
                goto out;
 
-       if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
+       if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT &&
+           (qp->ibqp.qp_type != IB_QPT_RAW_PACKET))
                mlx5_ib_qp_enable_pagefaults(qp);
 
        qp->state = new_state;
 
        if (attr_mask & IB_QP_ACCESS_FLAGS)
-               qp->atomic_rd_en = attr->qp_access_flags;
+               qp->trans_qp.atomic_rd_en = attr->qp_access_flags;
        if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
-               qp->resp_depth = attr->max_dest_rd_atomic;
+               qp->trans_qp.resp_depth = attr->max_dest_rd_atomic;
        if (attr_mask & IB_QP_PORT)
                qp->port = attr->port_num;
        if (attr_mask & IB_QP_ALT_PATH)
-               qp->alt_port = attr->alt_port_num;
+               qp->trans_qp.alt_port = attr->alt_port_num;
 
        /*
         * If we moved a kernel QP to RESET, clean up all old CQ
         * entries and reinitialize the QP.
         */
        if (new_state == IB_QPS_RESET && !ibqp->uobject) {
-               mlx5_ib_cq_clean(recv_cq, qp->mqp.qpn,
+               mlx5_ib_cq_clean(recv_cq, base->mqp.qpn,
                                 ibqp->srq ? to_msrq(ibqp->srq) : NULL);
                if (send_cq != recv_cq)
-                       mlx5_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
+                       mlx5_ib_cq_clean(send_cq, base->mqp.qpn, NULL);
 
                qp->rq.head = 0;
                qp->rq.tail = 0;
@@ -1765,15 +2364,21 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
        enum ib_qp_state cur_state, new_state;
        int err = -EINVAL;
        int port;
+       enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED;
 
        mutex_lock(&qp->mutex);
 
        cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
        new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
 
+       if (!(cur_state == new_state && cur_state == IB_QPS_RESET)) {
+               port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
+               ll = dev->ib_dev.get_link_layer(&dev->ib_dev, port);
+       }
+
        if (ibqp->qp_type != MLX5_IB_QPT_REG_UMR &&
            !ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask,
-                               IB_LINK_LAYER_UNSPECIFIED))
+                               ll))
                goto out;
 
        if ((attr_mask & IB_QP_PORT) &&
@@ -2570,7 +3175,7 @@ static void finish_wqe(struct mlx5_ib_qp *qp,
 
        ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8) |
                                             mlx5_opcode | ((u32)opmod << 24));
-       ctrl->qpn_ds = cpu_to_be32(size | (qp->mqp.qpn << 8));
+       ctrl->qpn_ds = cpu_to_be32(size | (qp->trans_qp.base.mqp.qpn << 8));
        ctrl->fm_ce_se |= fence;
        qp->fm_cache = next_fence;
        if (unlikely(qp->wq_sig))
@@ -3003,7 +3608,7 @@ static void to_ib_ah_attr(struct mlx5_ib_dev *ibdev, struct ib_ah_attr *ib_ah_at
            ib_ah_attr->port_num > MLX5_CAP_GEN(dev, num_ports))
                return;
 
-       ib_ah_attr->sl = path->sl & 0xf;
+       ib_ah_attr->sl = path->dci_cfi_prio_sl & 0xf;
 
        ib_ah_attr->dlid          = be16_to_cpu(path->rlid);
        ib_ah_attr->src_path_bits = path->grh_mlid & 0x7f;
@@ -3021,39 +3626,153 @@ static void to_ib_ah_attr(struct mlx5_ib_dev *ibdev, struct ib_ah_attr *ib_ah_at
        }
 }
 
-int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
-                    struct ib_qp_init_attr *qp_init_attr)
+static int query_raw_packet_qp_sq_state(struct mlx5_ib_dev *dev,
+                                       struct mlx5_ib_sq *sq,
+                                       u8 *sq_state)
+{
+       void *out;
+       void *sqc;
+       int inlen;
+       int err;
+
+       inlen = MLX5_ST_SZ_BYTES(query_sq_out);
+       out = mlx5_vzalloc(inlen);
+       if (!out)
+               return -ENOMEM;
+
+       err = mlx5_core_query_sq(dev->mdev, sq->base.mqp.qpn, out);
+       if (err)
+               goto out;
+
+       sqc = MLX5_ADDR_OF(query_sq_out, out, sq_context);
+       *sq_state = MLX5_GET(sqc, sqc, state);
+       sq->state = *sq_state;
+
+out:
+       kvfree(out);
+       return err;
+}
+
+static int query_raw_packet_qp_rq_state(struct mlx5_ib_dev *dev,
+                                       struct mlx5_ib_rq *rq,
+                                       u8 *rq_state)
+{
+       void *out;
+       void *rqc;
+       int inlen;
+       int err;
+
+       inlen = MLX5_ST_SZ_BYTES(query_rq_out);
+       out = mlx5_vzalloc(inlen);
+       if (!out)
+               return -ENOMEM;
+
+       err = mlx5_core_query_rq(dev->mdev, rq->base.mqp.qpn, out);
+       if (err)
+               goto out;
+
+       rqc = MLX5_ADDR_OF(query_rq_out, out, rq_context);
+       *rq_state = MLX5_GET(rqc, rqc, state);
+       rq->state = *rq_state;
+
+out:
+       kvfree(out);
+       return err;
+}
+
+static int sqrq_state_to_qp_state(u8 sq_state, u8 rq_state,
+                                 struct mlx5_ib_qp *qp, u8 *qp_state)
+{
+       static const u8 sqrq_trans[MLX5_RQ_NUM_STATE][MLX5_SQ_NUM_STATE] = {
+               [MLX5_RQC_STATE_RST] = {
+                       [MLX5_SQC_STATE_RST]    = IB_QPS_RESET,
+                       [MLX5_SQC_STATE_RDY]    = MLX5_QP_STATE_BAD,
+                       [MLX5_SQC_STATE_ERR]    = MLX5_QP_STATE_BAD,
+                       [MLX5_SQ_STATE_NA]      = IB_QPS_RESET,
+               },
+               [MLX5_RQC_STATE_RDY] = {
+                       [MLX5_SQC_STATE_RST]    = MLX5_QP_STATE_BAD,
+                       [MLX5_SQC_STATE_RDY]    = MLX5_QP_STATE,
+                       [MLX5_SQC_STATE_ERR]    = IB_QPS_SQE,
+                       [MLX5_SQ_STATE_NA]      = MLX5_QP_STATE,
+               },
+               [MLX5_RQC_STATE_ERR] = {
+                       [MLX5_SQC_STATE_RST]    = MLX5_QP_STATE_BAD,
+                       [MLX5_SQC_STATE_RDY]    = MLX5_QP_STATE_BAD,
+                       [MLX5_SQC_STATE_ERR]    = IB_QPS_ERR,
+                       [MLX5_SQ_STATE_NA]      = IB_QPS_ERR,
+               },
+               [MLX5_RQ_STATE_NA] = {
+                       [MLX5_SQC_STATE_RST]    = IB_QPS_RESET,
+                       [MLX5_SQC_STATE_RDY]    = MLX5_QP_STATE,
+                       [MLX5_SQC_STATE_ERR]    = MLX5_QP_STATE,
+                       [MLX5_SQ_STATE_NA]      = MLX5_QP_STATE_BAD,
+               },
+       };
+
+       *qp_state = sqrq_trans[rq_state][sq_state];
+
+       if (*qp_state == MLX5_QP_STATE_BAD) {
+               WARN(1, "Buggy Raw Packet QP state, SQ 0x%x state: 0x%x, RQ 0x%x state: 0x%x",
+                    qp->raw_packet_qp.sq.base.mqp.qpn, sq_state,
+                    qp->raw_packet_qp.rq.base.mqp.qpn, rq_state);
+               return -EINVAL;
+       }
+
+       if (*qp_state == MLX5_QP_STATE)
+               *qp_state = qp->state;
+
+       return 0;
+}
+
+static int query_raw_packet_qp_state(struct mlx5_ib_dev *dev,
+                                    struct mlx5_ib_qp *qp,
+                                    u8 *raw_packet_qp_state)
+{
+       struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
+       struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
+       struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
+       int err;
+       u8 sq_state = MLX5_SQ_STATE_NA;
+       u8 rq_state = MLX5_RQ_STATE_NA;
+
+       if (qp->sq.wqe_cnt) {
+               err = query_raw_packet_qp_sq_state(dev, sq, &sq_state);
+               if (err)
+                       return err;
+       }
+
+       if (qp->rq.wqe_cnt) {
+               err = query_raw_packet_qp_rq_state(dev, rq, &rq_state);
+               if (err)
+                       return err;
+       }
+
+       return sqrq_state_to_qp_state(sq_state, rq_state, qp,
+                                     raw_packet_qp_state);
+}
+
+static int query_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+                        struct ib_qp_attr *qp_attr)
 {
-       struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
-       struct mlx5_ib_qp *qp = to_mqp(ibqp);
        struct mlx5_query_qp_mbox_out *outb;
        struct mlx5_qp_context *context;
        int mlx5_state;
        int err = 0;
 
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-       /*
-        * Wait for any outstanding page faults, in case the user frees memory
-        * based upon this query's result.
-        */
-       flush_workqueue(mlx5_ib_page_fault_wq);
-#endif
-
-       mutex_lock(&qp->mutex);
        outb = kzalloc(sizeof(*outb), GFP_KERNEL);
-       if (!outb) {
-               err = -ENOMEM;
-               goto out;
-       }
+       if (!outb)
+               return -ENOMEM;
+
        context = &outb->ctx;
-       err = mlx5_core_qp_query(dev->mdev, &qp->mqp, outb, sizeof(*outb));
+       err = mlx5_core_qp_query(dev->mdev, &qp->trans_qp.base.mqp, outb,
+                                sizeof(*outb));
        if (err)
-               goto out_free;
+               goto out;
 
        mlx5_state = be32_to_cpu(context->flags) >> 28;
 
        qp->state                    = to_ib_qp_state(mlx5_state);
-       qp_attr->qp_state            = qp->state;
        qp_attr->path_mtu            = context->mtu_msgmax >> 5;
        qp_attr->path_mig_state      =
                to_ib_mig_state((be32_to_cpu(context->flags) >> 11) & 0x3);
@@ -3087,6 +3806,43 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr
        qp_attr->retry_cnt          = (be32_to_cpu(context->params1) >> 16) & 0x7;
        qp_attr->rnr_retry          = (be32_to_cpu(context->params1) >> 13) & 0x7;
        qp_attr->alt_timeout        = context->alt_path.ackto_lt >> 3;
+
+out:
+       kfree(outb);
+       return err;
+}
+
+int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
+                    int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
+{
+       struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+       struct mlx5_ib_qp *qp = to_mqp(ibqp);
+       int err = 0;
+       u8 raw_packet_qp_state;
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+       /*
+        * Wait for any outstanding page faults, in case the user frees memory
+        * based upon this query's result.
+        */
+       flush_workqueue(mlx5_ib_page_fault_wq);
+#endif
+
+       mutex_lock(&qp->mutex);
+
+       if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
+               err = query_raw_packet_qp_state(dev, qp, &raw_packet_qp_state);
+               if (err)
+                       goto out;
+               qp->state = raw_packet_qp_state;
+               qp_attr->port_num = 1;
+       } else {
+               err = query_qp_attr(dev, qp, qp_attr);
+               if (err)
+                       goto out;
+       }
+
+       qp_attr->qp_state            = qp->state;
        qp_attr->cur_qp_state        = qp_attr->qp_state;
        qp_attr->cap.max_recv_wr     = qp->rq.wqe_cnt;
        qp_attr->cap.max_recv_sge    = qp->rq.max_gs;
@@ -3110,12 +3866,16 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr
        if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK)
                qp_init_attr->create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK;
 
+       if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL)
+               qp_init_attr->create_flags |= IB_QP_CREATE_CROSS_CHANNEL;
+       if (qp->flags & MLX5_IB_QP_MANAGED_SEND)
+               qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_SEND;
+       if (qp->flags & MLX5_IB_QP_MANAGED_RECV)
+               qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_RECV;
+
        qp_init_attr->sq_sig_type = qp->sq_signal_bits & MLX5_WQE_CTRL_CQ_UPDATE ?
                IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
 
-out_free:
-       kfree(outb);
-
 out:
        mutex_unlock(&qp->mutex);
        return err;
index e008505e96e9778ff9a71cfc021de7ecee6c3331..4659256cd95e698c3a9dad3c69407276e2117f29 100644 (file)
@@ -78,28 +78,41 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
                           struct ib_udata *udata, int buf_size, int *inlen)
 {
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
-       struct mlx5_ib_create_srq ucmd;
+       struct mlx5_ib_create_srq ucmd = {};
        size_t ucmdlen;
+       void *xsrqc;
        int err;
        int npages;
        int page_shift;
        int ncont;
        u32 offset;
+       u32 uidx = MLX5_IB_DEFAULT_UIDX;
+       int drv_data = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr);
 
-       ucmdlen =
-               (udata->inlen - sizeof(struct ib_uverbs_cmd_hdr) <
-                sizeof(ucmd)) ? (sizeof(ucmd) -
-                                 sizeof(ucmd.reserved)) : sizeof(ucmd);
+       if (drv_data < 0)
+               return -EINVAL;
+
+       ucmdlen = (drv_data < sizeof(ucmd)) ?
+                 drv_data : sizeof(ucmd);
 
        if (ib_copy_from_udata(&ucmd, udata, ucmdlen)) {
                mlx5_ib_dbg(dev, "failed copy udata\n");
                return -EFAULT;
        }
 
-       if (ucmdlen == sizeof(ucmd) &&
-           ucmd.reserved != 0)
+       if (ucmd.reserved0 || ucmd.reserved1)
                return -EINVAL;
 
+       if (drv_data > sizeof(ucmd) &&
+           !ib_is_udata_cleared(udata, sizeof(ucmd),
+                                drv_data - sizeof(ucmd)))
+               return -EINVAL;
+
+       err = get_srq_user_index(to_mucontext(pd->uobject->context),
+                                &ucmd, udata->inlen, &uidx);
+       if (err)
+               return err;
+
        srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE);
 
        srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, buf_size,
@@ -138,6 +151,12 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
        (*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
        (*in)->ctx.pgoff_cqn = cpu_to_be32(offset << 26);
 
+       if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) {
+               xsrqc = MLX5_ADDR_OF(create_xrc_srq_in, *in,
+                                    xrc_srq_context_entry);
+               MLX5_SET(xrc_srqc, xsrqc, user_index, uidx);
+       }
+
        return 0;
 
 err_in:
@@ -158,6 +177,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
        struct mlx5_wqe_srq_next_seg *next;
        int page_shift;
        int npages;
+       void *xsrqc;
 
        err = mlx5_db_alloc(dev->mdev, &srq->db);
        if (err) {
@@ -204,6 +224,13 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
 
        (*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
 
+       if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) {
+               xsrqc = MLX5_ADDR_OF(create_xrc_srq_in, *in,
+                                    xrc_srq_context_entry);
+               /* 0xffffff means we ask to work with cqe version 0 */
+               MLX5_SET(xrc_srqc, xsrqc, user_index, MLX5_IB_DEFAULT_UIDX);
+       }
+
        return 0;
 
 err_in:
index 76fb7b927d373ef2710d412ec015309fb24fdd72..b94a55404a596dab323a843dec9a817fddbaff74 100644 (file)
@@ -35,6 +35,8 @@
 
 #include <linux/types.h>
 
+#include "mlx5_ib.h"
+
 enum {
        MLX5_QP_FLAG_SIGNATURE          = 1 << 0,
        MLX5_QP_FLAG_SCATTER_CQE        = 1 << 1,
@@ -66,7 +68,15 @@ struct mlx5_ib_alloc_ucontext_req_v2 {
        __u32   total_num_uuars;
        __u32   num_low_latency_uuars;
        __u32   flags;
-       __u32   reserved;
+       __u32   comp_mask;
+       __u8    max_cqe_version;
+       __u8    reserved0;
+       __u16   reserved1;
+       __u32   reserved2;
+};
+
+enum mlx5_ib_alloc_ucontext_resp_mask {
+       MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET = 1UL << 0,
 };
 
 struct mlx5_ib_alloc_ucontext_resp {
@@ -80,7 +90,13 @@ struct mlx5_ib_alloc_ucontext_resp {
        __u32   max_recv_wr;
        __u32   max_srq_recv_wr;
        __u16   num_ports;
-       __u16   reserved;
+       __u16   reserved1;
+       __u32   comp_mask;
+       __u32   response_length;
+       __u8    cqe_version;
+       __u8    reserved2;
+       __u16   reserved3;
+       __u64   hca_core_clock_offset;
 };
 
 struct mlx5_ib_alloc_pd_resp {
@@ -110,7 +126,9 @@ struct mlx5_ib_create_srq {
        __u64   buf_addr;
        __u64   db_addr;
        __u32   flags;
-       __u32   reserved; /* explicit padding (optional on i386) */
+       __u32   reserved0; /* explicit padding (optional on i386) */
+       __u32   uidx;
+       __u32   reserved1;
 };
 
 struct mlx5_ib_create_srq_resp {
@@ -125,9 +143,48 @@ struct mlx5_ib_create_qp {
        __u32   rq_wqe_count;
        __u32   rq_wqe_shift;
        __u32   flags;
+       __u32   uidx;
+       __u32   reserved0;
+       __u64   sq_buf_addr;
 };
 
 struct mlx5_ib_create_qp_resp {
        __u32   uuar_index;
 };
+
+static inline int get_qp_user_index(struct mlx5_ib_ucontext *ucontext,
+                                   struct mlx5_ib_create_qp *ucmd,
+                                   int inlen,
+                                   u32 *user_index)
+{
+       u8 cqe_version = ucontext->cqe_version;
+
+       if (field_avail(struct mlx5_ib_create_qp, uidx, inlen) &&
+           !cqe_version && (ucmd->uidx == MLX5_IB_DEFAULT_UIDX))
+               return 0;
+
+       if (!!(field_avail(struct mlx5_ib_create_qp, uidx, inlen) !=
+              !!cqe_version))
+               return -EINVAL;
+
+       return verify_assign_uidx(cqe_version, ucmd->uidx, user_index);
+}
+
+static inline int get_srq_user_index(struct mlx5_ib_ucontext *ucontext,
+                                    struct mlx5_ib_create_srq *ucmd,
+                                    int inlen,
+                                    u32 *user_index)
+{
+       u8 cqe_version = ucontext->cqe_version;
+
+       if (field_avail(struct mlx5_ib_create_srq, uidx, inlen) &&
+           !cqe_version && (ucmd->uidx == MLX5_IB_DEFAULT_UIDX))
+               return 0;
+
+       if (!!(field_avail(struct mlx5_ib_create_srq, uidx, inlen) !=
+              !!cqe_version))
+               return -EINVAL;
+
+       return verify_assign_uidx(cqe_version, ucmd->uidx, user_index);
+}
 #endif /* MLX5_IB_USER_H */
index 40ba8333815571ff2b8c5e24e173713f3a16e66e..a6531ffe29a6f7afebd52ae41223f7fd7d74d131 100644 (file)
@@ -608,9 +608,6 @@ static inline int mthca_poll_one(struct mthca_dev *dev,
                        entry->opcode    = IB_WC_FETCH_ADD;
                        entry->byte_len  = MTHCA_ATOMIC_BYTE_LEN;
                        break;
-               case MTHCA_OPCODE_BIND_MW:
-                       entry->opcode    = IB_WC_BIND_MW;
-                       break;
                default:
                        entry->opcode    = MTHCA_OPCODE_INVALID;
                        break;
index dc2d48c59e6274c54e84af0d26b4da1b56271b13..9866c35cc977d5036b8b7c6b1126a3508eef9212 100644 (file)
@@ -898,89 +898,6 @@ static struct ib_mr *mthca_get_dma_mr(struct ib_pd *pd, int acc)
        return &mr->ibmr;
 }
 
-static struct ib_mr *mthca_reg_phys_mr(struct ib_pd       *pd,
-                                      struct ib_phys_buf *buffer_list,
-                                      int                 num_phys_buf,
-                                      int                 acc,
-                                      u64                *iova_start)
-{
-       struct mthca_mr *mr;
-       u64 *page_list;
-       u64 total_size;
-       unsigned long mask;
-       int shift;
-       int npages;
-       int err;
-       int i, j, n;
-
-       mask = buffer_list[0].addr ^ *iova_start;
-       total_size = 0;
-       for (i = 0; i < num_phys_buf; ++i) {
-               if (i != 0)
-                       mask |= buffer_list[i].addr;
-               if (i != num_phys_buf - 1)
-                       mask |= buffer_list[i].addr + buffer_list[i].size;
-
-               total_size += buffer_list[i].size;
-       }
-
-       if (mask & ~PAGE_MASK)
-               return ERR_PTR(-EINVAL);
-
-       shift = __ffs(mask | 1 << 31);
-
-       buffer_list[0].size += buffer_list[0].addr & ((1ULL << shift) - 1);
-       buffer_list[0].addr &= ~0ull << shift;
-
-       mr = kmalloc(sizeof *mr, GFP_KERNEL);
-       if (!mr)
-               return ERR_PTR(-ENOMEM);
-
-       npages = 0;
-       for (i = 0; i < num_phys_buf; ++i)
-               npages += (buffer_list[i].size + (1ULL << shift) - 1) >> shift;
-
-       if (!npages)
-               return &mr->ibmr;
-
-       page_list = kmalloc(npages * sizeof *page_list, GFP_KERNEL);
-       if (!page_list) {
-               kfree(mr);
-               return ERR_PTR(-ENOMEM);
-       }
-
-       n = 0;
-       for (i = 0; i < num_phys_buf; ++i)
-               for (j = 0;
-                    j < (buffer_list[i].size + (1ULL << shift) - 1) >> shift;
-                    ++j)
-                       page_list[n++] = buffer_list[i].addr + ((u64) j << shift);
-
-       mthca_dbg(to_mdev(pd->device), "Registering memory at %llx (iova %llx) "
-                 "in PD %x; shift %d, npages %d.\n",
-                 (unsigned long long) buffer_list[0].addr,
-                 (unsigned long long) *iova_start,
-                 to_mpd(pd)->pd_num,
-                 shift, npages);
-
-       err = mthca_mr_alloc_phys(to_mdev(pd->device),
-                                 to_mpd(pd)->pd_num,
-                                 page_list, shift, npages,
-                                 *iova_start, total_size,
-                                 convert_access(acc), mr);
-
-       if (err) {
-               kfree(page_list);
-               kfree(mr);
-               return ERR_PTR(err);
-       }
-
-       kfree(page_list);
-       mr->umem = NULL;
-
-       return &mr->ibmr;
-}
-
 static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                                       u64 virt, int acc, struct ib_udata *udata)
 {
@@ -1346,7 +1263,6 @@ int mthca_register_device(struct mthca_dev *dev)
        dev->ib_dev.destroy_cq           = mthca_destroy_cq;
        dev->ib_dev.poll_cq              = mthca_poll_cq;
        dev->ib_dev.get_dma_mr           = mthca_get_dma_mr;
-       dev->ib_dev.reg_phys_mr          = mthca_reg_phys_mr;
        dev->ib_dev.reg_user_mr          = mthca_reg_user_mr;
        dev->ib_dev.dereg_mr             = mthca_dereg_mr;
        dev->ib_dev.get_port_immutable   = mthca_port_immutable;
index 35fe506e2cfa892259a4975919b07355be3df345..96e5fb91fb48e7c48d802e8b30e7e5af20c6a630 100644 (file)
@@ -1485,7 +1485,7 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
        u16 pkey;
 
        ib_ud_header_init(256, /* assume a MAD */ 1, 0, 0,
-                         mthca_ah_grh_present(to_mah(wr->ah)), 0,
+                         mthca_ah_grh_present(to_mah(wr->ah)), 0, 0, 0,
                          &sqp->ud_header);
 
        err = mthca_read_ah(dev, to_mah(wr->ah), &sqp->ud_header);
index 8a3ad170d790cc336c08a527db314d859beefd6c..cb9f0f27308de0dd439844cc06742a74caa52899 100644 (file)
@@ -134,7 +134,7 @@ static void record_ird_ord(struct nes_cm_node *, u16, u16);
 /* External CM API Interface */
 /* instance of function pointers for client API */
 /* set address of this instance to cm_core->cm_ops at cm_core alloc */
-static struct nes_cm_ops nes_cm_api = {
+static const struct nes_cm_ops nes_cm_api = {
        mini_cm_accelerated,
        mini_cm_listen,
        mini_cm_del_listen,
@@ -3232,7 +3232,6 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        int passive_state;
        struct nes_ib_device *nesibdev;
        struct ib_mr *ibmr = NULL;
-       struct ib_phys_buf ibphysbuf;
        struct nes_pd *nespd;
        u64 tagged_offset;
        u8 mpa_frame_offset = 0;
@@ -3316,21 +3315,19 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
                u64temp = (unsigned long)nesqp;
                nesibdev = nesvnic->nesibdev;
                nespd = nesqp->nespd;
-               ibphysbuf.addr = nesqp->ietf_frame_pbase + mpa_frame_offset;
-               ibphysbuf.size = buff_len;
                tagged_offset = (u64)(unsigned long)*start_buff;
-               ibmr = nesibdev->ibdev.reg_phys_mr((struct ib_pd *)nespd,
-                                                  &ibphysbuf, 1,
-                                                  IB_ACCESS_LOCAL_WRITE,
-                                                  &tagged_offset);
-               if (!ibmr) {
+               ibmr = nes_reg_phys_mr(&nespd->ibpd,
+                               nesqp->ietf_frame_pbase + mpa_frame_offset,
+                               buff_len, IB_ACCESS_LOCAL_WRITE,
+                               &tagged_offset);
+               if (IS_ERR(ibmr)) {
                        nes_debug(NES_DBG_CM, "Unable to register memory region"
                                  "for lSMM for cm_node = %p \n",
                                  cm_node);
                        pci_free_consistent(nesdev->pcidev,
                                            nesqp->private_data_len + nesqp->ietf_frame_size,
                                            nesqp->ietf_frame, nesqp->ietf_frame_pbase);
-                       return -ENOMEM;
+                       return PTR_ERR(ibmr);
                }
 
                ibmr->pd = &nespd->ibpd;
index 32a6420c29400184ea8f392c9e7004eb51611b67..147c2c884227d82ad3f40cec561a91abee71f80a 100644 (file)
@@ -423,7 +423,7 @@ struct nes_cm_core {
 
        struct timer_list       tcp_timer;
 
-       struct nes_cm_ops       *api;
+       const struct nes_cm_ops *api;
 
        int (*post_event)(struct nes_cm_event *event);
        atomic_t                events_posted;
index 2042c0f2975942a3284bce33eda298cfc32ca11d..6d3a169c049b315d764dc1eec9dd47436c53b1a4 100644 (file)
@@ -727,7 +727,7 @@ int nes_arp_table(struct nes_device *nesdev, u32 ip_addr, u8 *mac_addr, u32 acti
        if (action == NES_ARP_DELETE) {
                nes_debug(NES_DBG_NETDEV, "DELETE, arp_index=%d\n", arp_index);
                nesadapter->arp_table[arp_index].ip_addr = 0;
-               memset(nesadapter->arp_table[arp_index].mac_addr, 0x00, ETH_ALEN);
+               eth_zero_addr(nesadapter->arp_table[arp_index].mac_addr);
                nes_free_resource(nesadapter, nesadapter->allocated_arps, arp_index);
                return arp_index;
        }
index 137880a19ebe4827bc7f8b419ba6faf9dc50de41..8c4daf7f22ec14fc5a56b13f5ddeca9c09a142e7 100644 (file)
@@ -206,80 +206,6 @@ static int nes_dealloc_mw(struct ib_mw *ibmw)
 }
 
 
-/**
- * nes_bind_mw
- */
-static int nes_bind_mw(struct ib_qp *ibqp, struct ib_mw *ibmw,
-               struct ib_mw_bind *ibmw_bind)
-{
-       u64 u64temp;
-       struct nes_vnic *nesvnic = to_nesvnic(ibqp->device);
-       struct nes_device *nesdev = nesvnic->nesdev;
-       /* struct nes_mr *nesmr = to_nesmw(ibmw); */
-       struct nes_qp *nesqp = to_nesqp(ibqp);
-       struct nes_hw_qp_wqe *wqe;
-       unsigned long flags = 0;
-       u32 head;
-       u32 wqe_misc = 0;
-       u32 qsize;
-
-       if (nesqp->ibqp_state > IB_QPS_RTS)
-               return -EINVAL;
-
-       spin_lock_irqsave(&nesqp->lock, flags);
-
-       head = nesqp->hwqp.sq_head;
-       qsize = nesqp->hwqp.sq_tail;
-
-       /* Check for SQ overflow */
-       if (((head + (2 * qsize) - nesqp->hwqp.sq_tail) % qsize) == (qsize - 1)) {
-               spin_unlock_irqrestore(&nesqp->lock, flags);
-               return -ENOMEM;
-       }
-
-       wqe = &nesqp->hwqp.sq_vbase[head];
-       /* nes_debug(NES_DBG_MR, "processing sq wqe at %p, head = %u.\n", wqe, head); */
-       nes_fill_init_qp_wqe(wqe, nesqp, head);
-       u64temp = ibmw_bind->wr_id;
-       set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX, u64temp);
-       wqe_misc = NES_IWARP_SQ_OP_BIND;
-
-       wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE;
-
-       if (ibmw_bind->send_flags & IB_SEND_SIGNALED)
-               wqe_misc |= NES_IWARP_SQ_WQE_SIGNALED_COMPL;
-
-       if (ibmw_bind->bind_info.mw_access_flags & IB_ACCESS_REMOTE_WRITE)
-               wqe_misc |= NES_CQP_STAG_RIGHTS_REMOTE_WRITE;
-       if (ibmw_bind->bind_info.mw_access_flags & IB_ACCESS_REMOTE_READ)
-               wqe_misc |= NES_CQP_STAG_RIGHTS_REMOTE_READ;
-
-       set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_MISC_IDX, wqe_misc);
-       set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_MR_IDX,
-                           ibmw_bind->bind_info.mr->lkey);
-       set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_MW_IDX, ibmw->rkey);
-       set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_LENGTH_LOW_IDX,
-                       ibmw_bind->bind_info.length);
-       wqe->wqe_words[NES_IWARP_SQ_BIND_WQE_LENGTH_HIGH_IDX] = 0;
-       u64temp = (u64)ibmw_bind->bind_info.addr;
-       set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_VA_FBO_LOW_IDX, u64temp);
-
-       head++;
-       if (head >= qsize)
-               head = 0;
-
-       nesqp->hwqp.sq_head = head;
-       barrier();
-
-       nes_write32(nesdev->regs+NES_WQE_ALLOC,
-                       (1 << 24) | 0x00800000 | nesqp->hwqp.qp_id);
-
-       spin_unlock_irqrestore(&nesqp->lock, flags);
-
-       return 0;
-}
-
-
 /*
  * nes_alloc_fast_mr
  */
@@ -2074,9 +2000,8 @@ static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd,
 /**
  * nes_reg_phys_mr
  */
-static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
-               struct ib_phys_buf *buffer_list, int num_phys_buf, int acc,
-               u64 * iova_start)
+struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd, u64 addr, u64 size,
+               int acc, u64 *iova_start)
 {
        u64 region_length;
        struct nes_pd *nespd = to_nespd(ib_pd);
@@ -2088,13 +2013,10 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
        struct nes_vpbl vpbl;
        struct nes_root_vpbl root_vpbl;
        u32 stag;
-       u32 i;
        unsigned long mask;
        u32 stag_index = 0;
        u32 next_stag_index = 0;
        u32 driver_key = 0;
-       u32 root_pbl_index = 0;
-       u32 cur_pbl_index = 0;
        int err = 0;
        int ret = 0;
        u16 pbl_count = 0;
@@ -2113,11 +2035,8 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
 
        next_stag_index >>= 8;
        next_stag_index %= nesadapter->max_mr;
-       if (num_phys_buf > (1024*512)) {
-               return ERR_PTR(-E2BIG);
-       }
 
-       if ((buffer_list[0].addr ^ *iova_start) & ~PAGE_MASK)
+       if ((addr ^ *iova_start) & ~PAGE_MASK)
                return ERR_PTR(-EINVAL);
 
        err = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs, nesadapter->max_mr,
@@ -2132,84 +2051,33 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
                return ERR_PTR(-ENOMEM);
        }
 
-       for (i = 0; i < num_phys_buf; i++) {
+       /* Allocate a 4K buffer for the PBL */
+       vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096,
+                       &vpbl.pbl_pbase);
+       nes_debug(NES_DBG_MR, "Allocating leaf PBL, va = %p, pa = 0x%016lX\n",
+                       vpbl.pbl_vbase, (unsigned long)vpbl.pbl_pbase);
+       if (!vpbl.pbl_vbase) {
+               nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
+               ibmr = ERR_PTR(-ENOMEM);
+               kfree(nesmr);
+               goto reg_phys_err;
+       }
 
-               if ((i & 0x01FF) == 0) {
-                       if (root_pbl_index == 1) {
-                               /* Allocate the root PBL */
-                               root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 8192,
-                                               &root_vpbl.pbl_pbase);
-                               nes_debug(NES_DBG_MR, "Allocating root PBL, va = %p, pa = 0x%08X\n",
-                                               root_vpbl.pbl_vbase, (unsigned int)root_vpbl.pbl_pbase);
-                               if (!root_vpbl.pbl_vbase) {
-                                       pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase,
-                                                       vpbl.pbl_pbase);
-                                       nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
-                                       kfree(nesmr);
-                                       return ERR_PTR(-ENOMEM);
-                               }
-                               root_vpbl.leaf_vpbl = kzalloc(sizeof(*root_vpbl.leaf_vpbl)*1024, GFP_KERNEL);
-                               if (!root_vpbl.leaf_vpbl) {
-                                       pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase,
-                                                       root_vpbl.pbl_pbase);
-                                       pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase,
-                                                       vpbl.pbl_pbase);
-                                       nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
-                                       kfree(nesmr);
-                                       return ERR_PTR(-ENOMEM);
-                               }
-                               root_vpbl.pbl_vbase[0].pa_low = cpu_to_le32((u32)vpbl.pbl_pbase);
-                               root_vpbl.pbl_vbase[0].pa_high =
-                                               cpu_to_le32((u32)((((u64)vpbl.pbl_pbase) >> 32)));
-                               root_vpbl.leaf_vpbl[0] = vpbl;
-                       }
-                       /* Allocate a 4K buffer for the PBL */
-                       vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096,
-                                       &vpbl.pbl_pbase);
-                       nes_debug(NES_DBG_MR, "Allocating leaf PBL, va = %p, pa = 0x%016lX\n",
-                                       vpbl.pbl_vbase, (unsigned long)vpbl.pbl_pbase);
-                       if (!vpbl.pbl_vbase) {
-                               nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
-                               ibmr = ERR_PTR(-ENOMEM);
-                               kfree(nesmr);
-                               goto reg_phys_err;
-                       }
-                       /* Fill in the root table */
-                       if (1 <= root_pbl_index) {
-                               root_vpbl.pbl_vbase[root_pbl_index].pa_low =
-                                               cpu_to_le32((u32)vpbl.pbl_pbase);
-                               root_vpbl.pbl_vbase[root_pbl_index].pa_high =
-                                               cpu_to_le32((u32)((((u64)vpbl.pbl_pbase) >> 32)));
-                               root_vpbl.leaf_vpbl[root_pbl_index] = vpbl;
-                       }
-                       root_pbl_index++;
-                       cur_pbl_index = 0;
-               }
 
-               mask = !buffer_list[i].size;
-               if (i != 0)
-                       mask |= buffer_list[i].addr;
-               if (i != num_phys_buf - 1)
-                       mask |= buffer_list[i].addr + buffer_list[i].size;
-
-               if (mask & ~PAGE_MASK) {
-                       nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
-                       nes_debug(NES_DBG_MR, "Invalid buffer addr or size\n");
-                       ibmr = ERR_PTR(-EINVAL);
-                       kfree(nesmr);
-                       goto reg_phys_err;
-               }
+       mask = !size;
 
-               region_length += buffer_list[i].size;
-               if ((i != 0) && (single_page)) {
-                       if ((buffer_list[i-1].addr+PAGE_SIZE) != buffer_list[i].addr)
-                               single_page = 0;
-               }
-               vpbl.pbl_vbase[cur_pbl_index].pa_low = cpu_to_le32((u32)buffer_list[i].addr & PAGE_MASK);
-               vpbl.pbl_vbase[cur_pbl_index++].pa_high =
-                               cpu_to_le32((u32)((((u64)buffer_list[i].addr) >> 32)));
+       if (mask & ~PAGE_MASK) {
+               nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
+               nes_debug(NES_DBG_MR, "Invalid buffer addr or size\n");
+               ibmr = ERR_PTR(-EINVAL);
+               kfree(nesmr);
+               goto reg_phys_err;
        }
 
+       region_length += size;
+       vpbl.pbl_vbase[0].pa_low = cpu_to_le32((u32)addr & PAGE_MASK);
+       vpbl.pbl_vbase[0].pa_high = cpu_to_le32((u32)((((u64)addr) >> 32)));
+
        stag = stag_index << 8;
        stag |= driver_key;
        stag += (u32)stag_key;
@@ -2219,17 +2087,15 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
                        stag, (unsigned long)*iova_start, (unsigned long)region_length, stag_index);
 
        /* Make the leaf PBL the root if only one PBL */
-       if (root_pbl_index == 1) {
-               root_vpbl.pbl_pbase = vpbl.pbl_pbase;
-       }
+       root_vpbl.pbl_pbase = vpbl.pbl_pbase;
 
        if (single_page) {
                pbl_count = 0;
        } else {
-               pbl_count = root_pbl_index;
+               pbl_count = 1;
        }
        ret = nes_reg_mr(nesdev, nespd, stag, region_length, &root_vpbl,
-                       buffer_list[0].addr, pbl_count, (u16)cur_pbl_index, acc, iova_start,
+                       addr, pbl_count, 1, acc, iova_start,
                        &nesmr->pbls_used, &nesmr->pbl_4k);
 
        if (ret == 0) {
@@ -2242,21 +2108,9 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
                ibmr = ERR_PTR(-ENOMEM);
        }
 
-       reg_phys_err:
-       /* free the resources */
-       if (root_pbl_index == 1) {
-               /* single PBL case */
-               pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, vpbl.pbl_pbase);
-       } else {
-               for (i=0; i<root_pbl_index; i++) {
-                       pci_free_consistent(nesdev->pcidev, 4096, root_vpbl.leaf_vpbl[i].pbl_vbase,
-                                       root_vpbl.leaf_vpbl[i].pbl_pbase);
-               }
-               kfree(root_vpbl.leaf_vpbl);
-               pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase,
-                               root_vpbl.pbl_pbase);
-       }
-
+reg_phys_err:
+       /* single PBL case */
+       pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, vpbl.pbl_pbase);
        return ibmr;
 }
 
@@ -2266,17 +2120,13 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
  */
 static struct ib_mr *nes_get_dma_mr(struct ib_pd *pd, int acc)
 {
-       struct ib_phys_buf bl;
        u64 kva = 0;
 
        nes_debug(NES_DBG_MR, "\n");
 
-       bl.size = (u64)0xffffffffffULL;
-       bl.addr = 0;
-       return nes_reg_phys_mr(pd, &bl, 1, acc, &kva);
+       return nes_reg_phys_mr(pd, 0, 0xffffffffffULL, acc, &kva);
 }
 
-
 /**
  * nes_reg_user_mr
  */
@@ -3888,12 +3738,10 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev)
        nesibdev->ibdev.destroy_cq = nes_destroy_cq;
        nesibdev->ibdev.poll_cq = nes_poll_cq;
        nesibdev->ibdev.get_dma_mr = nes_get_dma_mr;
-       nesibdev->ibdev.reg_phys_mr = nes_reg_phys_mr;
        nesibdev->ibdev.reg_user_mr = nes_reg_user_mr;
        nesibdev->ibdev.dereg_mr = nes_dereg_mr;
        nesibdev->ibdev.alloc_mw = nes_alloc_mw;
        nesibdev->ibdev.dealloc_mw = nes_dealloc_mw;
-       nesibdev->ibdev.bind_mw = nes_bind_mw;
 
        nesibdev->ibdev.alloc_mr = nes_alloc_mr;
        nesibdev->ibdev.map_mr_sg = nes_map_mr_sg;
index a204b677af22a8d00fba4b5b65bc0620d4cb7a1c..70290883d06769f3eee19ed4b36750808c2fa3d8 100644 (file)
@@ -190,4 +190,8 @@ struct nes_qp {
        u8                    pau_state;
        __u64                 nesuqp_addr;
 };
+
+struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
+               u64 addr, u64 size, int acc, u64 *iova_start);
+
 #endif                 /* NES_VERBS_H */
index 9820074be59d73bd1aac4b56d4a38254d2f9b7bb..3790771f2baad2bae17c52298e9b293eee3e7bad 100644 (file)
@@ -152,9 +152,10 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
        if ((pd->uctx) &&
            (!rdma_is_multicast_addr((struct in6_addr *)attr->grh.dgid.raw)) &&
            (!rdma_link_local_addr((struct in6_addr *)attr->grh.dgid.raw))) {
-               status = rdma_addr_find_dmac_by_grh(&sgid, &attr->grh.dgid,
-                                                   attr->dmac, &vlan_tag,
-                                                   sgid_attr.ndev->ifindex);
+               status = rdma_addr_find_l2_eth_by_grh(&sgid, &attr->grh.dgid,
+                                                     attr->dmac, &vlan_tag,
+                                                     &sgid_attr.ndev->ifindex,
+                                                     NULL);
                if (status) {
                        pr_err("%s(): Failed to resolve dmac from gid." 
                                "status = %d\n", __func__, status);
index 3afb40b85159bd2c10559443536f83af5e4fa9ca..573849354cb94f4ef6f46397266518f9aaf7ef7b 100644 (file)
@@ -175,7 +175,6 @@ static int ocrdma_register_device(struct ocrdma_dev *dev)
        dev->ibdev.req_notify_cq = ocrdma_arm_cq;
 
        dev->ibdev.get_dma_mr = ocrdma_get_dma_mr;
-       dev->ibdev.reg_phys_mr = ocrdma_reg_kernel_mr;
        dev->ibdev.dereg_mr = ocrdma_dereg_mr;
        dev->ibdev.reg_user_mr = ocrdma_reg_user_mr;
 
index 76e96f97b3f6459e68d13c444be6e1beb6b578a9..d4c687b548d8696e66752da16a28f03d2481cd60 100644 (file)
@@ -3066,169 +3066,6 @@ pl_err:
        return ERR_PTR(-ENOMEM);
 }
 
-#define MAX_KERNEL_PBE_SIZE 65536
-static inline int count_kernel_pbes(struct ib_phys_buf *buf_list,
-                                   int buf_cnt, u32 *pbe_size)
-{
-       u64 total_size = 0;
-       u64 buf_size = 0;
-       int i;
-       *pbe_size = roundup(buf_list[0].size, PAGE_SIZE);
-       *pbe_size = roundup_pow_of_two(*pbe_size);
-
-       /* find the smallest PBE size that we can have */
-       for (i = 0; i < buf_cnt; i++) {
-               /* first addr may not be page aligned, so ignore checking */
-               if ((i != 0) && ((buf_list[i].addr & ~PAGE_MASK) ||
-                                (buf_list[i].size & ~PAGE_MASK))) {
-                       return 0;
-               }
-
-               /* if configured PBE size is greater then the chosen one,
-                * reduce the PBE size.
-                */
-               buf_size = roundup(buf_list[i].size, PAGE_SIZE);
-               /* pbe_size has to be even multiple of 4K 1,2,4,8...*/
-               buf_size = roundup_pow_of_two(buf_size);
-               if (*pbe_size > buf_size)
-                       *pbe_size = buf_size;
-
-               total_size += buf_size;
-       }
-       *pbe_size = *pbe_size > MAX_KERNEL_PBE_SIZE ?
-           (MAX_KERNEL_PBE_SIZE) : (*pbe_size);
-
-       /* num_pbes = total_size / (*pbe_size);  this is implemented below. */
-
-       return total_size >> ilog2(*pbe_size);
-}
-
-static void build_kernel_pbes(struct ib_phys_buf *buf_list, int ib_buf_cnt,
-                             u32 pbe_size, struct ocrdma_pbl *pbl_tbl,
-                             struct ocrdma_hw_mr *hwmr)
-{
-       int i;
-       int idx;
-       int pbes_per_buf = 0;
-       u64 buf_addr = 0;
-       int num_pbes;
-       struct ocrdma_pbe *pbe;
-       int total_num_pbes = 0;
-
-       if (!hwmr->num_pbes)
-               return;
-
-       pbe = (struct ocrdma_pbe *)pbl_tbl->va;
-       num_pbes = 0;
-
-       /* go through the OS phy regions & fill hw pbe entries into pbls. */
-       for (i = 0; i < ib_buf_cnt; i++) {
-               buf_addr = buf_list[i].addr;
-               pbes_per_buf =
-                   roundup_pow_of_two(roundup(buf_list[i].size, PAGE_SIZE)) /
-                   pbe_size;
-               hwmr->len += buf_list[i].size;
-               /* number of pbes can be more for one OS buf, when
-                * buffers are of different sizes.
-                * split the ib_buf to one or more pbes.
-                */
-               for (idx = 0; idx < pbes_per_buf; idx++) {
-                       /* we program always page aligned addresses,
-                        * first unaligned address is taken care by fbo.
-                        */
-                       if (i == 0) {
-                               /* for non zero fbo, assign the
-                                * start of the page.
-                                */
-                               pbe->pa_lo =
-                                   cpu_to_le32((u32) (buf_addr & PAGE_MASK));
-                               pbe->pa_hi =
-                                   cpu_to_le32((u32) upper_32_bits(buf_addr));
-                       } else {
-                               pbe->pa_lo =
-                                   cpu_to_le32((u32) (buf_addr & 0xffffffff));
-                               pbe->pa_hi =
-                                   cpu_to_le32((u32) upper_32_bits(buf_addr));
-                       }
-                       buf_addr += pbe_size;
-                       num_pbes += 1;
-                       total_num_pbes += 1;
-                       pbe++;
-
-                       if (total_num_pbes == hwmr->num_pbes)
-                               goto mr_tbl_done;
-                       /* if the pbl is full storing the pbes,
-                        * move to next pbl.
-                        */
-                       if (num_pbes == (hwmr->pbl_size/sizeof(u64))) {
-                               pbl_tbl++;
-                               pbe = (struct ocrdma_pbe *)pbl_tbl->va;
-                               num_pbes = 0;
-                       }
-               }
-       }
-mr_tbl_done:
-       return;
-}
-
-struct ib_mr *ocrdma_reg_kernel_mr(struct ib_pd *ibpd,
-                                  struct ib_phys_buf *buf_list,
-                                  int buf_cnt, int acc, u64 *iova_start)
-{
-       int status = -ENOMEM;
-       struct ocrdma_mr *mr;
-       struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
-       struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
-       u32 num_pbes;
-       u32 pbe_size = 0;
-
-       if ((acc & IB_ACCESS_REMOTE_WRITE) && !(acc & IB_ACCESS_LOCAL_WRITE))
-               return ERR_PTR(-EINVAL);
-
-       mr = kzalloc(sizeof(*mr), GFP_KERNEL);
-       if (!mr)
-               return ERR_PTR(status);
-
-       num_pbes = count_kernel_pbes(buf_list, buf_cnt, &pbe_size);
-       if (num_pbes == 0) {
-               status = -EINVAL;
-               goto pbl_err;
-       }
-       status = ocrdma_get_pbl_info(dev, mr, num_pbes);
-       if (status)
-               goto pbl_err;
-
-       mr->hwmr.pbe_size = pbe_size;
-       mr->hwmr.fbo = *iova_start - (buf_list[0].addr & PAGE_MASK);
-       mr->hwmr.va = *iova_start;
-       mr->hwmr.local_rd = 1;
-       mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
-       mr->hwmr.remote_rd = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
-       mr->hwmr.local_wr = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
-       mr->hwmr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
-       mr->hwmr.mw_bind = (acc & IB_ACCESS_MW_BIND) ? 1 : 0;
-
-       status = ocrdma_build_pbl_tbl(dev, &mr->hwmr);
-       if (status)
-               goto pbl_err;
-       build_kernel_pbes(buf_list, buf_cnt, pbe_size, mr->hwmr.pbl_table,
-                         &mr->hwmr);
-       status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, acc);
-       if (status)
-               goto mbx_err;
-
-       mr->ibmr.lkey = mr->hwmr.lkey;
-       if (mr->hwmr.remote_wr || mr->hwmr.remote_rd)
-               mr->ibmr.rkey = mr->hwmr.lkey;
-       return &mr->ibmr;
-
-mbx_err:
-       ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
-pbl_err:
-       kfree(mr);
-       return ERR_PTR(status);
-}
-
 static int ocrdma_set_page(struct ib_mr *ibmr, u64 addr)
 {
        struct ocrdma_mr *mr = get_ocrdma_mr(ibmr);
index a2f3b4dc20b0363d0a09769aaee2d69dd957ae33..8b517fd3677924099a1d08663035de8f19a09d79 100644 (file)
@@ -117,9 +117,6 @@ int ocrdma_post_srq_recv(struct ib_srq *, struct ib_recv_wr *,
 
 int ocrdma_dereg_mr(struct ib_mr *);
 struct ib_mr *ocrdma_get_dma_mr(struct ib_pd *, int acc);
-struct ib_mr *ocrdma_reg_kernel_mr(struct ib_pd *,
-                                  struct ib_phys_buf *buffer_list,
-                                  int num_phys_buf, int acc, u64 *iova_start);
 struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *, u64 start, u64 length,
                                 u64 virt, int acc, struct ib_udata *);
 struct ib_mr *ocrdma_alloc_mr(struct ib_pd *pd,
index 13ef22bd9459200309f2ab9fccb248eccbef9027..fcdf37913a264d52b5204f771523b37a2b0c4985 100644 (file)
@@ -89,14 +89,14 @@ static int create_file(const char *name, umode_t mode,
 {
        int error;
 
-       mutex_lock(&d_inode(parent)->i_mutex);
+       inode_lock(d_inode(parent));
        *dentry = lookup_one_len(name, parent, strlen(name));
        if (!IS_ERR(*dentry))
                error = qibfs_mknod(d_inode(parent), *dentry,
                                    mode, fops, data);
        else
                error = PTR_ERR(*dentry);
-       mutex_unlock(&d_inode(parent)->i_mutex);
+       inode_unlock(d_inode(parent));
 
        return error;
 }
@@ -481,7 +481,7 @@ static int remove_device_files(struct super_block *sb,
        int ret, i;
 
        root = dget(sb->s_root);
-       mutex_lock(&d_inode(root)->i_mutex);
+       inode_lock(d_inode(root));
        snprintf(unit, sizeof(unit), "%u", dd->unit);
        dir = lookup_one_len(unit, root, strlen(unit));
 
@@ -491,7 +491,7 @@ static int remove_device_files(struct super_block *sb,
                goto bail;
        }
 
-       mutex_lock(&d_inode(dir)->i_mutex);
+       inode_lock(d_inode(dir));
        remove_file(dir, "counters");
        remove_file(dir, "counter_names");
        remove_file(dir, "portcounter_names");
@@ -506,13 +506,13 @@ static int remove_device_files(struct super_block *sb,
                }
        }
        remove_file(dir, "flash");
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
        ret = simple_rmdir(d_inode(root), dir);
        d_delete(dir);
        dput(dir);
 
 bail:
-       mutex_unlock(&d_inode(root)->i_mutex);
+       inode_unlock(d_inode(root));
        dput(root);
        return ret;
 }
index 294f5c706be972b4b6563e37a32064fade665be2..5f53304e8a9b5a4c71448ba02865d912915d13a1 100644 (file)
@@ -150,10 +150,7 @@ static struct qib_mr *alloc_mr(int count, struct ib_pd *pd)
        rval = init_qib_mregion(&mr->mr, pd, count);
        if (rval)
                goto bail;
-       /*
-        * ib_reg_phys_mr() will initialize mr->ibmr except for
-        * lkey and rkey.
-        */
+
        rval = qib_alloc_lkey(&mr->mr, 0);
        if (rval)
                goto bail_mregion;
@@ -170,52 +167,6 @@ bail:
        goto done;
 }
 
-/**
- * qib_reg_phys_mr - register a physical memory region
- * @pd: protection domain for this memory region
- * @buffer_list: pointer to the list of physical buffers to register
- * @num_phys_buf: the number of physical buffers to register
- * @iova_start: the starting address passed over IB which maps to this MR
- *
- * Returns the memory region on success, otherwise returns an errno.
- */
-struct ib_mr *qib_reg_phys_mr(struct ib_pd *pd,
-                             struct ib_phys_buf *buffer_list,
-                             int num_phys_buf, int acc, u64 *iova_start)
-{
-       struct qib_mr *mr;
-       int n, m, i;
-       struct ib_mr *ret;
-
-       mr = alloc_mr(num_phys_buf, pd);
-       if (IS_ERR(mr)) {
-               ret = (struct ib_mr *)mr;
-               goto bail;
-       }
-
-       mr->mr.user_base = *iova_start;
-       mr->mr.iova = *iova_start;
-       mr->mr.access_flags = acc;
-
-       m = 0;
-       n = 0;
-       for (i = 0; i < num_phys_buf; i++) {
-               mr->mr.map[m]->segs[n].vaddr = (void *) buffer_list[i].addr;
-               mr->mr.map[m]->segs[n].length = buffer_list[i].size;
-               mr->mr.length += buffer_list[i].size;
-               n++;
-               if (n == QIB_SEGSZ) {
-                       m++;
-                       n = 0;
-               }
-       }
-
-       ret = &mr->ibmr;
-
-bail:
-       return ret;
-}
-
 /**
  * qib_reg_user_mr - register a userspace memory region
  * @pd: protection domain for this memory region
index 40f85bb3e0d3bdce5289a5c8c9c2418df33e4ca4..3eff35c2d453f7b11f9475f5120cb4df26f94dda 100644 (file)
@@ -100,9 +100,10 @@ static u32 credit_table[31] = {
        32768                   /* 1E */
 };
 
-static void get_map_page(struct qib_qpn_table *qpt, struct qpn_map *map)
+static void get_map_page(struct qib_qpn_table *qpt, struct qpn_map *map,
+                        gfp_t gfp)
 {
-       unsigned long page = get_zeroed_page(GFP_KERNEL);
+       unsigned long page = get_zeroed_page(gfp);
 
        /*
         * Free the page if someone raced with us installing it.
@@ -121,7 +122,7 @@ static void get_map_page(struct qib_qpn_table *qpt, struct qpn_map *map)
  * zero/one for QP type IB_QPT_SMI/IB_QPT_GSI.
  */
 static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt,
-                    enum ib_qp_type type, u8 port)
+                    enum ib_qp_type type, u8 port, gfp_t gfp)
 {
        u32 i, offset, max_scan, qpn;
        struct qpn_map *map;
@@ -151,7 +152,7 @@ static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt,
        max_scan = qpt->nmaps - !offset;
        for (i = 0;;) {
                if (unlikely(!map->page)) {
-                       get_map_page(qpt, map);
+                       get_map_page(qpt, map, gfp);
                        if (unlikely(!map->page))
                                break;
                }
@@ -983,13 +984,21 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
        size_t sz;
        size_t sg_list_sz;
        struct ib_qp *ret;
+       gfp_t gfp;
+
 
        if (init_attr->cap.max_send_sge > ib_qib_max_sges ||
            init_attr->cap.max_send_wr > ib_qib_max_qp_wrs ||
-           init_attr->create_flags) {
-               ret = ERR_PTR(-EINVAL);
-               goto bail;
-       }
+           init_attr->create_flags & ~(IB_QP_CREATE_USE_GFP_NOIO))
+               return ERR_PTR(-EINVAL);
+
+       /* GFP_NOIO is applicable in RC QPs only */
+       if (init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO &&
+           init_attr->qp_type != IB_QPT_RC)
+               return ERR_PTR(-EINVAL);
+
+       gfp = init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO ?
+                       GFP_NOIO : GFP_KERNEL;
 
        /* Check receive queue parameters if no SRQ is specified. */
        if (!init_attr->srq) {
@@ -1021,7 +1030,8 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
                sz = sizeof(struct qib_sge) *
                        init_attr->cap.max_send_sge +
                        sizeof(struct qib_swqe);
-               swq = vmalloc((init_attr->cap.max_send_wr + 1) * sz);
+               swq = __vmalloc((init_attr->cap.max_send_wr + 1) * sz,
+                               gfp, PAGE_KERNEL);
                if (swq == NULL) {
                        ret = ERR_PTR(-ENOMEM);
                        goto bail;
@@ -1037,13 +1047,13 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
                } else if (init_attr->cap.max_recv_sge > 1)
                        sg_list_sz = sizeof(*qp->r_sg_list) *
                                (init_attr->cap.max_recv_sge - 1);
-               qp = kzalloc(sz + sg_list_sz, GFP_KERNEL);
+               qp = kzalloc(sz + sg_list_sz, gfp);
                if (!qp) {
                        ret = ERR_PTR(-ENOMEM);
                        goto bail_swq;
                }
                RCU_INIT_POINTER(qp->next, NULL);
-               qp->s_hdr = kzalloc(sizeof(*qp->s_hdr), GFP_KERNEL);
+               qp->s_hdr = kzalloc(sizeof(*qp->s_hdr), gfp);
                if (!qp->s_hdr) {
                        ret = ERR_PTR(-ENOMEM);
                        goto bail_qp;
@@ -1058,8 +1068,16 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
                        qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
                        sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
                                sizeof(struct qib_rwqe);
-                       qp->r_rq.wq = vmalloc_user(sizeof(struct qib_rwq) +
-                                                  qp->r_rq.size * sz);
+                       if (gfp != GFP_NOIO)
+                               qp->r_rq.wq = vmalloc_user(
+                                               sizeof(struct qib_rwq) +
+                                               qp->r_rq.size * sz);
+                       else
+                               qp->r_rq.wq = __vmalloc(
+                                               sizeof(struct qib_rwq) +
+                                               qp->r_rq.size * sz,
+                                               gfp, PAGE_KERNEL);
+
                        if (!qp->r_rq.wq) {
                                ret = ERR_PTR(-ENOMEM);
                                goto bail_qp;
@@ -1090,7 +1108,7 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
                dev = to_idev(ibpd->device);
                dd = dd_from_dev(dev);
                err = alloc_qpn(dd, &dev->qpn_table, init_attr->qp_type,
-                               init_attr->port_num);
+                               init_attr->port_num, gfp);
                if (err < 0) {
                        ret = ERR_PTR(err);
                        vfree(qp->r_rq.wq);
index de6cb6fcda8df3d5a1060aceecf20c832ba81ace..baf1e42b6896cbd0efcf29e0d52aeed3c20e1e21 100644 (file)
@@ -346,6 +346,7 @@ static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr,
        unsigned long flags;
        struct qib_lkey_table *rkt;
        struct qib_pd *pd;
+       int avoid_schedule = 0;
 
        spin_lock_irqsave(&qp->s_lock, flags);
 
@@ -438,11 +439,15 @@ static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr,
            qp->ibqp.qp_type == IB_QPT_RC) {
                if (wqe->length > 0x80000000U)
                        goto bail_inval_free;
+               if (wqe->length <= qp->pmtu)
+                       avoid_schedule = 1;
        } else if (wqe->length > (dd_from_ibdev(qp->ibqp.device)->pport +
-                                 qp->port_num - 1)->ibmtu)
+                                 qp->port_num - 1)->ibmtu) {
                goto bail_inval_free;
-       else
+       } else {
                atomic_inc(&to_iah(ud_wr(wr)->ah)->refcount);
+               avoid_schedule = 1;
+       }
        wqe->ssn = qp->s_ssn++;
        qp->s_head = next;
 
@@ -458,7 +463,7 @@ bail_inval_free:
 bail_inval:
        ret = -EINVAL;
 bail:
-       if (!ret && !wr->next &&
+       if (!ret && !wr->next && !avoid_schedule &&
         !qib_sdma_empty(
           dd_from_ibdev(qp->ibqp.device)->pport + qp->port_num - 1)) {
                qib_schedule_send(qp);
@@ -2256,7 +2261,6 @@ int qib_register_ib_device(struct qib_devdata *dd)
        ibdev->poll_cq = qib_poll_cq;
        ibdev->req_notify_cq = qib_req_notify_cq;
        ibdev->get_dma_mr = qib_get_dma_mr;
-       ibdev->reg_phys_mr = qib_reg_phys_mr;
        ibdev->reg_user_mr = qib_reg_user_mr;
        ibdev->dereg_mr = qib_dereg_mr;
        ibdev->alloc_mr = qib_alloc_mr;
index bc803f33d5f6883200ca21e462febebb94393cd4..6c5e77753d858da2d9adb0ddfc1cd753b1e3fb07 100644 (file)
@@ -1032,10 +1032,6 @@ int qib_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
 
 struct ib_mr *qib_get_dma_mr(struct ib_pd *pd, int acc);
 
-struct ib_mr *qib_reg_phys_mr(struct ib_pd *pd,
-                             struct ib_phys_buf *buffer_list,
-                             int num_phys_buf, int acc, u64 *iova_start);
-
 struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                              u64 virt_addr, int mr_access_flags,
                              struct ib_udata *udata);
index f8ea069a3eafca4078ab70848c3804867609143c..b2fb5286dbd98250534964887a8c2c46678cd725 100644 (file)
@@ -286,15 +286,13 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
        struct qib_ibdev *dev = to_idev(ibqp->device);
        struct qib_ibport *ibp = to_iport(ibqp->device, qp->port_num);
        struct qib_mcast *mcast = NULL;
-       struct qib_mcast_qp *p, *tmp;
+       struct qib_mcast_qp *p, *tmp, *delp = NULL;
        struct rb_node *n;
        int last = 0;
        int ret;
 
-       if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) {
-               ret = -EINVAL;
-               goto bail;
-       }
+       if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET)
+               return -EINVAL;
 
        spin_lock_irq(&ibp->lock);
 
@@ -303,8 +301,7 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
        while (1) {
                if (n == NULL) {
                        spin_unlock_irq(&ibp->lock);
-                       ret = -EINVAL;
-                       goto bail;
+                       return -EINVAL;
                }
 
                mcast = rb_entry(n, struct qib_mcast, rb_node);
@@ -328,6 +325,7 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
                 */
                list_del_rcu(&p->list);
                mcast->n_attached--;
+               delp = p;
 
                /* If this was the last attached QP, remove the GID too. */
                if (list_empty(&mcast->qp_list)) {
@@ -338,15 +336,16 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
        }
 
        spin_unlock_irq(&ibp->lock);
+       /* QP not attached */
+       if (!delp)
+               return -EINVAL;
+       /*
+        * Wait for any list walkers to finish before freeing the
+        * list element.
+        */
+       wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1);
+       qib_mcast_qp_free(delp);
 
-       if (p) {
-               /*
-                * Wait for any list walkers to finish before freeing the
-                * list element.
-                */
-               wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1);
-               qib_mcast_qp_free(p);
-       }
        if (last) {
                atomic_dec(&mcast->refcount);
                wait_event(mcast->wait, !atomic_read(&mcast->refcount));
@@ -355,11 +354,7 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
                dev->n_mcast_grps_allocated--;
                spin_unlock_irq(&dev->n_mcast_grps_lock);
        }
-
-       ret = 0;
-
-bail:
-       return ret;
+       return 0;
 }
 
 int qib_mcast_tree_empty(struct qib_ibport *ibp)
index 5e55b8bc6fe402af423118c1454b5bc67d21d8ba..92dc66cc2d50a06ea69fb259f713784a26868e2b 100644 (file)
@@ -157,8 +157,9 @@ void usnic_debugfs_flow_add(struct usnic_ib_qp_grp_flow *qp_flow)
                                                        qp_flow,
                                                        &flowinfo_ops);
        if (IS_ERR_OR_NULL(qp_flow->dbgfs_dentry)) {
-               usnic_err("Failed to create dbg fs entry for flow %u\n",
-                               qp_flow->flow->flow_id);
+               usnic_err("Failed to create dbg fs entry for flow %u with error %ld\n",
+                               qp_flow->flow->flow_id,
+                               PTR_ERR(qp_flow->dbgfs_dentry));
        }
 }
 
index fcea3a24d3eb310ef0ee3c573a3ef9e161c8e809..5f44b66ccb86481774733960b3577e83fcdfabc0 100644 (file)
@@ -521,7 +521,7 @@ int usnic_ib_qp_grp_modify(struct usnic_ib_qp_grp *qp_grp,
 
        if (!status) {
                qp_grp->state = new_state;
-               usnic_info("Transistioned %u from %s to %s",
+               usnic_info("Transitioned %u from %s to %s",
                qp_grp->grp_id,
                usnic_ib_qp_grp_state_to_string(old_state),
                usnic_ib_qp_grp_state_to_string(new_state));
@@ -575,7 +575,7 @@ alloc_res_chunk_list(struct usnic_vnic *vnic,
        return res_chunk_list;
 
 out_free_res:
-       for (i--; i > 0; i--)
+       for (i--; i >= 0; i--)
                usnic_vnic_put_resources(res_chunk_list[i]);
        kfree(res_chunk_list);
        return ERR_PTR(err);
index f8e3211689a3453164c9044f3bef4601053c1dba..6cdb4d23f78f99c0282be7d202083f5aa63091e7 100644 (file)
@@ -51,7 +51,7 @@
 
 static void usnic_ib_fw_string_to_u64(char *fw_ver_str, u64 *fw_ver)
 {
-       *fw_ver = (u64) *fw_ver_str;
+       *fw_ver = *((u64 *)fw_ver_str);
 }
 
 static int usnic_ib_fill_create_qp_resp(struct usnic_ib_qp_grp *qp_grp,
@@ -571,20 +571,20 @@ int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 
        qp_grp = to_uqp_grp(ibqp);
 
-       /* TODO: Future Support All States */
        mutex_lock(&qp_grp->vf->pf->usdev_lock);
-       if ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_INIT) {
-               status = usnic_ib_qp_grp_modify(qp_grp, IB_QPS_INIT, NULL);
-       } else if ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_RTR) {
-               status = usnic_ib_qp_grp_modify(qp_grp, IB_QPS_RTR, NULL);
-       } else if ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_RTS) {
-               status = usnic_ib_qp_grp_modify(qp_grp, IB_QPS_RTS, NULL);
+       if ((attr_mask & IB_QP_PORT) && attr->port_num != 1) {
+               /* usnic devices only have one port */
+               status = -EINVAL;
+               goto out_unlock;
+       }
+       if (attr_mask & IB_QP_STATE) {
+               status = usnic_ib_qp_grp_modify(qp_grp, attr->qp_state, NULL);
        } else {
-               usnic_err("Unexpected combination mask: %u state: %u\n",
-                               attr_mask & IB_QP_STATE, attr->qp_state);
+               usnic_err("Unhandled request, attr_mask=0x%x\n", attr_mask);
                status = -EINVAL;
        }
 
+out_unlock:
        mutex_unlock(&qp_grp->vf->pf->usdev_lock);
        return status;
 }
@@ -625,8 +625,8 @@ struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length,
                        virt_addr, length);
 
        mr = kzalloc(sizeof(*mr), GFP_KERNEL);
-       if (IS_ERR_OR_NULL(mr))
-               return ERR_PTR(mr ? PTR_ERR(mr) : -ENOMEM);
+       if (!mr)
+               return ERR_PTR(-ENOMEM);
 
        mr->umem = usnic_uiom_reg_get(to_upd(pd)->umem_pd, start, length,
                                        access_flags, 0);
index 414eaa566bd94e5f05a796a5416ef8f76e7939f6..0d9d2e6a14d5e73137e351036486dbfa2b21af60 100644 (file)
@@ -43,8 +43,6 @@ int usnic_ib_query_device(struct ib_device *ibdev,
                          struct ib_udata *uhw);
 int usnic_ib_query_port(struct ib_device *ibdev, u8 port,
                                struct ib_port_attr *props);
-enum rdma_protocol_type
-usnic_ib_query_protocol(struct ib_device *device, u8 port_num);
 int usnic_ib_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
                                int qp_attr_mask,
                                struct ib_qp_init_attr *qp_init_attr);
index 66de93fb8ea934c15051f7b33fa7808306f19f05..8875107186902fe8092b24d15a2372aedb6592c1 100644 (file)
@@ -237,7 +237,7 @@ usnic_vnic_get_resources(struct usnic_vnic *vnic, enum usnic_vnic_res_type type,
        struct usnic_vnic_res *res;
        int i;
 
-       if (usnic_vnic_res_free_cnt(vnic, type) < cnt || cnt < 1 || !owner)
+       if (usnic_vnic_res_free_cnt(vnic, type) < cnt || cnt < 0 || !owner)
                return ERR_PTR(-EINVAL);
 
        ret = kzalloc(sizeof(*ret), GFP_ATOMIC);
@@ -247,26 +247,28 @@ usnic_vnic_get_resources(struct usnic_vnic *vnic, enum usnic_vnic_res_type type,
                return ERR_PTR(-ENOMEM);
        }
 
-       ret->res = kzalloc(sizeof(*(ret->res))*cnt, GFP_ATOMIC);
-       if (!ret->res) {
-               usnic_err("Failed to allocate resources for %s. Out of memory\n",
-                               usnic_vnic_pci_name(vnic));
-               kfree(ret);
-               return ERR_PTR(-ENOMEM);
-       }
+       if (cnt > 0) {
+               ret->res = kcalloc(cnt, sizeof(*(ret->res)), GFP_ATOMIC);
+               if (!ret->res) {
+                       usnic_err("Failed to allocate resources for %s. Out of memory\n",
+                                       usnic_vnic_pci_name(vnic));
+                       kfree(ret);
+                       return ERR_PTR(-ENOMEM);
+               }
 
-       spin_lock(&vnic->res_lock);
-       src = &vnic->chunks[type];
-       for (i = 0; i < src->cnt && ret->cnt < cnt; i++) {
-               res = src->res[i];
-               if (!res->owner) {
-                       src->free_cnt--;
-                       res->owner = owner;
-                       ret->res[ret->cnt++] = res;
+               spin_lock(&vnic->res_lock);
+               src = &vnic->chunks[type];
+               for (i = 0; i < src->cnt && ret->cnt < cnt; i++) {
+                       res = src->res[i];
+                       if (!res->owner) {
+                               src->free_cnt--;
+                               res->owner = owner;
+                               ret->res[ret->cnt++] = res;
+                       }
                }
-       }
 
-       spin_unlock(&vnic->res_lock);
+               spin_unlock(&vnic->res_lock);
+       }
        ret->type = type;
        ret->vnic = vnic;
        WARN_ON(ret->cnt != cnt);
@@ -281,14 +283,16 @@ void usnic_vnic_put_resources(struct usnic_vnic_res_chunk *chunk)
        int i;
        struct usnic_vnic *vnic = chunk->vnic;
 
-       spin_lock(&vnic->res_lock);
-       while ((i = --chunk->cnt) >= 0) {
-               res = chunk->res[i];
-               chunk->res[i] = NULL;
-               res->owner = NULL;
-               vnic->chunks[res->type].free_cnt++;
+       if (chunk->cnt > 0) {
+               spin_lock(&vnic->res_lock);
+               while ((i = --chunk->cnt) >= 0) {
+                       res = chunk->res[i];
+                       chunk->res[i] = NULL;
+                       res->owner = NULL;
+                       vnic->chunks[res->type].free_cnt++;
+               }
+               spin_unlock(&vnic->res_lock);
        }
-       spin_unlock(&vnic->res_lock);
 
        kfree(chunk->res);
        kfree(chunk);
index 3ede103097547d4355646d322b5570ebb421d2ec..a6f3eab0f350ef036cfb44a47fbbafe5f9bd1fc1 100644 (file)
@@ -495,7 +495,6 @@ void ipoib_dev_cleanup(struct net_device *dev);
 void ipoib_mcast_join_task(struct work_struct *work);
 void ipoib_mcast_carrier_on_task(struct work_struct *work);
 void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb);
-void ipoib_mcast_free(struct ipoib_mcast *mc);
 
 void ipoib_mcast_restart_task(struct work_struct *work);
 int ipoib_mcast_start_thread(struct net_device *dev);
@@ -549,8 +548,9 @@ void ipoib_path_iter_read(struct ipoib_path_iter *iter,
 
 int ipoib_mcast_attach(struct net_device *dev, u16 mlid,
                       union ib_gid *mgid, int set_qkey);
-int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast);
-struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid);
+void ipoib_mcast_remove_list(struct list_head *remove_list);
+void ipoib_check_and_add_mcast_sendonly(struct ipoib_dev_priv *priv, u8 *mgid,
+                               struct list_head *remove_list);
 
 int ipoib_init_qp(struct net_device *dev);
 int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca);
index 3ae9726efb9837512a62214705bf5e8e9561a02c..917e46ea3bf681681a4abba5e9ce6e86514d07eb 100644 (file)
@@ -70,7 +70,6 @@ static struct ib_qp_attr ipoib_cm_err_attr = {
 #define IPOIB_CM_RX_DRAIN_WRID 0xffffffff
 
 static struct ib_send_wr ipoib_cm_rx_drain_wr = {
-       .wr_id = IPOIB_CM_RX_DRAIN_WRID,
        .opcode = IB_WR_SEND,
 };
 
@@ -223,6 +222,7 @@ static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv *priv)
         * error" WC will be immediately generated for each WR we post.
         */
        p = list_entry(priv->cm.rx_flush_list.next, typeof(*p), list);
+       ipoib_cm_rx_drain_wr.wr_id = IPOIB_CM_RX_DRAIN_WRID;
        if (ib_post_send(p->qp, &ipoib_cm_rx_drain_wr, &bad_wr))
                ipoib_warn(priv, "failed to post drain wr\n");
 
@@ -1522,8 +1522,7 @@ static void ipoib_cm_create_srq(struct net_device *dev, int max_sge)
 int ipoib_cm_dev_init(struct net_device *dev)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
-       int i, ret;
-       struct ib_device_attr attr;
+       int max_srq_sge, i;
 
        INIT_LIST_HEAD(&priv->cm.passive_ids);
        INIT_LIST_HEAD(&priv->cm.reap_list);
@@ -1540,19 +1539,13 @@ int ipoib_cm_dev_init(struct net_device *dev)
 
        skb_queue_head_init(&priv->cm.skb_queue);
 
-       ret = ib_query_device(priv->ca, &attr);
-       if (ret) {
-               printk(KERN_WARNING "ib_query_device() failed with %d\n", ret);
-               return ret;
-       }
-
-       ipoib_dbg(priv, "max_srq_sge=%d\n", attr.max_srq_sge);
+       ipoib_dbg(priv, "max_srq_sge=%d\n", priv->ca->attrs.max_srq_sge);
 
-       attr.max_srq_sge = min_t(int, IPOIB_CM_RX_SG, attr.max_srq_sge);
-       ipoib_cm_create_srq(dev, attr.max_srq_sge);
+       max_srq_sge = min_t(int, IPOIB_CM_RX_SG, priv->ca->attrs.max_srq_sge);
+       ipoib_cm_create_srq(dev, max_srq_sge);
        if (ipoib_cm_has_srq(dev)) {
-               priv->cm.max_cm_mtu = attr.max_srq_sge * PAGE_SIZE - 0x10;
-               priv->cm.num_frags  = attr.max_srq_sge;
+               priv->cm.max_cm_mtu = max_srq_sge * PAGE_SIZE - 0x10;
+               priv->cm.num_frags  = max_srq_sge;
                ipoib_dbg(priv, "max_cm_mtu = 0x%x, num_frags=%d\n",
                          priv->cm.max_cm_mtu, priv->cm.num_frags);
        } else {
index 078cadd6c797afeb0e22267fb76e5362a4b97326..a53fa5fc0dec7d65900cce1133b2d4e2326183cc 100644 (file)
@@ -40,15 +40,11 @@ static void ipoib_get_drvinfo(struct net_device *netdev,
                              struct ethtool_drvinfo *drvinfo)
 {
        struct ipoib_dev_priv *priv = netdev_priv(netdev);
-       struct ib_device_attr *attr;
-
-       attr = kmalloc(sizeof(*attr), GFP_KERNEL);
-       if (attr && !ib_query_device(priv->ca, attr))
-               snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
-                        "%d.%d.%d", (int)(attr->fw_ver >> 32),
-                        (int)(attr->fw_ver >> 16) & 0xffff,
-                        (int)attr->fw_ver & 0xffff);
-       kfree(attr);
+
+       snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
+                "%d.%d.%d", (int)(priv->ca->attrs.fw_ver >> 32),
+                (int)(priv->ca->attrs.fw_ver >> 16) & 0xffff,
+                (int)priv->ca->attrs.fw_ver & 0xffff);
 
        strlcpy(drvinfo->bus_info, dev_name(priv->ca->dma_device),
                sizeof(drvinfo->bus_info));
index 7d3281866ffcd520d4bca543a9ad0544f2ec159f..25509bbd4a050a076dba5d8bd88b718fa1a72778 100644 (file)
@@ -1150,8 +1150,6 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
        unsigned long flags;
        int i;
        LIST_HEAD(remove_list);
-       struct ipoib_mcast *mcast, *tmcast;
-       struct net_device *dev = priv->dev;
 
        if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
                return;
@@ -1179,18 +1177,8 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
                                                          lockdep_is_held(&priv->lock))) != NULL) {
                        /* was the neigh idle for two GC periods */
                        if (time_after(neigh_obsolete, neigh->alive)) {
-                               u8 *mgid = neigh->daddr + 4;
 
-                               /* Is this multicast ? */
-                               if (*mgid == 0xff) {
-                                       mcast = __ipoib_mcast_find(dev, mgid);
-
-                                       if (mcast && test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
-                                               list_del(&mcast->list);
-                                               rb_erase(&mcast->rb_node, &priv->multicast_tree);
-                                               list_add_tail(&mcast->list, &remove_list);
-                                       }
-                               }
+                               ipoib_check_and_add_mcast_sendonly(priv, neigh->daddr + 4, &remove_list);
 
                                rcu_assign_pointer(*np,
                                                   rcu_dereference_protected(neigh->hnext,
@@ -1207,10 +1195,7 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
 
 out_unlock:
        spin_unlock_irqrestore(&priv->lock, flags);
-       list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
-               ipoib_mcast_leave(dev, mcast);
-               ipoib_mcast_free(mcast);
-       }
+       ipoib_mcast_remove_list(&remove_list);
 }
 
 static void ipoib_reap_neigh(struct work_struct *work)
@@ -1777,26 +1762,7 @@ int ipoib_add_pkey_attr(struct net_device *dev)
 
 int ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca)
 {
-       struct ib_device_attr *device_attr;
-       int result = -ENOMEM;
-
-       device_attr = kmalloc(sizeof *device_attr, GFP_KERNEL);
-       if (!device_attr) {
-               printk(KERN_WARNING "%s: allocation of %zu bytes failed\n",
-                      hca->name, sizeof *device_attr);
-               return result;
-       }
-
-       result = ib_query_device(hca, device_attr);
-       if (result) {
-               printk(KERN_WARNING "%s: ib_query_device failed (ret = %d)\n",
-                      hca->name, result);
-               kfree(device_attr);
-               return result;
-       }
-       priv->hca_caps = device_attr->device_cap_flags;
-
-       kfree(device_attr);
+       priv->hca_caps = hca->attrs.device_cap_flags;
 
        if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) {
                priv->dev->hw_features = NETIF_F_SG |
index f357ca67a41cd859b2ff5e704621f72813b91fa8..050dfa175d169dd3f77ee83767f1988062989891 100644 (file)
@@ -106,7 +106,7 @@ static void __ipoib_mcast_schedule_join_thread(struct ipoib_dev_priv *priv,
                queue_delayed_work(priv->wq, &priv->mcast_task, 0);
 }
 
-void ipoib_mcast_free(struct ipoib_mcast *mcast)
+static void ipoib_mcast_free(struct ipoib_mcast *mcast)
 {
        struct net_device *dev = mcast->dev;
        int tx_dropped = 0;
@@ -153,7 +153,7 @@ static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev,
        return mcast;
 }
 
-struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid)
+static struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        struct rb_node *n = priv->multicast_tree.rb_node;
@@ -677,7 +677,7 @@ int ipoib_mcast_stop_thread(struct net_device *dev)
        return 0;
 }
 
-int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
+static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        int ret = 0;
@@ -704,6 +704,35 @@ int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
        return 0;
 }
 
+/*
+ * Check if the multicast group is sendonly. If so remove it from the maps
+ * and add to the remove list
+ */
+void ipoib_check_and_add_mcast_sendonly(struct ipoib_dev_priv *priv, u8 *mgid,
+                               struct list_head *remove_list)
+{
+       /* Is this multicast ? */
+       if (*mgid == 0xff) {
+               struct ipoib_mcast *mcast = __ipoib_mcast_find(priv->dev, mgid);
+
+               if (mcast && test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
+                       list_del(&mcast->list);
+                       rb_erase(&mcast->rb_node, &priv->multicast_tree);
+                       list_add_tail(&mcast->list, remove_list);
+               }
+       }
+}
+
+void ipoib_mcast_remove_list(struct list_head *remove_list)
+{
+       struct ipoib_mcast *mcast, *tmcast;
+
+       list_for_each_entry_safe(mcast, tmcast, remove_list, list) {
+               ipoib_mcast_leave(mcast->dev, mcast);
+               ipoib_mcast_free(mcast);
+       }
+}
+
 void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -810,10 +839,7 @@ void ipoib_mcast_dev_flush(struct net_device *dev)
                if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
                        wait_for_completion(&mcast->done);
 
-       list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
-               ipoib_mcast_leave(dev, mcast);
-               ipoib_mcast_free(mcast);
-       }
+       ipoib_mcast_remove_list(&remove_list);
 }
 
 static int ipoib_mcast_addr_is_valid(const u8 *addr, const u8 *broadcast)
@@ -939,10 +965,7 @@ void ipoib_mcast_restart_task(struct work_struct *work)
                if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
                        wait_for_completion(&mcast->done);
 
-       list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
-               ipoib_mcast_leave(mcast->dev, mcast);
-               ipoib_mcast_free(mcast);
-       }
+       ipoib_mcast_remove_list(&remove_list);
 
        /*
         * Double check that we are still up
index 9080161e01af1614afa5796a134d7fd890a1be45..c827c93f46c547ce7ec810a2636723a7f3b582a8 100644 (file)
@@ -644,7 +644,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
 
                ib_conn = &iser_conn->ib_conn;
                if (ib_conn->pi_support) {
-                       u32 sig_caps = ib_conn->device->dev_attr.sig_prot_cap;
+                       u32 sig_caps = ib_conn->device->ib_device->attrs.sig_prot_cap;
 
                        scsi_host_set_prot(shost, iser_dif_prot_caps(sig_caps));
                        scsi_host_set_guard(shost, SHOST_DIX_GUARD_IP |
@@ -656,7 +656,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
                 * max fastreg page list length.
                 */
                shost->sg_tablesize = min_t(unsigned short, shost->sg_tablesize,
-                       ib_conn->device->dev_attr.max_fast_reg_page_list_len);
+                       ib_conn->device->ib_device->attrs.max_fast_reg_page_list_len);
                shost->max_sectors = min_t(unsigned int,
                        1024, (shost->sg_tablesize * PAGE_SIZE) >> 9);
 
@@ -1059,7 +1059,8 @@ static int __init iser_init(void)
        release_wq = alloc_workqueue("release workqueue", 0, 0);
        if (!release_wq) {
                iser_err("failed to allocate release workqueue\n");
-               return -ENOMEM;
+               err = -ENOMEM;
+               goto err_alloc_wq;
        }
 
        iscsi_iser_scsi_transport = iscsi_register_transport(
@@ -1067,12 +1068,14 @@ static int __init iser_init(void)
        if (!iscsi_iser_scsi_transport) {
                iser_err("iscsi_register_transport failed\n");
                err = -EINVAL;
-               goto register_transport_failure;
+               goto err_reg;
        }
 
        return 0;
 
-register_transport_failure:
+err_reg:
+       destroy_workqueue(release_wq);
+err_alloc_wq:
        kmem_cache_destroy(ig.desc_cache);
 
        return err;
index 8a5998e6a407997906e45864ac9799fdac468084..95f0a64e076b9addc5b01fc17e268f211c9e6f04 100644 (file)
@@ -48,6 +48,7 @@
 #include <scsi/scsi_transport_iscsi.h>
 #include <scsi/scsi_cmnd.h>
 #include <scsi/scsi_device.h>
+#include <scsi/iser.h>
 
 #include <linux/interrupt.h>
 #include <linux/wait.h>
                                         - ISER_MAX_RX_MISC_PDUS) /     \
                                         (1 + ISER_INFLIGHT_DATAOUTS))
 
-#define ISER_WC_BATCH_COUNT   16
 #define ISER_SIGNAL_CMD_COUNT 32
 
-#define ISER_VER                       0x10
-#define ISER_WSV                       0x08
-#define ISER_RSV                       0x04
-
-#define ISER_FASTREG_LI_WRID           0xffffffffffffffffULL
-#define ISER_BEACON_WRID               0xfffffffffffffffeULL
-
-/**
- * struct iser_hdr - iSER header
- *
- * @flags:        flags support (zbva, remote_inv)
- * @rsvd:         reserved
- * @write_stag:   write rkey
- * @write_va:     write virtual address
- * @reaf_stag:    read rkey
- * @read_va:      read virtual address
- */
-struct iser_hdr {
-       u8      flags;
-       u8      rsvd[3];
-       __be32  write_stag;
-       __be64  write_va;
-       __be32  read_stag;
-       __be64  read_va;
-} __attribute__((packed));
-
-
-#define ISER_ZBVA_NOT_SUPPORTED                0x80
-#define ISER_SEND_W_INV_NOT_SUPPORTED  0x40
-
-struct iser_cm_hdr {
-       u8      flags;
-       u8      rsvd[3];
-} __packed;
-
 /* Constant PDU lengths calculations */
-#define ISER_HEADERS_LEN  (sizeof(struct iser_hdr) + sizeof(struct iscsi_hdr))
+#define ISER_HEADERS_LEN       (sizeof(struct iser_ctrl) + sizeof(struct iscsi_hdr))
 
 #define ISER_RECV_DATA_SEG_LEN 128
 #define ISER_RX_PAYLOAD_SIZE   (ISER_HEADERS_LEN + ISER_RECV_DATA_SEG_LEN)
@@ -269,7 +234,7 @@ enum iser_desc_type {
 #define ISER_MAX_WRS 7
 
 /**
- * struct iser_tx_desc - iSER TX descriptor (for send wr_id)
+ * struct iser_tx_desc - iSER TX descriptor
  *
  * @iser_header:   iser header
  * @iscsi_header:  iscsi header
@@ -287,12 +252,13 @@ enum iser_desc_type {
  * @sig_attrs:     Signature attributes
  */
 struct iser_tx_desc {
-       struct iser_hdr              iser_header;
+       struct iser_ctrl             iser_header;
        struct iscsi_hdr             iscsi_header;
        enum   iser_desc_type        type;
        u64                          dma_addr;
        struct ib_sge                tx_sg[2];
        int                          num_sge;
+       struct ib_cqe                cqe;
        bool                         mapped;
        u8                           wr_idx;
        union iser_wr {
@@ -306,9 +272,10 @@ struct iser_tx_desc {
 };
 
 #define ISER_RX_PAD_SIZE       (256 - (ISER_RX_PAYLOAD_SIZE + \
-                                       sizeof(u64) + sizeof(struct ib_sge)))
+                                sizeof(u64) + sizeof(struct ib_sge) + \
+                                sizeof(struct ib_cqe)))
 /**
- * struct iser_rx_desc - iSER RX descriptor (for recv wr_id)
+ * struct iser_rx_desc - iSER RX descriptor
  *
  * @iser_header:   iser header
  * @iscsi_header:  iscsi header
@@ -318,12 +285,32 @@ struct iser_tx_desc {
  * @pad:           for sense data TODO: Modify to maximum sense length supported
  */
 struct iser_rx_desc {
-       struct iser_hdr              iser_header;
+       struct iser_ctrl             iser_header;
        struct iscsi_hdr             iscsi_header;
        char                         data[ISER_RECV_DATA_SEG_LEN];
        u64                          dma_addr;
        struct ib_sge                rx_sg;
+       struct ib_cqe                cqe;
        char                         pad[ISER_RX_PAD_SIZE];
+} __packed;
+
+/**
+ * struct iser_login_desc - iSER login descriptor
+ *
+ * @req:           pointer to login request buffer
+ * @resp:          pointer to login response buffer
+ * @req_dma:       DMA address of login request buffer
+ * @rsp_dma:      DMA address of login response buffer
+ * @sge:           IB sge for login post recv
+ * @cqe:           completion handler
+ */
+struct iser_login_desc {
+       void                         *req;
+       void                         *rsp;
+       u64                          req_dma;
+       u64                          rsp_dma;
+       struct ib_sge                sge;
+       struct ib_cqe                cqe;
 } __attribute__((packed));
 
 struct iser_conn;
@@ -333,18 +320,12 @@ struct iscsi_iser_task;
 /**
  * struct iser_comp - iSER completion context
  *
- * @device:     pointer to device handle
  * @cq:         completion queue
- * @wcs:        work completion array
- * @tasklet:    Tasklet handle
  * @active_qps: Number of active QPs attached
  *              to completion context
  */
 struct iser_comp {
-       struct iser_device      *device;
        struct ib_cq            *cq;
-       struct ib_wc             wcs[ISER_WC_BATCH_COUNT];
-       struct tasklet_struct    tasklet;
        int                      active_qps;
 };
 
@@ -380,7 +361,6 @@ struct iser_reg_ops {
  *
  * @ib_device:     RDMA device
  * @pd:            Protection Domain for this device
- * @dev_attr:      Device attributes container
  * @mr:            Global DMA memory region
  * @event_handler: IB events handle routine
  * @ig_list:      entry in devices list
@@ -389,18 +369,19 @@ struct iser_reg_ops {
  *                 cpus and device max completion vectors
  * @comps:         Dinamically allocated array of completion handlers
  * @reg_ops:       Registration ops
+ * @remote_inv_sup: Remote invalidate is supported on this device
  */
 struct iser_device {
        struct ib_device             *ib_device;
        struct ib_pd                 *pd;
-       struct ib_device_attr        dev_attr;
        struct ib_mr                 *mr;
        struct ib_event_handler      event_handler;
        struct list_head             ig_list;
        int                          refcount;
        int                          comps_used;
        struct iser_comp             *comps;
-       struct iser_reg_ops          *reg_ops;
+       const struct iser_reg_ops    *reg_ops;
+       bool                         remote_inv_sup;
 };
 
 #define ISER_CHECK_GUARD       0xc0
@@ -475,10 +456,11 @@ struct iser_fr_pool {
  * @rx_wr:               receive work request for batch posts
  * @device:              reference to iser device
  * @comp:                iser completion context
- * @pi_support:          Indicate device T10-PI support
- * @beacon:              beacon send wr to signal all flush errors were drained
- * @flush_comp:          completes when all connection completions consumed
  * @fr_pool:             connection fast registration poool
+ * @pi_support:          Indicate device T10-PI support
+ * @last:                last send wr to signal all flush errors were drained
+ * @last_cqe:            cqe handler for last wr
+ * @last_comp:           completes when all connection completions consumed
  */
 struct ib_conn {
        struct rdma_cm_id           *cma_id;
@@ -488,10 +470,12 @@ struct ib_conn {
        struct ib_recv_wr            rx_wr[ISER_MIN_POSTED_RX];
        struct iser_device          *device;
        struct iser_comp            *comp;
-       bool                         pi_support;
-       struct ib_send_wr            beacon;
-       struct completion            flush_comp;
        struct iser_fr_pool          fr_pool;
+       bool                         pi_support;
+       struct ib_send_wr            last;
+       struct ib_cqe                last_cqe;
+       struct ib_cqe                reg_cqe;
+       struct completion            last_comp;
 };
 
 /**
@@ -514,11 +498,7 @@ struct ib_conn {
  * @up_completion:    connection establishment completed
  *                    (state is ISER_CONN_UP)
  * @conn_list:        entry in ig conn list
- * @login_buf:        login data buffer (stores login parameters)
- * @login_req_buf:    login request buffer
- * @login_req_dma:    login request buffer dma address
- * @login_resp_buf:   login response buffer
- * @login_resp_dma:   login response buffer dma address
+ * @login_desc:       login descriptor
  * @rx_desc_head:     head of rx_descs cyclic buffer
  * @rx_descs:         rx buffers array (cyclic buffer)
  * @num_rx_descs:     number of rx descriptors
@@ -541,15 +521,13 @@ struct iser_conn {
        struct completion            ib_completion;
        struct completion            up_completion;
        struct list_head             conn_list;
-
-       char                         *login_buf;
-       char                         *login_req_buf, *login_resp_buf;
-       u64                          login_req_dma, login_resp_dma;
+       struct iser_login_desc       login_desc;
        unsigned int                 rx_desc_head;
        struct iser_rx_desc          *rx_descs;
        u32                          num_rx_descs;
        unsigned short               scsi_sg_tablesize;
        unsigned int                 scsi_max_sectors;
+       bool                         snd_w_inv;
 };
 
 /**
@@ -579,9 +557,8 @@ struct iscsi_iser_task {
 
 struct iser_page_vec {
        u64 *pages;
-       int length;
-       int offset;
-       int data_size;
+       int npages;
+       struct ib_mr fake_mr;
 };
 
 /**
@@ -633,12 +610,14 @@ int iser_conn_terminate(struct iser_conn *iser_conn);
 
 void iser_release_work(struct work_struct *work);
 
-void iser_rcv_completion(struct iser_rx_desc *desc,
-                        unsigned long dto_xfer_len,
-                        struct ib_conn *ib_conn);
-
-void iser_snd_completion(struct iser_tx_desc *desc,
-                        struct ib_conn *ib_conn);
+void iser_err_comp(struct ib_wc *wc, const char *type);
+void iser_login_rsp(struct ib_cq *cq, struct ib_wc *wc);
+void iser_task_rsp(struct ib_cq *cq, struct ib_wc *wc);
+void iser_cmd_comp(struct ib_cq *cq, struct ib_wc *wc);
+void iser_ctrl_comp(struct ib_cq *cq, struct ib_wc *wc);
+void iser_dataout_comp(struct ib_cq *cq, struct ib_wc *wc);
+void iser_reg_comp(struct ib_cq *cq, struct ib_wc *wc);
+void iser_last_comp(struct ib_cq *cq, struct ib_wc *wc);
 
 void iser_task_rdma_init(struct iscsi_iser_task *task);
 
@@ -651,7 +630,8 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
                                     enum iser_data_dir cmd_dir);
 
 int iser_reg_rdma_mem(struct iscsi_iser_task *task,
-                     enum iser_data_dir dir);
+                     enum iser_data_dir dir,
+                     bool all_imm);
 void iser_unreg_rdma_mem(struct iscsi_iser_task *task,
                         enum iser_data_dir dir);
 
@@ -719,4 +699,28 @@ iser_tx_next_wr(struct iser_tx_desc *tx_desc)
        return cur_wr;
 }
 
+static inline struct iser_conn *
+to_iser_conn(struct ib_conn *ib_conn)
+{
+       return container_of(ib_conn, struct iser_conn, ib_conn);
+}
+
+static inline struct iser_rx_desc *
+iser_rx(struct ib_cqe *cqe)
+{
+       return container_of(cqe, struct iser_rx_desc, cqe);
+}
+
+static inline struct iser_tx_desc *
+iser_tx(struct ib_cqe *cqe)
+{
+       return container_of(cqe, struct iser_tx_desc, cqe);
+}
+
+static inline struct iser_login_desc *
+iser_login(struct ib_cqe *cqe)
+{
+       return container_of(cqe, struct iser_login_desc, cqe);
+}
+
 #endif
index ffd00c42072959ed6747b5094e159cb34532675f..ed54b388e7adca899bfaec181c2f4c2ba75b1f47 100644 (file)
@@ -51,7 +51,7 @@ static int iser_prepare_read_cmd(struct iscsi_task *task)
        struct iscsi_iser_task *iser_task = task->dd_data;
        struct iser_mem_reg *mem_reg;
        int err;
-       struct iser_hdr *hdr = &iser_task->desc.iser_header;
+       struct iser_ctrl *hdr = &iser_task->desc.iser_header;
        struct iser_data_buf *buf_in = &iser_task->data[ISER_DIR_IN];
 
        err = iser_dma_map_task_data(iser_task,
@@ -72,7 +72,7 @@ static int iser_prepare_read_cmd(struct iscsi_task *task)
                        return err;
        }
 
-       err = iser_reg_rdma_mem(iser_task, ISER_DIR_IN);
+       err = iser_reg_rdma_mem(iser_task, ISER_DIR_IN, false);
        if (err) {
                iser_err("Failed to set up Data-IN RDMA\n");
                return err;
@@ -104,7 +104,7 @@ iser_prepare_write_cmd(struct iscsi_task *task,
        struct iscsi_iser_task *iser_task = task->dd_data;
        struct iser_mem_reg *mem_reg;
        int err;
-       struct iser_hdr *hdr = &iser_task->desc.iser_header;
+       struct iser_ctrl *hdr = &iser_task->desc.iser_header;
        struct iser_data_buf *buf_out = &iser_task->data[ISER_DIR_OUT];
        struct ib_sge *tx_dsg = &iser_task->desc.tx_sg[1];
 
@@ -126,7 +126,8 @@ iser_prepare_write_cmd(struct iscsi_task *task,
                        return err;
        }
 
-       err = iser_reg_rdma_mem(iser_task, ISER_DIR_OUT);
+       err = iser_reg_rdma_mem(iser_task, ISER_DIR_OUT,
+                               buf_out->data_len == imm_sz);
        if (err != 0) {
                iser_err("Failed to register write cmd RDMA mem\n");
                return err;
@@ -166,7 +167,7 @@ static void iser_create_send_desc(struct iser_conn  *iser_conn,
        ib_dma_sync_single_for_cpu(device->ib_device,
                tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE);
 
-       memset(&tx_desc->iser_header, 0, sizeof(struct iser_hdr));
+       memset(&tx_desc->iser_header, 0, sizeof(struct iser_ctrl));
        tx_desc->iser_header.flags = ISER_VER;
        tx_desc->num_sge = 1;
 }
@@ -174,73 +175,63 @@ static void iser_create_send_desc(struct iser_conn        *iser_conn,
 static void iser_free_login_buf(struct iser_conn *iser_conn)
 {
        struct iser_device *device = iser_conn->ib_conn.device;
+       struct iser_login_desc *desc = &iser_conn->login_desc;
 
-       if (!iser_conn->login_buf)
+       if (!desc->req)
                return;
 
-       if (iser_conn->login_req_dma)
-               ib_dma_unmap_single(device->ib_device,
-                                   iser_conn->login_req_dma,
-                                   ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
+       ib_dma_unmap_single(device->ib_device, desc->req_dma,
+                           ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
 
-       if (iser_conn->login_resp_dma)
-               ib_dma_unmap_single(device->ib_device,
-                                   iser_conn->login_resp_dma,
-                                   ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
+       ib_dma_unmap_single(device->ib_device, desc->rsp_dma,
+                           ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
 
-       kfree(iser_conn->login_buf);
+       kfree(desc->req);
+       kfree(desc->rsp);
 
        /* make sure we never redo any unmapping */
-       iser_conn->login_req_dma = 0;
-       iser_conn->login_resp_dma = 0;
-       iser_conn->login_buf = NULL;
+       desc->req = NULL;
+       desc->rsp = NULL;
 }
 
 static int iser_alloc_login_buf(struct iser_conn *iser_conn)
 {
        struct iser_device *device = iser_conn->ib_conn.device;
-       int                     req_err, resp_err;
-
-       BUG_ON(device == NULL);
-
-       iser_conn->login_buf = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN +
-                                    ISER_RX_LOGIN_SIZE, GFP_KERNEL);
-       if (!iser_conn->login_buf)
-               goto out_err;
-
-       iser_conn->login_req_buf  = iser_conn->login_buf;
-       iser_conn->login_resp_buf = iser_conn->login_buf +
-                                               ISCSI_DEF_MAX_RECV_SEG_LEN;
-
-       iser_conn->login_req_dma = ib_dma_map_single(device->ib_device,
-                                                    iser_conn->login_req_buf,
-                                                    ISCSI_DEF_MAX_RECV_SEG_LEN,
-                                                    DMA_TO_DEVICE);
-
-       iser_conn->login_resp_dma = ib_dma_map_single(device->ib_device,
-                                                     iser_conn->login_resp_buf,
-                                                     ISER_RX_LOGIN_SIZE,
-                                                     DMA_FROM_DEVICE);
-
-       req_err  = ib_dma_mapping_error(device->ib_device,
-                                       iser_conn->login_req_dma);
-       resp_err = ib_dma_mapping_error(device->ib_device,
-                                       iser_conn->login_resp_dma);
-
-       if (req_err || resp_err) {
-               if (req_err)
-                       iser_conn->login_req_dma = 0;
-               if (resp_err)
-                       iser_conn->login_resp_dma = 0;
-               goto free_login_buf;
-       }
+       struct iser_login_desc *desc = &iser_conn->login_desc;
+
+       desc->req = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN, GFP_KERNEL);
+       if (!desc->req)
+               return -ENOMEM;
+
+       desc->req_dma = ib_dma_map_single(device->ib_device, desc->req,
+                                         ISCSI_DEF_MAX_RECV_SEG_LEN,
+                                         DMA_TO_DEVICE);
+       if (ib_dma_mapping_error(device->ib_device,
+                               desc->req_dma))
+               goto free_req;
+
+       desc->rsp = kmalloc(ISER_RX_LOGIN_SIZE, GFP_KERNEL);
+       if (!desc->rsp)
+               goto unmap_req;
+
+       desc->rsp_dma = ib_dma_map_single(device->ib_device, desc->rsp,
+                                          ISER_RX_LOGIN_SIZE,
+                                          DMA_FROM_DEVICE);
+       if (ib_dma_mapping_error(device->ib_device,
+                               desc->rsp_dma))
+               goto free_rsp;
+
        return 0;
 
-free_login_buf:
-       iser_free_login_buf(iser_conn);
+free_rsp:
+       kfree(desc->rsp);
+unmap_req:
+       ib_dma_unmap_single(device->ib_device, desc->req_dma,
+                           ISCSI_DEF_MAX_RECV_SEG_LEN,
+                           DMA_TO_DEVICE);
+free_req:
+       kfree(desc->req);
 
-out_err:
-       iser_err("unable to alloc or map login buf\n");
        return -ENOMEM;
 }
 
@@ -280,11 +271,11 @@ int iser_alloc_rx_descriptors(struct iser_conn *iser_conn,
                        goto rx_desc_dma_map_failed;
 
                rx_desc->dma_addr = dma_addr;
-
+               rx_desc->cqe.done = iser_task_rsp;
                rx_sg = &rx_desc->rx_sg;
-               rx_sg->addr   = rx_desc->dma_addr;
+               rx_sg->addr = rx_desc->dma_addr;
                rx_sg->length = ISER_RX_PAYLOAD_SIZE;
-               rx_sg->lkey   = device->pd->local_dma_lkey;
+               rx_sg->lkey = device->pd->local_dma_lkey;
        }
 
        iser_conn->rx_desc_head = 0;
@@ -383,6 +374,7 @@ int iser_send_command(struct iscsi_conn *conn,
 
        /* build the tx desc regd header and add it to the tx desc dto */
        tx_desc->type = ISCSI_TX_SCSI_COMMAND;
+       tx_desc->cqe.done = iser_cmd_comp;
        iser_create_send_desc(iser_conn, tx_desc);
 
        if (hdr->flags & ISCSI_FLAG_CMD_READ) {
@@ -464,6 +456,7 @@ int iser_send_data_out(struct iscsi_conn *conn,
        }
 
        tx_desc->type = ISCSI_TX_DATAOUT;
+       tx_desc->cqe.done = iser_dataout_comp;
        tx_desc->iser_header.flags = ISER_VER;
        memcpy(&tx_desc->iscsi_header, hdr, sizeof(struct iscsi_hdr));
 
@@ -513,6 +506,7 @@ int iser_send_control(struct iscsi_conn *conn,
 
        /* build the tx desc regd header and add it to the tx desc dto */
        mdesc->type = ISCSI_TX_CONTROL;
+       mdesc->cqe.done = iser_ctrl_comp;
        iser_create_send_desc(iser_conn, mdesc);
 
        device = iser_conn->ib_conn.device;
@@ -520,25 +514,25 @@ int iser_send_control(struct iscsi_conn *conn,
        data_seg_len = ntoh24(task->hdr->dlength);
 
        if (data_seg_len > 0) {
+               struct iser_login_desc *desc = &iser_conn->login_desc;
                struct ib_sge *tx_dsg = &mdesc->tx_sg[1];
+
                if (task != conn->login_task) {
                        iser_err("data present on non login task!!!\n");
                        goto send_control_error;
                }
 
-               ib_dma_sync_single_for_cpu(device->ib_device,
-                       iser_conn->login_req_dma, task->data_count,
-                       DMA_TO_DEVICE);
+               ib_dma_sync_single_for_cpu(device->ib_device, desc->req_dma,
+                                          task->data_count, DMA_TO_DEVICE);
 
-               memcpy(iser_conn->login_req_buf, task->data, task->data_count);
+               memcpy(desc->req, task->data, task->data_count);
 
-               ib_dma_sync_single_for_device(device->ib_device,
-                       iser_conn->login_req_dma, task->data_count,
-                       DMA_TO_DEVICE);
+               ib_dma_sync_single_for_device(device->ib_device, desc->req_dma,
+                                             task->data_count, DMA_TO_DEVICE);
 
-               tx_dsg->addr    = iser_conn->login_req_dma;
-               tx_dsg->length  = task->data_count;
-               tx_dsg->lkey    = device->pd->local_dma_lkey;
+               tx_dsg->addr = desc->req_dma;
+               tx_dsg->length = task->data_count;
+               tx_dsg->lkey = device->pd->local_dma_lkey;
                mdesc->num_sge = 2;
        }
 
@@ -562,41 +556,126 @@ send_control_error:
        return err;
 }
 
-/**
- * iser_rcv_dto_completion - recv DTO completion
- */
-void iser_rcv_completion(struct iser_rx_desc *rx_desc,
-                        unsigned long rx_xfer_len,
-                        struct ib_conn *ib_conn)
+void iser_login_rsp(struct ib_cq *cq, struct ib_wc *wc)
 {
-       struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
-                                                  ib_conn);
+       struct ib_conn *ib_conn = wc->qp->qp_context;
+       struct iser_conn *iser_conn = to_iser_conn(ib_conn);
+       struct iser_login_desc *desc = iser_login(wc->wr_cqe);
        struct iscsi_hdr *hdr;
-       u64 rx_dma;
-       int rx_buflen, outstanding, count, err;
+       char *data;
+       int length;
 
-       /* differentiate between login to all other PDUs */
-       if ((char *)rx_desc == iser_conn->login_resp_buf) {
-               rx_dma = iser_conn->login_resp_dma;
-               rx_buflen = ISER_RX_LOGIN_SIZE;
-       } else {
-               rx_dma = rx_desc->dma_addr;
-               rx_buflen = ISER_RX_PAYLOAD_SIZE;
+       if (unlikely(wc->status != IB_WC_SUCCESS)) {
+               iser_err_comp(wc, "login_rsp");
+               return;
+       }
+
+       ib_dma_sync_single_for_cpu(ib_conn->device->ib_device,
+                                  desc->rsp_dma, ISER_RX_LOGIN_SIZE,
+                                  DMA_FROM_DEVICE);
+
+       hdr = desc->rsp + sizeof(struct iser_ctrl);
+       data = desc->rsp + ISER_HEADERS_LEN;
+       length = wc->byte_len - ISER_HEADERS_LEN;
+
+       iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode,
+                hdr->itt, length);
+
+       iscsi_iser_recv(iser_conn->iscsi_conn, hdr, data, length);
+
+       ib_dma_sync_single_for_device(ib_conn->device->ib_device,
+                                     desc->rsp_dma, ISER_RX_LOGIN_SIZE,
+                                     DMA_FROM_DEVICE);
+
+       ib_conn->post_recv_buf_count--;
+}
+
+static inline void
+iser_inv_desc(struct iser_fr_desc *desc, u32 rkey)
+{
+       if (likely(rkey == desc->rsc.mr->rkey))
+               desc->rsc.mr_valid = 0;
+       else if (likely(rkey == desc->pi_ctx->sig_mr->rkey))
+               desc->pi_ctx->sig_mr_valid = 0;
+}
+
+static int
+iser_check_remote_inv(struct iser_conn *iser_conn,
+                     struct ib_wc *wc,
+                     struct iscsi_hdr *hdr)
+{
+       if (wc->wc_flags & IB_WC_WITH_INVALIDATE) {
+               struct iscsi_task *task;
+               u32 rkey = wc->ex.invalidate_rkey;
+
+               iser_dbg("conn %p: remote invalidation for rkey %#x\n",
+                        iser_conn, rkey);
+
+               if (unlikely(!iser_conn->snd_w_inv)) {
+                       iser_err("conn %p: unexepected remote invalidation, "
+                                "terminating connection\n", iser_conn);
+                       return -EPROTO;
+               }
+
+               task = iscsi_itt_to_ctask(iser_conn->iscsi_conn, hdr->itt);
+               if (likely(task)) {
+                       struct iscsi_iser_task *iser_task = task->dd_data;
+                       struct iser_fr_desc *desc;
+
+                       if (iser_task->dir[ISER_DIR_IN]) {
+                               desc = iser_task->rdma_reg[ISER_DIR_IN].mem_h;
+                               iser_inv_desc(desc, rkey);
+                       }
+
+                       if (iser_task->dir[ISER_DIR_OUT]) {
+                               desc = iser_task->rdma_reg[ISER_DIR_OUT].mem_h;
+                               iser_inv_desc(desc, rkey);
+                       }
+               } else {
+                       iser_err("failed to get task for itt=%d\n", hdr->itt);
+                       return -EINVAL;
+               }
        }
 
-       ib_dma_sync_single_for_cpu(ib_conn->device->ib_device, rx_dma,
-                                  rx_buflen, DMA_FROM_DEVICE);
+       return 0;
+}
 
-       hdr = &rx_desc->iscsi_header;
+
+void iser_task_rsp(struct ib_cq *cq, struct ib_wc *wc)
+{
+       struct ib_conn *ib_conn = wc->qp->qp_context;
+       struct iser_conn *iser_conn = to_iser_conn(ib_conn);
+       struct iser_rx_desc *desc = iser_rx(wc->wr_cqe);
+       struct iscsi_hdr *hdr;
+       int length;
+       int outstanding, count, err;
+
+       if (unlikely(wc->status != IB_WC_SUCCESS)) {
+               iser_err_comp(wc, "task_rsp");
+               return;
+       }
+
+       ib_dma_sync_single_for_cpu(ib_conn->device->ib_device,
+                                  desc->dma_addr, ISER_RX_PAYLOAD_SIZE,
+                                  DMA_FROM_DEVICE);
+
+       hdr = &desc->iscsi_header;
+       length = wc->byte_len - ISER_HEADERS_LEN;
 
        iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode,
-                       hdr->itt, (int)(rx_xfer_len - ISER_HEADERS_LEN));
+                hdr->itt, length);
+
+       if (iser_check_remote_inv(iser_conn, wc, hdr)) {
+               iscsi_conn_failure(iser_conn->iscsi_conn,
+                                  ISCSI_ERR_CONN_FAILED);
+               return;
+       }
 
-       iscsi_iser_recv(iser_conn->iscsi_conn, hdr, rx_desc->data,
-                       rx_xfer_len - ISER_HEADERS_LEN);
+       iscsi_iser_recv(iser_conn->iscsi_conn, hdr, desc->data, length);
 
-       ib_dma_sync_single_for_device(ib_conn->device->ib_device, rx_dma,
-                                     rx_buflen, DMA_FROM_DEVICE);
+       ib_dma_sync_single_for_device(ib_conn->device->ib_device,
+                                     desc->dma_addr, ISER_RX_PAYLOAD_SIZE,
+                                     DMA_FROM_DEVICE);
 
        /* decrementing conn->post_recv_buf_count only --after-- freeing the   *
         * task eliminates the need to worry on tasks which are completed in   *
@@ -604,9 +683,6 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc,
         * for the posted rx bufs refcount to become zero handles everything   */
        ib_conn->post_recv_buf_count--;
 
-       if (rx_dma == iser_conn->login_resp_dma)
-               return;
-
        outstanding = ib_conn->post_recv_buf_count;
        if (outstanding + iser_conn->min_posted_rx <= iser_conn->qp_max_recv_dtos) {
                count = min(iser_conn->qp_max_recv_dtos - outstanding,
@@ -617,26 +693,47 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc,
        }
 }
 
-void iser_snd_completion(struct iser_tx_desc *tx_desc,
-                       struct ib_conn *ib_conn)
+void iser_cmd_comp(struct ib_cq *cq, struct ib_wc *wc)
 {
+       if (unlikely(wc->status != IB_WC_SUCCESS))
+               iser_err_comp(wc, "command");
+}
+
+void iser_ctrl_comp(struct ib_cq *cq, struct ib_wc *wc)
+{
+       struct iser_tx_desc *desc = iser_tx(wc->wr_cqe);
        struct iscsi_task *task;
-       struct iser_device *device = ib_conn->device;
 
-       if (tx_desc->type == ISCSI_TX_DATAOUT) {
-               ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr,
-                                       ISER_HEADERS_LEN, DMA_TO_DEVICE);
-               kmem_cache_free(ig.desc_cache, tx_desc);
-               tx_desc = NULL;
+       if (unlikely(wc->status != IB_WC_SUCCESS)) {
+               iser_err_comp(wc, "control");
+               return;
        }
 
-       if (tx_desc && tx_desc->type == ISCSI_TX_CONTROL) {
-               /* this arithmetic is legal by libiscsi dd_data allocation */
-               task = (void *) ((long)(void *)tx_desc -
-                                 sizeof(struct iscsi_task));
-               if (task->hdr->itt == RESERVED_ITT)
-                       iscsi_put_task(task);
-       }
+       /* this arithmetic is legal by libiscsi dd_data allocation */
+       task = (void *)desc - sizeof(struct iscsi_task);
+       if (task->hdr->itt == RESERVED_ITT)
+               iscsi_put_task(task);
+}
+
+void iser_dataout_comp(struct ib_cq *cq, struct ib_wc *wc)
+{
+       struct iser_tx_desc *desc = iser_tx(wc->wr_cqe);
+       struct ib_conn *ib_conn = wc->qp->qp_context;
+       struct iser_device *device = ib_conn->device;
+
+       if (unlikely(wc->status != IB_WC_SUCCESS))
+               iser_err_comp(wc, "dataout");
+
+       ib_dma_unmap_single(device->ib_device, desc->dma_addr,
+                           ISER_HEADERS_LEN, DMA_TO_DEVICE);
+       kmem_cache_free(ig.desc_cache, desc);
+}
+
+void iser_last_comp(struct ib_cq *cq, struct ib_wc *wc)
+{
+       struct ib_conn *ib_conn = wc->qp->qp_context;
+
+       complete(&ib_conn->last_comp);
 }
 
 void iser_task_rdma_init(struct iscsi_iser_task *iser_task)
index ea765fb9664d36759ff1a90d11272c1bc13ca457..9a391cc5b9b3f45f16f0a49138aeda5b96030b01 100644 (file)
@@ -49,7 +49,7 @@ int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
                     struct iser_reg_resources *rsc,
                     struct iser_mem_reg *mem_reg);
 
-static struct iser_reg_ops fastreg_ops = {
+static const struct iser_reg_ops fastreg_ops = {
        .alloc_reg_res  = iser_alloc_fastreg_pool,
        .free_reg_res   = iser_free_fastreg_pool,
        .reg_mem        = iser_fast_reg_mr,
@@ -58,7 +58,7 @@ static struct iser_reg_ops fastreg_ops = {
        .reg_desc_put   = iser_reg_desc_put_fr,
 };
 
-static struct iser_reg_ops fmr_ops = {
+static const struct iser_reg_ops fmr_ops = {
        .alloc_reg_res  = iser_alloc_fmr_pool,
        .free_reg_res   = iser_free_fmr_pool,
        .reg_mem        = iser_fast_reg_fmr,
@@ -67,19 +67,24 @@ static struct iser_reg_ops fmr_ops = {
        .reg_desc_put   = iser_reg_desc_put_fmr,
 };
 
+void iser_reg_comp(struct ib_cq *cq, struct ib_wc *wc)
+{
+       iser_err_comp(wc, "memreg");
+}
+
 int iser_assign_reg_ops(struct iser_device *device)
 {
-       struct ib_device_attr *dev_attr = &device->dev_attr;
+       struct ib_device *ib_dev = device->ib_device;
 
        /* Assign function handles  - based on FMR support */
-       if (device->ib_device->alloc_fmr && device->ib_device->dealloc_fmr &&
-           device->ib_device->map_phys_fmr && device->ib_device->unmap_fmr) {
+       if (ib_dev->alloc_fmr && ib_dev->dealloc_fmr &&
+           ib_dev->map_phys_fmr && ib_dev->unmap_fmr) {
                iser_info("FMR supported, using FMR for registration\n");
                device->reg_ops = &fmr_ops;
-       } else
-       if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
+       } else if (ib_dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
                iser_info("FastReg supported, using FastReg for registration\n");
                device->reg_ops = &fastreg_ops;
+               device->remote_inv_sup = iser_always_reg;
        } else {
                iser_err("IB device does not support FMRs nor FastRegs, can't register memory\n");
                return -1;
@@ -131,67 +136,6 @@ iser_reg_desc_put_fmr(struct ib_conn *ib_conn,
 {
 }
 
-#define IS_4K_ALIGNED(addr)    ((((unsigned long)addr) & ~MASK_4K) == 0)
-
-/**
- * iser_sg_to_page_vec - Translates scatterlist entries to physical addresses
- * and returns the length of resulting physical address array (may be less than
- * the original due to possible compaction).
- *
- * we build a "page vec" under the assumption that the SG meets the RDMA
- * alignment requirements. Other then the first and last SG elements, all
- * the "internal" elements can be compacted into a list whose elements are
- * dma addresses of physical pages. The code supports also the weird case
- * where --few fragments of the same page-- are present in the SG as
- * consecutive elements. Also, it handles one entry SG.
- */
-
-static int iser_sg_to_page_vec(struct iser_data_buf *data,
-                              struct ib_device *ibdev, u64 *pages,
-                              int *offset, int *data_size)
-{
-       struct scatterlist *sg, *sgl = data->sg;
-       u64 start_addr, end_addr, page, chunk_start = 0;
-       unsigned long total_sz = 0;
-       unsigned int dma_len;
-       int i, new_chunk, cur_page, last_ent = data->dma_nents - 1;
-
-       /* compute the offset of first element */
-       *offset = (u64) sgl[0].offset & ~MASK_4K;
-
-       new_chunk = 1;
-       cur_page  = 0;
-       for_each_sg(sgl, sg, data->dma_nents, i) {
-               start_addr = ib_sg_dma_address(ibdev, sg);
-               if (new_chunk)
-                       chunk_start = start_addr;
-               dma_len = ib_sg_dma_len(ibdev, sg);
-               end_addr = start_addr + dma_len;
-               total_sz += dma_len;
-
-               /* collect page fragments until aligned or end of SG list */
-               if (!IS_4K_ALIGNED(end_addr) && i < last_ent) {
-                       new_chunk = 0;
-                       continue;
-               }
-               new_chunk = 1;
-
-               /* address of the first page in the contiguous chunk;
-                  masking relevant for the very first SG entry,
-                  which might be unaligned */
-               page = chunk_start & MASK_4K;
-               do {
-                       pages[cur_page++] = page;
-                       page += SIZE_4K;
-               } while (page < end_addr);
-       }
-
-       *data_size = total_sz;
-       iser_dbg("page_vec->data_size:%d cur_page %d\n",
-                *data_size, cur_page);
-       return cur_page;
-}
-
 static void iser_data_buf_dump(struct iser_data_buf *data,
                               struct ib_device *ibdev)
 {
@@ -210,10 +154,10 @@ static void iser_dump_page_vec(struct iser_page_vec *page_vec)
 {
        int i;
 
-       iser_err("page vec length %d data size %d\n",
-                page_vec->length, page_vec->data_size);
-       for (i = 0; i < page_vec->length; i++)
-               iser_err("%d %lx\n",i,(unsigned long)page_vec->pages[i]);
+       iser_err("page vec npages %d data length %d\n",
+                page_vec->npages, page_vec->fake_mr.length);
+       for (i = 0; i < page_vec->npages; i++)
+               iser_err("vec[%d]: %llx\n", i, page_vec->pages[i]);
 }
 
 int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
@@ -251,7 +195,11 @@ iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem,
        struct scatterlist *sg = mem->sg;
 
        reg->sge.lkey = device->pd->local_dma_lkey;
-       reg->rkey = device->mr->rkey;
+       /*
+        * FIXME: rework the registration code path to differentiate
+        * rkey/lkey use cases
+        */
+       reg->rkey = device->mr ? device->mr->rkey : 0;
        reg->sge.addr = ib_sg_dma_address(device->ib_device, &sg[0]);
        reg->sge.length = ib_sg_dma_len(device->ib_device, &sg[0]);
 
@@ -262,11 +210,16 @@ iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem,
        return 0;
 }
 
-/**
- * iser_reg_page_vec - Register physical memory
- *
- * returns: 0 on success, errno code on failure
- */
+static int iser_set_page(struct ib_mr *mr, u64 addr)
+{
+       struct iser_page_vec *page_vec =
+               container_of(mr, struct iser_page_vec, fake_mr);
+
+       page_vec->pages[page_vec->npages++] = addr;
+
+       return 0;
+}
+
 static
 int iser_fast_reg_fmr(struct iscsi_iser_task *iser_task,
                      struct iser_data_buf *mem,
@@ -280,22 +233,19 @@ int iser_fast_reg_fmr(struct iscsi_iser_task *iser_task,
        struct ib_pool_fmr *fmr;
        int ret, plen;
 
-       plen = iser_sg_to_page_vec(mem, device->ib_device,
-                                  page_vec->pages,
-                                  &page_vec->offset,
-                                  &page_vec->data_size);
-       page_vec->length = plen;
-       if (plen * SIZE_4K < page_vec->data_size) {
+       page_vec->npages = 0;
+       page_vec->fake_mr.page_size = SIZE_4K;
+       plen = ib_sg_to_pages(&page_vec->fake_mr, mem->sg,
+                             mem->size, iser_set_page);
+       if (unlikely(plen < mem->size)) {
                iser_err("page vec too short to hold this SG\n");
                iser_data_buf_dump(mem, device->ib_device);
                iser_dump_page_vec(page_vec);
                return -EINVAL;
        }
 
-       fmr  = ib_fmr_pool_map_phys(fmr_pool,
-                                   page_vec->pages,
-                                   page_vec->length,
-                                   page_vec->pages[0]);
+       fmr  = ib_fmr_pool_map_phys(fmr_pool, page_vec->pages,
+                                   page_vec->npages, page_vec->pages[0]);
        if (IS_ERR(fmr)) {
                ret = PTR_ERR(fmr);
                iser_err("ib_fmr_pool_map_phys failed: %d\n", ret);
@@ -304,8 +254,8 @@ int iser_fast_reg_fmr(struct iscsi_iser_task *iser_task,
 
        reg->sge.lkey = fmr->fmr->lkey;
        reg->rkey = fmr->fmr->rkey;
-       reg->sge.addr = page_vec->pages[0] + page_vec->offset;
-       reg->sge.length = page_vec->data_size;
+       reg->sge.addr = page_vec->fake_mr.iova;
+       reg->sge.length = page_vec->fake_mr.length;
        reg->mem_h = fmr;
 
        iser_dbg("fmr reg: lkey=0x%x, rkey=0x%x, addr=0x%llx,"
@@ -413,19 +363,16 @@ iser_set_prot_checks(struct scsi_cmnd *sc, u8 *mask)
                *mask |= ISER_CHECK_GUARD;
 }
 
-static void
-iser_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr)
+static inline void
+iser_inv_rkey(struct ib_send_wr *inv_wr,
+             struct ib_mr *mr,
+             struct ib_cqe *cqe)
 {
-       u32 rkey;
-
        inv_wr->opcode = IB_WR_LOCAL_INV;
-       inv_wr->wr_id = ISER_FASTREG_LI_WRID;
+       inv_wr->wr_cqe = cqe;
        inv_wr->ex.invalidate_rkey = mr->rkey;
        inv_wr->send_flags = 0;
        inv_wr->num_sge = 0;
-
-       rkey = ib_inc_rkey(mr->rkey);
-       ib_update_fast_reg_key(mr, rkey);
 }
 
 static int
@@ -437,7 +384,9 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
 {
        struct iser_tx_desc *tx_desc = &iser_task->desc;
        struct ib_sig_attrs *sig_attrs = &tx_desc->sig_attrs;
+       struct ib_cqe *cqe = &iser_task->iser_conn->ib_conn.reg_cqe;
        struct ib_sig_handover_wr *wr;
+       struct ib_mr *mr = pi_ctx->sig_mr;
        int ret;
 
        memset(sig_attrs, 0, sizeof(*sig_attrs));
@@ -447,17 +396,19 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
 
        iser_set_prot_checks(iser_task->sc, &sig_attrs->check_mask);
 
-       if (!pi_ctx->sig_mr_valid)
-               iser_inv_rkey(iser_tx_next_wr(tx_desc), pi_ctx->sig_mr);
+       if (pi_ctx->sig_mr_valid)
+               iser_inv_rkey(iser_tx_next_wr(tx_desc), mr, cqe);
+
+       ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey));
 
        wr = sig_handover_wr(iser_tx_next_wr(tx_desc));
        wr->wr.opcode = IB_WR_REG_SIG_MR;
-       wr->wr.wr_id = ISER_FASTREG_LI_WRID;
+       wr->wr.wr_cqe = cqe;
        wr->wr.sg_list = &data_reg->sge;
        wr->wr.num_sge = 1;
        wr->wr.send_flags = 0;
        wr->sig_attrs = sig_attrs;
-       wr->sig_mr = pi_ctx->sig_mr;
+       wr->sig_mr = mr;
        if (scsi_prot_sg_count(iser_task->sc))
                wr->prot = &prot_reg->sge;
        else
@@ -465,10 +416,10 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
        wr->access_flags = IB_ACCESS_LOCAL_WRITE |
                           IB_ACCESS_REMOTE_READ |
                           IB_ACCESS_REMOTE_WRITE;
-       pi_ctx->sig_mr_valid = 0;
+       pi_ctx->sig_mr_valid = 1;
 
-       sig_reg->sge.lkey = pi_ctx->sig_mr->lkey;
-       sig_reg->rkey = pi_ctx->sig_mr->rkey;
+       sig_reg->sge.lkey = mr->lkey;
+       sig_reg->rkey = mr->rkey;
        sig_reg->sge.addr = 0;
        sig_reg->sge.length = scsi_transfer_length(iser_task->sc);
 
@@ -485,12 +436,15 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
                            struct iser_mem_reg *reg)
 {
        struct iser_tx_desc *tx_desc = &iser_task->desc;
+       struct ib_cqe *cqe = &iser_task->iser_conn->ib_conn.reg_cqe;
        struct ib_mr *mr = rsc->mr;
        struct ib_reg_wr *wr;
        int n;
 
-       if (!rsc->mr_valid)
-               iser_inv_rkey(iser_tx_next_wr(tx_desc), mr);
+       if (rsc->mr_valid)
+               iser_inv_rkey(iser_tx_next_wr(tx_desc), mr, cqe);
+
+       ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey));
 
        n = ib_map_mr_sg(mr, mem->sg, mem->size, SIZE_4K);
        if (unlikely(n != mem->size)) {
@@ -501,7 +455,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
 
        wr = reg_wr(iser_tx_next_wr(tx_desc));
        wr->wr.opcode = IB_WR_REG_MR;
-       wr->wr.wr_id = ISER_FASTREG_LI_WRID;
+       wr->wr.wr_cqe = cqe;
        wr->wr.send_flags = 0;
        wr->wr.num_sge = 0;
        wr->mr = mr;
@@ -510,7 +464,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
                     IB_ACCESS_REMOTE_WRITE |
                     IB_ACCESS_REMOTE_READ;
 
-       rsc->mr_valid = 0;
+       rsc->mr_valid = 1;
 
        reg->sge.lkey = mr->lkey;
        reg->rkey = mr->rkey;
@@ -554,7 +508,8 @@ iser_reg_data_sg(struct iscsi_iser_task *task,
 }
 
 int iser_reg_rdma_mem(struct iscsi_iser_task *task,
-                     enum iser_data_dir dir)
+                     enum iser_data_dir dir,
+                     bool all_imm)
 {
        struct ib_conn *ib_conn = &task->iser_conn->ib_conn;
        struct iser_device *device = ib_conn->device;
@@ -565,8 +520,8 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *task,
        bool use_dma_key;
        int err;
 
-       use_dma_key = (mem->dma_nents == 1 && !iser_always_reg &&
-                      scsi_get_prot_op(task->sc) == SCSI_PROT_NORMAL);
+       use_dma_key = mem->dma_nents == 1 && (all_imm || !iser_always_reg) &&
+                     scsi_get_prot_op(task->sc) == SCSI_PROT_NORMAL;
 
        if (!use_dma_key) {
                desc = device->reg_ops->reg_desc_get(ib_conn);
index 42f4da620f2e9f97062b9183b6ebf3ed54e56cfe..40c0f4978e2f00b5173001f54ec41a4bc651b839 100644 (file)
 #define ISER_MAX_CQ_LEN                (ISER_MAX_RX_LEN + ISER_MAX_TX_LEN + \
                                 ISCSI_ISER_MAX_CONN)
 
-static int iser_cq_poll_limit = 512;
-
-static void iser_cq_tasklet_fn(unsigned long data);
-static void iser_cq_callback(struct ib_cq *cq, void *cq_context);
-
-static void iser_cq_event_callback(struct ib_event *cause, void *context)
-{
-       iser_err("cq event %s (%d)\n",
-                ib_event_msg(cause->event), cause->event);
-}
-
 static void iser_qp_event_callback(struct ib_event *cause, void *context)
 {
        iser_err("qp event %s (%d)\n",
@@ -78,59 +67,40 @@ static void iser_event_handler(struct ib_event_handler *handler,
  */
 static int iser_create_device_ib_res(struct iser_device *device)
 {
-       struct ib_device_attr *dev_attr = &device->dev_attr;
+       struct ib_device *ib_dev = device->ib_device;
        int ret, i, max_cqe;
 
-       ret = ib_query_device(device->ib_device, dev_attr);
-       if (ret) {
-               pr_warn("Query device failed for %s\n", device->ib_device->name);
-               return ret;
-       }
-
        ret = iser_assign_reg_ops(device);
        if (ret)
                return ret;
 
        device->comps_used = min_t(int, num_online_cpus(),
-                                device->ib_device->num_comp_vectors);
+                                ib_dev->num_comp_vectors);
 
        device->comps = kcalloc(device->comps_used, sizeof(*device->comps),
                                GFP_KERNEL);
        if (!device->comps)
                goto comps_err;
 
-       max_cqe = min(ISER_MAX_CQ_LEN, dev_attr->max_cqe);
+       max_cqe = min(ISER_MAX_CQ_LEN, ib_dev->attrs.max_cqe);
 
        iser_info("using %d CQs, device %s supports %d vectors max_cqe %d\n",
-                 device->comps_used, device->ib_device->name,
-                 device->ib_device->num_comp_vectors, max_cqe);
+                 device->comps_used, ib_dev->name,
+                 ib_dev->num_comp_vectors, max_cqe);
 
-       device->pd = ib_alloc_pd(device->ib_device);
+       device->pd = ib_alloc_pd(ib_dev);
        if (IS_ERR(device->pd))
                goto pd_err;
 
        for (i = 0; i < device->comps_used; i++) {
-               struct ib_cq_init_attr cq_attr = {};
                struct iser_comp *comp = &device->comps[i];
 
-               comp->device = device;
-               cq_attr.cqe = max_cqe;
-               cq_attr.comp_vector = i;
-               comp->cq = ib_create_cq(device->ib_device,
-                                       iser_cq_callback,
-                                       iser_cq_event_callback,
-                                       (void *)comp,
-                                       &cq_attr);
+               comp->cq = ib_alloc_cq(ib_dev, comp, max_cqe, i,
+                                      IB_POLL_SOFTIRQ);
                if (IS_ERR(comp->cq)) {
                        comp->cq = NULL;
                        goto cq_err;
                }
-
-               if (ib_req_notify_cq(comp->cq, IB_CQ_NEXT_COMP))
-                       goto cq_err;
-
-               tasklet_init(&comp->tasklet, iser_cq_tasklet_fn,
-                            (unsigned long)comp);
        }
 
        if (!iser_always_reg) {
@@ -140,11 +110,11 @@ static int iser_create_device_ib_res(struct iser_device *device)
 
                device->mr = ib_get_dma_mr(device->pd, access);
                if (IS_ERR(device->mr))
-                       goto dma_mr_err;
+                       goto cq_err;
        }
 
-       INIT_IB_EVENT_HANDLER(&device->event_handler, device->ib_device,
-                               iser_event_handler);
+       INIT_IB_EVENT_HANDLER(&device->event_handler, ib_dev,
+                             iser_event_handler);
        if (ib_register_event_handler(&device->event_handler))
                goto handler_err;
 
@@ -153,15 +123,12 @@ static int iser_create_device_ib_res(struct iser_device *device)
 handler_err:
        if (device->mr)
                ib_dereg_mr(device->mr);
-dma_mr_err:
-       for (i = 0; i < device->comps_used; i++)
-               tasklet_kill(&device->comps[i].tasklet);
 cq_err:
        for (i = 0; i < device->comps_used; i++) {
                struct iser_comp *comp = &device->comps[i];
 
                if (comp->cq)
-                       ib_destroy_cq(comp->cq);
+                       ib_free_cq(comp->cq);
        }
        ib_dealloc_pd(device->pd);
 pd_err:
@@ -182,8 +149,7 @@ static void iser_free_device_ib_res(struct iser_device *device)
        for (i = 0; i < device->comps_used; i++) {
                struct iser_comp *comp = &device->comps[i];
 
-               tasklet_kill(&comp->tasklet);
-               ib_destroy_cq(comp->cq);
+               ib_free_cq(comp->cq);
                comp->cq = NULL;
        }
 
@@ -299,7 +265,7 @@ iser_alloc_reg_res(struct ib_device *ib_device,
                iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret);
                return ret;
        }
-       res->mr_valid = 1;
+       res->mr_valid = 0;
 
        return 0;
 }
@@ -336,7 +302,7 @@ iser_alloc_pi_ctx(struct ib_device *ib_device,
                ret = PTR_ERR(pi_ctx->sig_mr);
                goto sig_mr_failure;
        }
-       pi_ctx->sig_mr_valid = 1;
+       pi_ctx->sig_mr_valid = 0;
        desc->pi_ctx->sig_protected = 0;
 
        return 0;
@@ -461,10 +427,9 @@ void iser_free_fastreg_pool(struct ib_conn *ib_conn)
  */
 static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
 {
-       struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
-                                                  ib_conn);
+       struct iser_conn *iser_conn = to_iser_conn(ib_conn);
        struct iser_device      *device;
-       struct ib_device_attr *dev_attr;
+       struct ib_device        *ib_dev;
        struct ib_qp_init_attr  init_attr;
        int                     ret = -ENOMEM;
        int index, min_index = 0;
@@ -472,7 +437,7 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
        BUG_ON(ib_conn->device == NULL);
 
        device = ib_conn->device;
-       dev_attr = &device->dev_attr;
+       ib_dev = device->ib_device;
 
        memset(&init_attr, 0, sizeof init_attr);
 
@@ -503,16 +468,16 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
                iser_conn->max_cmds =
                        ISER_GET_MAX_XMIT_CMDS(ISER_QP_SIG_MAX_REQ_DTOS);
        } else {
-               if (dev_attr->max_qp_wr > ISER_QP_MAX_REQ_DTOS) {
+               if (ib_dev->attrs.max_qp_wr > ISER_QP_MAX_REQ_DTOS) {
                        init_attr.cap.max_send_wr  = ISER_QP_MAX_REQ_DTOS + 1;
                        iser_conn->max_cmds =
                                ISER_GET_MAX_XMIT_CMDS(ISER_QP_MAX_REQ_DTOS);
                } else {
-                       init_attr.cap.max_send_wr = dev_attr->max_qp_wr;
+                       init_attr.cap.max_send_wr = ib_dev->attrs.max_qp_wr;
                        iser_conn->max_cmds =
-                               ISER_GET_MAX_XMIT_CMDS(dev_attr->max_qp_wr);
+                               ISER_GET_MAX_XMIT_CMDS(ib_dev->attrs.max_qp_wr);
                        iser_dbg("device %s supports max_send_wr %d\n",
-                                device->ib_device->name, dev_attr->max_qp_wr);
+                                device->ib_device->name, ib_dev->attrs.max_qp_wr);
                }
        }
 
@@ -724,13 +689,13 @@ int iser_conn_terminate(struct iser_conn *iser_conn)
                                 iser_conn, err);
 
                /* post an indication that all flush errors were consumed */
-               err = ib_post_send(ib_conn->qp, &ib_conn->beacon, &bad_wr);
+               err = ib_post_send(ib_conn->qp, &ib_conn->last, &bad_wr);
                if (err) {
-                       iser_err("conn %p failed to post beacon", ib_conn);
+                       iser_err("conn %p failed to post last wr", ib_conn);
                        return 1;
                }
 
-               wait_for_completion(&ib_conn->flush_comp);
+               wait_for_completion(&ib_conn->last_comp);
        }
 
        return 1;
@@ -756,7 +721,7 @@ iser_calc_scsi_params(struct iser_conn *iser_conn,
 
        sg_tablesize = DIV_ROUND_UP(max_sectors * 512, SIZE_4K);
        sup_sg_tablesize = min_t(unsigned, ISCSI_ISER_MAX_SG_TABLESIZE,
-                                device->dev_attr.max_fast_reg_page_list_len);
+                                device->ib_device->attrs.max_fast_reg_page_list_len);
 
        if (sg_tablesize > sup_sg_tablesize) {
                sg_tablesize = sup_sg_tablesize;
@@ -799,7 +764,7 @@ static void iser_addr_handler(struct rdma_cm_id *cma_id)
 
        /* connection T10-PI support */
        if (iser_pi_enable) {
-               if (!(device->dev_attr.device_cap_flags &
+               if (!(device->ib_device->attrs.device_cap_flags &
                      IB_DEVICE_SIGNATURE_HANDOVER)) {
                        iser_warn("T10-PI requested but not supported on %s, "
                                  "continue without T10-PI\n",
@@ -841,16 +806,17 @@ static void iser_route_handler(struct rdma_cm_id *cma_id)
                goto failure;
 
        memset(&conn_param, 0, sizeof conn_param);
-       conn_param.responder_resources = device->dev_attr.max_qp_rd_atom;
+       conn_param.responder_resources = device->ib_device->attrs.max_qp_rd_atom;
        conn_param.initiator_depth     = 1;
        conn_param.retry_count         = 7;
        conn_param.rnr_retry_count     = 6;
 
        memset(&req_hdr, 0, sizeof(req_hdr));
-       req_hdr.flags = (ISER_ZBVA_NOT_SUPPORTED |
-                       ISER_SEND_W_INV_NOT_SUPPORTED);
-       conn_param.private_data         = (void *)&req_hdr;
-       conn_param.private_data_len     = sizeof(struct iser_cm_hdr);
+       req_hdr.flags = ISER_ZBVA_NOT_SUP;
+       if (!device->remote_inv_sup)
+               req_hdr.flags |= ISER_SEND_W_INV_NOT_SUP;
+       conn_param.private_data = (void *)&req_hdr;
+       conn_param.private_data_len = sizeof(struct iser_cm_hdr);
 
        ret = rdma_connect(cma_id, &conn_param);
        if (ret) {
@@ -863,7 +829,8 @@ failure:
        iser_connect_error(cma_id);
 }
 
-static void iser_connected_handler(struct rdma_cm_id *cma_id)
+static void iser_connected_handler(struct rdma_cm_id *cma_id,
+                                  const void *private_data)
 {
        struct iser_conn *iser_conn;
        struct ib_qp_attr attr;
@@ -877,6 +844,15 @@ static void iser_connected_handler(struct rdma_cm_id *cma_id)
        (void)ib_query_qp(cma_id->qp, &attr, ~0, &init_attr);
        iser_info("remote qpn:%x my qpn:%x\n", attr.dest_qp_num, cma_id->qp->qp_num);
 
+       if (private_data) {
+               u8 flags = *(u8 *)private_data;
+
+               iser_conn->snd_w_inv = !(flags & ISER_SEND_W_INV_NOT_SUP);
+       }
+
+       iser_info("conn %p: negotiated %s invalidation\n",
+                 iser_conn, iser_conn->snd_w_inv ? "remote" : "local");
+
        iser_conn->state = ISER_CONN_UP;
        complete(&iser_conn->up_completion);
 }
@@ -928,7 +904,7 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve
                iser_route_handler(cma_id);
                break;
        case RDMA_CM_EVENT_ESTABLISHED:
-               iser_connected_handler(cma_id);
+               iser_connected_handler(cma_id, event->param.conn.private_data);
                break;
        case RDMA_CM_EVENT_ADDR_ERROR:
        case RDMA_CM_EVENT_ROUTE_ERROR:
@@ -967,14 +943,21 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve
 
 void iser_conn_init(struct iser_conn *iser_conn)
 {
+       struct ib_conn *ib_conn = &iser_conn->ib_conn;
+
        iser_conn->state = ISER_CONN_INIT;
-       iser_conn->ib_conn.post_recv_buf_count = 0;
-       init_completion(&iser_conn->ib_conn.flush_comp);
        init_completion(&iser_conn->stop_completion);
        init_completion(&iser_conn->ib_completion);
        init_completion(&iser_conn->up_completion);
        INIT_LIST_HEAD(&iser_conn->conn_list);
        mutex_init(&iser_conn->state_mutex);
+
+       ib_conn->post_recv_buf_count = 0;
+       ib_conn->reg_cqe.done = iser_reg_comp;
+       ib_conn->last_cqe.done = iser_last_comp;
+       ib_conn->last.wr_cqe = &ib_conn->last_cqe;
+       ib_conn->last.opcode = IB_WR_SEND;
+       init_completion(&ib_conn->last_comp);
 }
 
  /**
@@ -1000,9 +983,6 @@ int iser_connect(struct iser_conn   *iser_conn,
 
        iser_conn->state = ISER_CONN_PENDING;
 
-       ib_conn->beacon.wr_id = ISER_BEACON_WRID;
-       ib_conn->beacon.opcode = IB_WR_SEND;
-
        ib_conn->cma_id = rdma_create_id(&init_net, iser_cma_handler,
                                         (void *)iser_conn,
                                         RDMA_PS_TCP, IB_QPT_RC);
@@ -1045,56 +1025,60 @@ connect_failure:
 
 int iser_post_recvl(struct iser_conn *iser_conn)
 {
-       struct ib_recv_wr rx_wr, *rx_wr_failed;
        struct ib_conn *ib_conn = &iser_conn->ib_conn;
-       struct ib_sge     sge;
+       struct iser_login_desc *desc = &iser_conn->login_desc;
+       struct ib_recv_wr wr, *wr_failed;
        int ib_ret;
 
-       sge.addr   = iser_conn->login_resp_dma;
-       sge.length = ISER_RX_LOGIN_SIZE;
-       sge.lkey   = ib_conn->device->pd->local_dma_lkey;
+       desc->sge.addr = desc->rsp_dma;
+       desc->sge.length = ISER_RX_LOGIN_SIZE;
+       desc->sge.lkey = ib_conn->device->pd->local_dma_lkey;
 
-       rx_wr.wr_id   = (uintptr_t)iser_conn->login_resp_buf;
-       rx_wr.sg_list = &sge;
-       rx_wr.num_sge = 1;
-       rx_wr.next    = NULL;
+       desc->cqe.done = iser_login_rsp;
+       wr.wr_cqe = &desc->cqe;
+       wr.sg_list = &desc->sge;
+       wr.num_sge = 1;
+       wr.next = NULL;
 
        ib_conn->post_recv_buf_count++;
-       ib_ret  = ib_post_recv(ib_conn->qp, &rx_wr, &rx_wr_failed);
+       ib_ret = ib_post_recv(ib_conn->qp, &wr, &wr_failed);
        if (ib_ret) {
                iser_err("ib_post_recv failed ret=%d\n", ib_ret);
                ib_conn->post_recv_buf_count--;
        }
+
        return ib_ret;
 }
 
 int iser_post_recvm(struct iser_conn *iser_conn, int count)
 {
-       struct ib_recv_wr *rx_wr, *rx_wr_failed;
-       int i, ib_ret;
        struct ib_conn *ib_conn = &iser_conn->ib_conn;
        unsigned int my_rx_head = iser_conn->rx_desc_head;
        struct iser_rx_desc *rx_desc;
+       struct ib_recv_wr *wr, *wr_failed;
+       int i, ib_ret;
 
-       for (rx_wr = ib_conn->rx_wr, i = 0; i < count; i++, rx_wr++) {
-               rx_desc         = &iser_conn->rx_descs[my_rx_head];
-               rx_wr->wr_id    = (uintptr_t)rx_desc;
-               rx_wr->sg_list  = &rx_desc->rx_sg;
-               rx_wr->num_sge  = 1;
-               rx_wr->next     = rx_wr + 1;
+       for (wr = ib_conn->rx_wr, i = 0; i < count; i++, wr++) {
+               rx_desc = &iser_conn->rx_descs[my_rx_head];
+               rx_desc->cqe.done = iser_task_rsp;
+               wr->wr_cqe = &rx_desc->cqe;
+               wr->sg_list = &rx_desc->rx_sg;
+               wr->num_sge = 1;
+               wr->next = wr + 1;
                my_rx_head = (my_rx_head + 1) & iser_conn->qp_max_recv_dtos_mask;
        }
 
-       rx_wr--;
-       rx_wr->next = NULL; /* mark end of work requests list */
+       wr--;
+       wr->next = NULL; /* mark end of work requests list */
 
        ib_conn->post_recv_buf_count += count;
-       ib_ret  = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &rx_wr_failed);
+       ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &wr_failed);
        if (ib_ret) {
                iser_err("ib_post_recv failed ret=%d\n", ib_ret);
                ib_conn->post_recv_buf_count -= count;
        } else
                iser_conn->rx_desc_head = my_rx_head;
+
        return ib_ret;
 }
 
@@ -1115,7 +1099,7 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc,
                                      DMA_TO_DEVICE);
 
        wr->next = NULL;
-       wr->wr_id = (uintptr_t)tx_desc;
+       wr->wr_cqe = &tx_desc->cqe;
        wr->sg_list = tx_desc->tx_sg;
        wr->num_sge = tx_desc->num_sge;
        wr->opcode = IB_WR_SEND;
@@ -1129,149 +1113,6 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc,
        return ib_ret;
 }
 
-/**
- * is_iser_tx_desc - Indicate if the completion wr_id
- *     is a TX descriptor or not.
- * @iser_conn: iser connection
- * @wr_id: completion WR identifier
- *
- * Since we cannot rely on wc opcode in FLUSH errors
- * we must work around it by checking if the wr_id address
- * falls in the iser connection rx_descs buffer. If so
- * it is an RX descriptor, otherwize it is a TX.
- */
-static inline bool
-is_iser_tx_desc(struct iser_conn *iser_conn, void *wr_id)
-{
-       void *start = iser_conn->rx_descs;
-       int len = iser_conn->num_rx_descs * sizeof(*iser_conn->rx_descs);
-
-       if (wr_id >= start && wr_id < start + len)
-               return false;
-
-       return true;
-}
-
-/**
- * iser_handle_comp_error() - Handle error completion
- * @ib_conn:   connection RDMA resources
- * @wc:        work completion
- *
- * Notes: We may handle a FLUSH error completion and in this case
- *        we only cleanup in case TX type was DATAOUT. For non-FLUSH
- *        error completion we should also notify iscsi layer that
- *        connection is failed (in case we passed bind stage).
- */
-static void
-iser_handle_comp_error(struct ib_conn *ib_conn,
-                      struct ib_wc *wc)
-{
-       void *wr_id = (void *)(uintptr_t)wc->wr_id;
-       struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
-                                                  ib_conn);
-
-       if (wc->status != IB_WC_WR_FLUSH_ERR)
-               if (iser_conn->iscsi_conn)
-                       iscsi_conn_failure(iser_conn->iscsi_conn,
-                                          ISCSI_ERR_CONN_FAILED);
-
-       if (wc->wr_id == ISER_FASTREG_LI_WRID)
-               return;
-
-       if (is_iser_tx_desc(iser_conn, wr_id)) {
-               struct iser_tx_desc *desc = wr_id;
-
-               if (desc->type == ISCSI_TX_DATAOUT)
-                       kmem_cache_free(ig.desc_cache, desc);
-       } else {
-               ib_conn->post_recv_buf_count--;
-       }
-}
-
-/**
- * iser_handle_wc - handle a single work completion
- * @wc: work completion
- *
- * Soft-IRQ context, work completion can be either
- * SEND or RECV, and can turn out successful or
- * with error (or flush error).
- */
-static void iser_handle_wc(struct ib_wc *wc)
-{
-       struct ib_conn *ib_conn;
-       struct iser_tx_desc *tx_desc;
-       struct iser_rx_desc *rx_desc;
-
-       ib_conn = wc->qp->qp_context;
-       if (likely(wc->status == IB_WC_SUCCESS)) {
-               if (wc->opcode == IB_WC_RECV) {
-                       rx_desc = (struct iser_rx_desc *)(uintptr_t)wc->wr_id;
-                       iser_rcv_completion(rx_desc, wc->byte_len,
-                                           ib_conn);
-               } else
-               if (wc->opcode == IB_WC_SEND) {
-                       tx_desc = (struct iser_tx_desc *)(uintptr_t)wc->wr_id;
-                       iser_snd_completion(tx_desc, ib_conn);
-               } else {
-                       iser_err("Unknown wc opcode %d\n", wc->opcode);
-               }
-       } else {
-               if (wc->status != IB_WC_WR_FLUSH_ERR)
-                       iser_err("%s (%d): wr id %llx vend_err %x\n",
-                                ib_wc_status_msg(wc->status), wc->status,
-                                wc->wr_id, wc->vendor_err);
-               else
-                       iser_dbg("%s (%d): wr id %llx\n",
-                                ib_wc_status_msg(wc->status), wc->status,
-                                wc->wr_id);
-
-               if (wc->wr_id == ISER_BEACON_WRID)
-                       /* all flush errors were consumed */
-                       complete(&ib_conn->flush_comp);
-               else
-                       iser_handle_comp_error(ib_conn, wc);
-       }
-}
-
-/**
- * iser_cq_tasklet_fn - iSER completion polling loop
- * @data: iSER completion context
- *
- * Soft-IRQ context, polling connection CQ until
- * either CQ was empty or we exausted polling budget
- */
-static void iser_cq_tasklet_fn(unsigned long data)
-{
-       struct iser_comp *comp = (struct iser_comp *)data;
-       struct ib_cq *cq = comp->cq;
-       struct ib_wc *const wcs = comp->wcs;
-       int i, n, completed = 0;
-
-       while ((n = ib_poll_cq(cq, ARRAY_SIZE(comp->wcs), wcs)) > 0) {
-               for (i = 0; i < n; i++)
-                       iser_handle_wc(&wcs[i]);
-
-               completed += n;
-               if (completed >= iser_cq_poll_limit)
-                       break;
-       }
-
-       /*
-        * It is assumed here that arming CQ only once its empty
-        * would not cause interrupts to be missed.
-        */
-       ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
-
-       iser_dbg("got %d completions\n", completed);
-}
-
-static void iser_cq_callback(struct ib_cq *cq, void *cq_context)
-{
-       struct iser_comp *comp = cq_context;
-
-       tasklet_schedule(&comp->tasklet);
-}
-
 u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
                             enum iser_data_dir cmd_dir, sector_t *sector)
 {
@@ -1319,3 +1160,21 @@ err:
        /* Not alot we can do here, return ambiguous guard error */
        return 0x1;
 }
+
+void iser_err_comp(struct ib_wc *wc, const char *type)
+{
+       if (wc->status != IB_WC_WR_FLUSH_ERR) {
+               struct iser_conn *iser_conn = to_iser_conn(wc->qp->qp_context);
+
+               iser_err("%s failure: %s (%d) vend_err %x\n", type,
+                        ib_wc_status_msg(wc->status), wc->status,
+                        wc->vendor_err);
+
+               if (iser_conn->iscsi_conn)
+                       iscsi_conn_failure(iser_conn->iscsi_conn,
+                                          ISCSI_ERR_CONN_FAILED);
+       } else {
+               iser_dbg("%s failure: %s (%d)\n", type,
+                        ib_wc_status_msg(wc->status), wc->status);
+       }
+}
index 468c5e13256368c8b23b5ab25c903ead808f490f..f121e612933913dc9b24ccdbb41f63a5d8f09b84 100644 (file)
@@ -29,7 +29,6 @@
 #include <target/iscsi/iscsi_transport.h>
 #include <linux/semaphore.h>
 
-#include "isert_proto.h"
 #include "ib_isert.h"
 
 #define        ISERT_MAX_CONN          8
@@ -95,22 +94,6 @@ isert_qp_event_callback(struct ib_event *e, void *context)
        }
 }
 
-static int
-isert_query_device(struct ib_device *ib_dev, struct ib_device_attr *devattr)
-{
-       int ret;
-
-       ret = ib_query_device(ib_dev, devattr);
-       if (ret) {
-               isert_err("ib_query_device() failed: %d\n", ret);
-               return ret;
-       }
-       isert_dbg("devattr->max_sge: %d\n", devattr->max_sge);
-       isert_dbg("devattr->max_sge_rd: %d\n", devattr->max_sge_rd);
-
-       return 0;
-}
-
 static struct isert_comp *
 isert_comp_get(struct isert_conn *isert_conn)
 {
@@ -157,9 +140,9 @@ isert_create_qp(struct isert_conn *isert_conn,
        attr.recv_cq = comp->cq;
        attr.cap.max_send_wr = ISERT_QP_MAX_REQ_DTOS;
        attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS + 1;
-       attr.cap.max_send_sge = device->dev_attr.max_sge;
-       isert_conn->max_sge = min(device->dev_attr.max_sge,
-                                 device->dev_attr.max_sge_rd);
+       attr.cap.max_send_sge = device->ib_device->attrs.max_sge;
+       isert_conn->max_sge = min(device->ib_device->attrs.max_sge,
+                                 device->ib_device->attrs.max_sge_rd);
        attr.cap.max_recv_sge = 1;
        attr.sq_sig_type = IB_SIGNAL_REQ_WR;
        attr.qp_type = IB_QPT_RC;
@@ -287,8 +270,7 @@ isert_free_comps(struct isert_device *device)
 }
 
 static int
-isert_alloc_comps(struct isert_device *device,
-                 struct ib_device_attr *attr)
+isert_alloc_comps(struct isert_device *device)
 {
        int i, max_cqe, ret = 0;
 
@@ -308,7 +290,7 @@ isert_alloc_comps(struct isert_device *device,
                return -ENOMEM;
        }
 
-       max_cqe = min(ISER_MAX_CQ_LEN, attr->max_cqe);
+       max_cqe = min(ISER_MAX_CQ_LEN, device->ib_device->attrs.max_cqe);
 
        for (i = 0; i < device->comps_used; i++) {
                struct ib_cq_init_attr cq_attr = {};
@@ -344,17 +326,15 @@ out_cq:
 static int
 isert_create_device_ib_res(struct isert_device *device)
 {
-       struct ib_device_attr *dev_attr;
+       struct ib_device *ib_dev = device->ib_device;
        int ret;
 
-       dev_attr = &device->dev_attr;
-       ret = isert_query_device(device->ib_device, dev_attr);
-       if (ret)
-               goto out;
+       isert_dbg("devattr->max_sge: %d\n", ib_dev->attrs.max_sge);
+       isert_dbg("devattr->max_sge_rd: %d\n", ib_dev->attrs.max_sge_rd);
 
        /* asign function handlers */
-       if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS &&
-           dev_attr->device_cap_flags & IB_DEVICE_SIGNATURE_HANDOVER) {
+       if (ib_dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS &&
+           ib_dev->attrs.device_cap_flags & IB_DEVICE_SIGNATURE_HANDOVER) {
                device->use_fastreg = 1;
                device->reg_rdma_mem = isert_reg_rdma;
                device->unreg_rdma_mem = isert_unreg_rdma;
@@ -364,11 +344,11 @@ isert_create_device_ib_res(struct isert_device *device)
                device->unreg_rdma_mem = isert_unmap_cmd;
        }
 
-       ret = isert_alloc_comps(device, dev_attr);
+       ret = isert_alloc_comps(device);
        if (ret)
                goto out;
 
-       device->pd = ib_alloc_pd(device->ib_device);
+       device->pd = ib_alloc_pd(ib_dev);
        if (IS_ERR(device->pd)) {
                ret = PTR_ERR(device->pd);
                isert_err("failed to allocate pd, device %p, ret=%d\n",
@@ -377,7 +357,7 @@ isert_create_device_ib_res(struct isert_device *device)
        }
 
        /* Check signature cap */
-       device->pi_capable = dev_attr->device_cap_flags &
+       device->pi_capable = ib_dev->attrs.device_cap_flags &
                             IB_DEVICE_SIGNATURE_HANDOVER ? true : false;
 
        return 0;
@@ -676,6 +656,32 @@ out_login_buf:
        return ret;
 }
 
+static void
+isert_set_nego_params(struct isert_conn *isert_conn,
+                     struct rdma_conn_param *param)
+{
+       struct ib_device_attr *attr = &isert_conn->device->ib_device->attrs;
+
+       /* Set max inflight RDMA READ requests */
+       isert_conn->initiator_depth = min_t(u8, param->initiator_depth,
+                               attr->max_qp_init_rd_atom);
+       isert_dbg("Using initiator_depth: %u\n", isert_conn->initiator_depth);
+
+       if (param->private_data) {
+               u8 flags = *(u8 *)param->private_data;
+
+               /*
+                * use remote invalidation if the both initiator
+                * and the HCA support it
+                */
+               isert_conn->snd_w_inv = !(flags & ISER_SEND_W_INV_NOT_SUP) &&
+                                         (attr->device_cap_flags &
+                                          IB_DEVICE_MEM_MGT_EXTENSIONS);
+               if (isert_conn->snd_w_inv)
+                       isert_info("Using remote invalidation\n");
+       }
+}
+
 static int
 isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
 {
@@ -714,11 +720,7 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
        }
        isert_conn->device = device;
 
-       /* Set max inflight RDMA READ requests */
-       isert_conn->initiator_depth = min_t(u8,
-                               event->param.conn.initiator_depth,
-                               device->dev_attr.max_qp_init_rd_atom);
-       isert_dbg("Using initiator_depth: %u\n", isert_conn->initiator_depth);
+       isert_set_nego_params(isert_conn, &event->param.conn);
 
        ret = isert_conn_setup_qp(isert_conn, cma_id);
        if (ret)
@@ -1050,8 +1052,8 @@ isert_create_send_desc(struct isert_conn *isert_conn,
        ib_dma_sync_single_for_cpu(ib_dev, tx_desc->dma_addr,
                                   ISER_HEADERS_LEN, DMA_TO_DEVICE);
 
-       memset(&tx_desc->iser_header, 0, sizeof(struct iser_hdr));
-       tx_desc->iser_header.flags = ISER_VER;
+       memset(&tx_desc->iser_header, 0, sizeof(struct iser_ctrl));
+       tx_desc->iser_header.flags = ISCSI_CTRL;
 
        tx_desc->num_sge = 1;
        tx_desc->isert_cmd = isert_cmd;
@@ -1097,7 +1099,14 @@ isert_init_send_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
 
        isert_cmd->rdma_wr.iser_ib_op = ISER_IB_SEND;
        send_wr->wr_id = (uintptr_t)&isert_cmd->tx_desc;
-       send_wr->opcode = IB_WR_SEND;
+
+       if (isert_conn->snd_w_inv && isert_cmd->inv_rkey) {
+               send_wr->opcode  = IB_WR_SEND_WITH_INV;
+               send_wr->ex.invalidate_rkey = isert_cmd->inv_rkey;
+       } else {
+               send_wr->opcode = IB_WR_SEND;
+       }
+
        send_wr->sg_list = &tx_desc->tx_sg[0];
        send_wr->num_sge = isert_cmd->tx_desc.num_sge;
        send_wr->send_flags = IB_SEND_SIGNALED;
@@ -1486,6 +1495,7 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc,
                isert_cmd->read_va = read_va;
                isert_cmd->write_stag = write_stag;
                isert_cmd->write_va = write_va;
+               isert_cmd->inv_rkey = read_stag ? read_stag : write_stag;
 
                ret = isert_handle_scsi_cmd(isert_conn, isert_cmd, cmd,
                                        rx_desc, (unsigned char *)hdr);
@@ -1543,21 +1553,21 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc,
 static void
 isert_rx_do_work(struct iser_rx_desc *rx_desc, struct isert_conn *isert_conn)
 {
-       struct iser_hdr *iser_hdr = &rx_desc->iser_header;
+       struct iser_ctrl *iser_ctrl = &rx_desc->iser_header;
        uint64_t read_va = 0, write_va = 0;
        uint32_t read_stag = 0, write_stag = 0;
 
-       switch (iser_hdr->flags & 0xF0) {
+       switch (iser_ctrl->flags & 0xF0) {
        case ISCSI_CTRL:
-               if (iser_hdr->flags & ISER_RSV) {
-                       read_stag = be32_to_cpu(iser_hdr->read_stag);
-                       read_va = be64_to_cpu(iser_hdr->read_va);
+               if (iser_ctrl->flags & ISER_RSV) {
+                       read_stag = be32_to_cpu(iser_ctrl->read_stag);
+                       read_va = be64_to_cpu(iser_ctrl->read_va);
                        isert_dbg("ISER_RSV: read_stag: 0x%x read_va: 0x%llx\n",
                                  read_stag, (unsigned long long)read_va);
                }
-               if (iser_hdr->flags & ISER_WSV) {
-                       write_stag = be32_to_cpu(iser_hdr->write_stag);
-                       write_va = be64_to_cpu(iser_hdr->write_va);
+               if (iser_ctrl->flags & ISER_WSV) {
+                       write_stag = be32_to_cpu(iser_ctrl->write_stag);
+                       write_va = be64_to_cpu(iser_ctrl->write_va);
                        isert_dbg("ISER_WSV: write_stag: 0x%x write_va: 0x%llx\n",
                                  write_stag, (unsigned long long)write_va);
                }
@@ -1568,7 +1578,7 @@ isert_rx_do_work(struct iser_rx_desc *rx_desc, struct isert_conn *isert_conn)
                isert_err("iSER Hello message\n");
                break;
        default:
-               isert_warn("Unknown iSER hdr flags: 0x%02x\n", iser_hdr->flags);
+               isert_warn("Unknown iSER hdr flags: 0x%02x\n", iser_ctrl->flags);
                break;
        }
 
@@ -3095,12 +3105,20 @@ isert_rdma_accept(struct isert_conn *isert_conn)
        struct rdma_cm_id *cm_id = isert_conn->cm_id;
        struct rdma_conn_param cp;
        int ret;
+       struct iser_cm_hdr rsp_hdr;
 
        memset(&cp, 0, sizeof(struct rdma_conn_param));
        cp.initiator_depth = isert_conn->initiator_depth;
        cp.retry_count = 7;
        cp.rnr_retry_count = 7;
 
+       memset(&rsp_hdr, 0, sizeof(rsp_hdr));
+       rsp_hdr.flags = ISERT_ZBVA_NOT_USED;
+       if (!isert_conn->snd_w_inv)
+               rsp_hdr.flags = rsp_hdr.flags | ISERT_SEND_W_INV_NOT_USED;
+       cp.private_data = (void *)&rsp_hdr;
+       cp.private_data_len = sizeof(rsp_hdr);
+
        ret = rdma_accept(cm_id, &cp);
        if (ret) {
                isert_err("rdma_accept() failed with: %d\n", ret);
index 3d7fbc47c3434c32308136b7faec65590b74d439..8d50453eef66ce11d3cf321a535be0abe6b16d43 100644 (file)
@@ -3,6 +3,8 @@
 #include <linux/in6.h>
 #include <rdma/ib_verbs.h>
 #include <rdma/rdma_cm.h>
+#include <scsi/iser.h>
+
 
 #define DRV_NAME       "isert"
 #define PFX            DRV_NAME ": "
 #define isert_err(fmt, arg...) \
        pr_err(PFX "%s: " fmt, __func__ , ## arg)
 
+/* Constant PDU lengths calculations */
+#define ISER_HEADERS_LEN       (sizeof(struct iser_ctrl) + \
+                                sizeof(struct iscsi_hdr))
+#define ISER_RECV_DATA_SEG_LEN 8192
+#define ISER_RX_PAYLOAD_SIZE   (ISER_HEADERS_LEN + ISER_RECV_DATA_SEG_LEN)
+#define ISER_RX_LOGIN_SIZE     (ISER_HEADERS_LEN + ISCSI_DEF_MAX_RECV_SEG_LEN)
+
+/* QP settings */
+/* Maximal bounds on received asynchronous PDUs */
+#define ISERT_MAX_TX_MISC_PDUS 4 /* NOOP_IN(2) , ASYNC_EVENT(2)   */
+
+#define ISERT_MAX_RX_MISC_PDUS 6 /*
+                                  * NOOP_OUT(2), TEXT(1),
+                                  * SCSI_TMFUNC(2), LOGOUT(1)
+                                  */
+
+#define ISCSI_DEF_XMIT_CMDS_MAX 128 /* from libiscsi.h, must be power of 2 */
+
+#define ISERT_QP_MAX_RECV_DTOS (ISCSI_DEF_XMIT_CMDS_MAX)
+
+#define ISERT_MIN_POSTED_RX    (ISCSI_DEF_XMIT_CMDS_MAX >> 2)
+
+#define ISERT_INFLIGHT_DATAOUTS        8
+
+#define ISERT_QP_MAX_REQ_DTOS  (ISCSI_DEF_XMIT_CMDS_MAX *    \
+                               (1 + ISERT_INFLIGHT_DATAOUTS) + \
+                               ISERT_MAX_TX_MISC_PDUS  + \
+                               ISERT_MAX_RX_MISC_PDUS)
+
+#define ISER_RX_PAD_SIZE       (ISER_RECV_DATA_SEG_LEN + 4096 - \
+               (ISER_RX_PAYLOAD_SIZE + sizeof(u64) + sizeof(struct ib_sge)))
+
 #define ISCSI_ISER_SG_TABLESIZE                256
 #define ISER_FASTREG_LI_WRID           0xffffffffffffffffULL
 #define ISER_BEACON_WRID               0xfffffffffffffffeULL
@@ -56,7 +90,7 @@ enum iser_conn_state {
 };
 
 struct iser_rx_desc {
-       struct iser_hdr iser_header;
+       struct iser_ctrl iser_header;
        struct iscsi_hdr iscsi_header;
        char            data[ISER_RECV_DATA_SEG_LEN];
        u64             dma_addr;
@@ -65,7 +99,7 @@ struct iser_rx_desc {
 } __packed;
 
 struct iser_tx_desc {
-       struct iser_hdr iser_header;
+       struct iser_ctrl iser_header;
        struct iscsi_hdr iscsi_header;
        enum isert_desc_type type;
        u64             dma_addr;
@@ -129,6 +163,7 @@ struct isert_cmd {
        uint32_t                write_stag;
        uint64_t                read_va;
        uint64_t                write_va;
+       uint32_t                inv_rkey;
        u64                     pdu_buf_dma;
        u32                     pdu_buf_len;
        struct isert_conn       *conn;
@@ -176,6 +211,7 @@ struct isert_conn {
        struct work_struct      release_work;
        struct ib_recv_wr       beacon;
        bool                    logout_posted;
+       bool                    snd_w_inv;
 };
 
 #define ISERT_MAX_CQ 64
@@ -207,7 +243,6 @@ struct isert_device {
        struct isert_comp       *comps;
        int                     comps_used;
        struct list_head        dev_node;
-       struct ib_device_attr   dev_attr;
        int                     (*reg_rdma_mem)(struct iscsi_conn *conn,
                                                    struct iscsi_cmd *cmd,
                                                    struct isert_rdma_wr *wr);
diff --git a/drivers/infiniband/ulp/isert/isert_proto.h b/drivers/infiniband/ulp/isert/isert_proto.h
deleted file mode 100644 (file)
index 4dccd31..0000000
+++ /dev/null
@@ -1,47 +0,0 @@
-/* From iscsi_iser.h */
-
-struct iser_hdr {
-       u8      flags;
-       u8      rsvd[3];
-       __be32  write_stag; /* write rkey */
-       __be64  write_va;
-       __be32  read_stag;  /* read rkey */
-       __be64  read_va;
-} __packed;
-
-/*Constant PDU lengths calculations */
-#define ISER_HEADERS_LEN  (sizeof(struct iser_hdr) + sizeof(struct iscsi_hdr))
-
-#define ISER_RECV_DATA_SEG_LEN  8192
-#define ISER_RX_PAYLOAD_SIZE    (ISER_HEADERS_LEN + ISER_RECV_DATA_SEG_LEN)
-#define ISER_RX_LOGIN_SIZE      (ISER_HEADERS_LEN + ISCSI_DEF_MAX_RECV_SEG_LEN)
-
-/* QP settings */
-/* Maximal bounds on received asynchronous PDUs */
-#define ISERT_MAX_TX_MISC_PDUS 4 /* NOOP_IN(2) , ASYNC_EVENT(2)   */
-
-#define ISERT_MAX_RX_MISC_PDUS 6 /* NOOP_OUT(2), TEXT(1),         *
-                                  * SCSI_TMFUNC(2), LOGOUT(1) */
-
-#define ISCSI_DEF_XMIT_CMDS_MAX 128 /* from libiscsi.h, must be power of 2 */
-
-#define ISERT_QP_MAX_RECV_DTOS (ISCSI_DEF_XMIT_CMDS_MAX)
-
-#define ISERT_MIN_POSTED_RX    (ISCSI_DEF_XMIT_CMDS_MAX >> 2)
-
-#define ISERT_INFLIGHT_DATAOUTS        8
-
-#define ISERT_QP_MAX_REQ_DTOS  (ISCSI_DEF_XMIT_CMDS_MAX *    \
-                               (1 + ISERT_INFLIGHT_DATAOUTS) + \
-                               ISERT_MAX_TX_MISC_PDUS  + \
-                               ISERT_MAX_RX_MISC_PDUS)
-
-#define ISER_RX_PAD_SIZE       (ISER_RECV_DATA_SEG_LEN + 4096 - \
-               (ISER_RX_PAYLOAD_SIZE + sizeof(u64) + sizeof(struct ib_sge)))
-
-#define ISER_VER       0x10
-#define ISER_WSV       0x08
-#define ISER_RSV       0x04
-#define ISCSI_CTRL     0x10
-#define ISER_HELLO     0x20
-#define ISER_HELLORPLY 0x30
index 3db9a659719b0f6283af610bf01b8f65d27292a9..03022f6420d770a469d818ee07fb7a4a4babda3b 100644 (file)
@@ -132,8 +132,9 @@ MODULE_PARM_DESC(ch_count,
 
 static void srp_add_one(struct ib_device *device);
 static void srp_remove_one(struct ib_device *device, void *client_data);
-static void srp_recv_completion(struct ib_cq *cq, void *ch_ptr);
-static void srp_send_completion(struct ib_cq *cq, void *ch_ptr);
+static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc);
+static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
+               const char *opname);
 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
 
 static struct scsi_transport_template *ib_srp_transport_template;
@@ -445,6 +446,17 @@ static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
                                  dev->max_pages_per_mr);
 }
 
+static void srp_drain_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+       struct srp_rdma_ch *ch = cq->cq_context;
+
+       complete(&ch->done);
+}
+
+static struct ib_cqe srp_drain_cqe = {
+       .done           = srp_drain_done,
+};
+
 /**
  * srp_destroy_qp() - destroy an RDMA queue pair
  * @ch: SRP RDMA channel.
@@ -457,10 +469,11 @@ static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
 static void srp_destroy_qp(struct srp_rdma_ch *ch)
 {
        static struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
-       static struct ib_recv_wr wr = { .wr_id = SRP_LAST_WR_ID };
+       static struct ib_recv_wr wr = { 0 };
        struct ib_recv_wr *bad_wr;
        int ret;
 
+       wr.wr_cqe = &srp_drain_cqe;
        /* Destroying a QP and reusing ch->done is only safe if not connected */
        WARN_ON_ONCE(ch->connected);
 
@@ -489,34 +502,27 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
        struct ib_fmr_pool *fmr_pool = NULL;
        struct srp_fr_pool *fr_pool = NULL;
        const int m = dev->use_fast_reg ? 3 : 1;
-       struct ib_cq_init_attr cq_attr = {};
        int ret;
 
        init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
        if (!init_attr)
                return -ENOMEM;
 
-       /* + 1 for SRP_LAST_WR_ID */
-       cq_attr.cqe = target->queue_size + 1;
-       cq_attr.comp_vector = ch->comp_vector;
-       recv_cq = ib_create_cq(dev->dev, srp_recv_completion, NULL, ch,
-                              &cq_attr);
+       /* queue_size + 1 for ib_drain_qp */
+       recv_cq = ib_alloc_cq(dev->dev, ch, target->queue_size + 1,
+                               ch->comp_vector, IB_POLL_SOFTIRQ);
        if (IS_ERR(recv_cq)) {
                ret = PTR_ERR(recv_cq);
                goto err;
        }
 
-       cq_attr.cqe = m * target->queue_size;
-       cq_attr.comp_vector = ch->comp_vector;
-       send_cq = ib_create_cq(dev->dev, srp_send_completion, NULL, ch,
-                              &cq_attr);
+       send_cq = ib_alloc_cq(dev->dev, ch, m * target->queue_size,
+                               ch->comp_vector, IB_POLL_DIRECT);
        if (IS_ERR(send_cq)) {
                ret = PTR_ERR(send_cq);
                goto err_recv_cq;
        }
 
-       ib_req_notify_cq(recv_cq, IB_CQ_NEXT_COMP);
-
        init_attr->event_handler       = srp_qp_event;
        init_attr->cap.max_send_wr     = m * target->queue_size;
        init_attr->cap.max_recv_wr     = target->queue_size + 1;
@@ -558,9 +564,9 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
        if (ch->qp)
                srp_destroy_qp(ch);
        if (ch->recv_cq)
-               ib_destroy_cq(ch->recv_cq);
+               ib_free_cq(ch->recv_cq);
        if (ch->send_cq)
-               ib_destroy_cq(ch->send_cq);
+               ib_free_cq(ch->send_cq);
 
        ch->qp = qp;
        ch->recv_cq = recv_cq;
@@ -580,13 +586,13 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
        return 0;
 
 err_qp:
-       ib_destroy_qp(qp);
+       srp_destroy_qp(ch);
 
 err_send_cq:
-       ib_destroy_cq(send_cq);
+       ib_free_cq(send_cq);
 
 err_recv_cq:
-       ib_destroy_cq(recv_cq);
+       ib_free_cq(recv_cq);
 
 err:
        kfree(init_attr);
@@ -622,9 +628,10 @@ static void srp_free_ch_ib(struct srp_target_port *target,
                if (ch->fmr_pool)
                        ib_destroy_fmr_pool(ch->fmr_pool);
        }
+
        srp_destroy_qp(ch);
-       ib_destroy_cq(ch->send_cq);
-       ib_destroy_cq(ch->recv_cq);
+       ib_free_cq(ch->send_cq);
+       ib_free_cq(ch->recv_cq);
 
        /*
         * Avoid that the SCSI error handler tries to use this channel after
@@ -1041,18 +1048,25 @@ out:
        return ret <= 0 ? ret : -ENODEV;
 }
 
-static int srp_inv_rkey(struct srp_rdma_ch *ch, u32 rkey)
+static void srp_inv_rkey_err_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+       srp_handle_qp_err(cq, wc, "INV RKEY");
+}
+
+static int srp_inv_rkey(struct srp_request *req, struct srp_rdma_ch *ch,
+               u32 rkey)
 {
        struct ib_send_wr *bad_wr;
        struct ib_send_wr wr = {
                .opcode             = IB_WR_LOCAL_INV,
-               .wr_id              = LOCAL_INV_WR_ID_MASK,
                .next               = NULL,
                .num_sge            = 0,
                .send_flags         = 0,
                .ex.invalidate_rkey = rkey,
        };
 
+       wr.wr_cqe = &req->reg_cqe;
+       req->reg_cqe.done = srp_inv_rkey_err_done;
        return ib_post_send(ch->qp, &wr, &bad_wr);
 }
 
@@ -1074,7 +1088,7 @@ static void srp_unmap_data(struct scsi_cmnd *scmnd,
                struct srp_fr_desc **pfr;
 
                for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) {
-                       res = srp_inv_rkey(ch, (*pfr)->mr->rkey);
+                       res = srp_inv_rkey(req, ch, (*pfr)->mr->rkey);
                        if (res < 0) {
                                shost_printk(KERN_ERR, target->scsi_host, PFX
                                  "Queueing INV WR for rkey %#x failed (%d)\n",
@@ -1312,7 +1326,13 @@ reset_state:
        return 0;
 }
 
+static void srp_reg_mr_err_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+       srp_handle_qp_err(cq, wc, "FAST REG");
+}
+
 static int srp_map_finish_fr(struct srp_map_state *state,
+                            struct srp_request *req,
                             struct srp_rdma_ch *ch, int sg_nents)
 {
        struct srp_target_port *target = ch->target;
@@ -1349,9 +1369,11 @@ static int srp_map_finish_fr(struct srp_map_state *state,
        if (unlikely(n < 0))
                return n;
 
+       req->reg_cqe.done = srp_reg_mr_err_done;
+
        wr.wr.next = NULL;
        wr.wr.opcode = IB_WR_REG_MR;
-       wr.wr.wr_id = FAST_REG_WR_ID_MASK;
+       wr.wr.wr_cqe = &req->reg_cqe;
        wr.wr.num_sge = 0;
        wr.wr.send_flags = 0;
        wr.mr = desc->mr;
@@ -1455,7 +1477,7 @@ static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
        while (count) {
                int i, n;
 
-               n = srp_map_finish_fr(state, ch, count);
+               n = srp_map_finish_fr(state, req, ch, count);
                if (unlikely(n < 0))
                        return n;
 
@@ -1524,7 +1546,7 @@ static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
 #ifdef CONFIG_NEED_SG_DMA_LENGTH
                idb_sg->dma_length = idb_sg->length;          /* hack^2 */
 #endif
-               ret = srp_map_finish_fr(&state, ch, 1);
+               ret = srp_map_finish_fr(&state, req, ch, 1);
                if (ret < 0)
                        return ret;
        } else if (dev->use_fmr) {
@@ -1719,7 +1741,7 @@ static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
        s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE;
        struct srp_iu *iu;
 
-       srp_send_completion(ch->send_cq, ch);
+       ib_process_cq_direct(ch->send_cq, -1);
 
        if (list_empty(&ch->free_tx))
                return NULL;
@@ -1739,6 +1761,19 @@ static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
        return iu;
 }
 
+static void srp_send_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+       struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
+       struct srp_rdma_ch *ch = cq->cq_context;
+
+       if (unlikely(wc->status != IB_WC_SUCCESS)) {
+               srp_handle_qp_err(cq, wc, "SEND");
+               return;
+       }
+
+       list_add(&iu->list, &ch->free_tx);
+}
+
 static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
 {
        struct srp_target_port *target = ch->target;
@@ -1749,8 +1784,10 @@ static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
        list.length = len;
        list.lkey   = target->lkey;
 
+       iu->cqe.done = srp_send_done;
+
        wr.next       = NULL;
-       wr.wr_id      = (uintptr_t) iu;
+       wr.wr_cqe     = &iu->cqe;
        wr.sg_list    = &list;
        wr.num_sge    = 1;
        wr.opcode     = IB_WR_SEND;
@@ -1769,8 +1806,10 @@ static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu)
        list.length = iu->size;
        list.lkey   = target->lkey;
 
+       iu->cqe.done = srp_recv_done;
+
        wr.next     = NULL;
-       wr.wr_id    = (uintptr_t) iu;
+       wr.wr_cqe   = &iu->cqe;
        wr.sg_list  = &list;
        wr.num_sge  = 1;
 
@@ -1902,14 +1941,20 @@ static void srp_process_aer_req(struct srp_rdma_ch *ch,
                             "problems processing SRP_AER_REQ\n");
 }
 
-static void srp_handle_recv(struct srp_rdma_ch *ch, struct ib_wc *wc)
+static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc)
 {
+       struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
+       struct srp_rdma_ch *ch = cq->cq_context;
        struct srp_target_port *target = ch->target;
        struct ib_device *dev = target->srp_host->srp_dev->dev;
-       struct srp_iu *iu = (struct srp_iu *) (uintptr_t) wc->wr_id;
        int res;
        u8 opcode;
 
+       if (unlikely(wc->status != IB_WC_SUCCESS)) {
+               srp_handle_qp_err(cq, wc, "RECV");
+               return;
+       }
+
        ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len,
                                   DMA_FROM_DEVICE);
 
@@ -1972,68 +2017,22 @@ static void srp_tl_err_work(struct work_struct *work)
                srp_start_tl_fail_timers(target->rport);
 }
 
-static void srp_handle_qp_err(u64 wr_id, enum ib_wc_status wc_status,
-                             bool send_err, struct srp_rdma_ch *ch)
+static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
+               const char *opname)
 {
+       struct srp_rdma_ch *ch = cq->cq_context;
        struct srp_target_port *target = ch->target;
 
-       if (wr_id == SRP_LAST_WR_ID) {
-               complete(&ch->done);
-               return;
-       }
-
        if (ch->connected && !target->qp_in_error) {
-               if (wr_id & LOCAL_INV_WR_ID_MASK) {
-                       shost_printk(KERN_ERR, target->scsi_host, PFX
-                                    "LOCAL_INV failed with status %s (%d)\n",
-                                    ib_wc_status_msg(wc_status), wc_status);
-               } else if (wr_id & FAST_REG_WR_ID_MASK) {
-                       shost_printk(KERN_ERR, target->scsi_host, PFX
-                                    "FAST_REG_MR failed status %s (%d)\n",
-                                    ib_wc_status_msg(wc_status), wc_status);
-               } else {
-                       shost_printk(KERN_ERR, target->scsi_host,
-                                    PFX "failed %s status %s (%d) for iu %p\n",
-                                    send_err ? "send" : "receive",
-                                    ib_wc_status_msg(wc_status), wc_status,
-                                    (void *)(uintptr_t)wr_id);
-               }
+               shost_printk(KERN_ERR, target->scsi_host,
+                            PFX "failed %s status %s (%d) for CQE %p\n",
+                            opname, ib_wc_status_msg(wc->status), wc->status,
+                            wc->wr_cqe);
                queue_work(system_long_wq, &target->tl_err_work);
        }
        target->qp_in_error = true;
 }
 
-static void srp_recv_completion(struct ib_cq *cq, void *ch_ptr)
-{
-       struct srp_rdma_ch *ch = ch_ptr;
-       struct ib_wc wc;
-
-       ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
-       while (ib_poll_cq(cq, 1, &wc) > 0) {
-               if (likely(wc.status == IB_WC_SUCCESS)) {
-                       srp_handle_recv(ch, &wc);
-               } else {
-                       srp_handle_qp_err(wc.wr_id, wc.status, false, ch);
-               }
-       }
-}
-
-static void srp_send_completion(struct ib_cq *cq, void *ch_ptr)
-{
-       struct srp_rdma_ch *ch = ch_ptr;
-       struct ib_wc wc;
-       struct srp_iu *iu;
-
-       while (ib_poll_cq(cq, 1, &wc) > 0) {
-               if (likely(wc.status == IB_WC_SUCCESS)) {
-                       iu = (struct srp_iu *) (uintptr_t) wc.wr_id;
-                       list_add(&iu->list, &ch->free_tx);
-               } else {
-                       srp_handle_qp_err(wc.wr_id, wc.status, true, ch);
-               }
-       }
-}
-
 static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
 {
        struct srp_target_port *target = host_to_target(shost);
@@ -3439,27 +3438,17 @@ free_host:
 static void srp_add_one(struct ib_device *device)
 {
        struct srp_device *srp_dev;
-       struct ib_device_attr *dev_attr;
        struct srp_host *host;
        int mr_page_shift, p;
        u64 max_pages_per_mr;
 
-       dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);
-       if (!dev_attr)
-               return;
-
-       if (ib_query_device(device, dev_attr)) {
-               pr_warn("Query device failed for %s\n", device->name);
-               goto free_attr;
-       }
-
        srp_dev = kmalloc(sizeof *srp_dev, GFP_KERNEL);
        if (!srp_dev)
-               goto free_attr;
+               return;
 
        srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
                            device->map_phys_fmr && device->unmap_fmr);
-       srp_dev->has_fr = (dev_attr->device_cap_flags &
+       srp_dev->has_fr = (device->attrs.device_cap_flags &
                           IB_DEVICE_MEM_MGT_EXTENSIONS);
        if (!srp_dev->has_fmr && !srp_dev->has_fr)
                dev_warn(&device->dev, "neither FMR nor FR is supported\n");
@@ -3473,23 +3462,23 @@ static void srp_add_one(struct ib_device *device)
         * minimum of 4096 bytes. We're unlikely to build large sglists
         * out of smaller entries.
         */
-       mr_page_shift           = max(12, ffs(dev_attr->page_size_cap) - 1);
+       mr_page_shift           = max(12, ffs(device->attrs.page_size_cap) - 1);
        srp_dev->mr_page_size   = 1 << mr_page_shift;
        srp_dev->mr_page_mask   = ~((u64) srp_dev->mr_page_size - 1);
-       max_pages_per_mr        = dev_attr->max_mr_size;
+       max_pages_per_mr        = device->attrs.max_mr_size;
        do_div(max_pages_per_mr, srp_dev->mr_page_size);
        srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
                                          max_pages_per_mr);
        if (srp_dev->use_fast_reg) {
                srp_dev->max_pages_per_mr =
                        min_t(u32, srp_dev->max_pages_per_mr,
-                             dev_attr->max_fast_reg_page_list_len);
+                             device->attrs.max_fast_reg_page_list_len);
        }
        srp_dev->mr_max_size    = srp_dev->mr_page_size *
                                   srp_dev->max_pages_per_mr;
-       pr_debug("%s: mr_page_shift = %d, dev_attr->max_mr_size = %#llx, dev_attr->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
-                device->name, mr_page_shift, dev_attr->max_mr_size,
-                dev_attr->max_fast_reg_page_list_len,
+       pr_debug("%s: mr_page_shift = %d, device->max_mr_size = %#llx, device->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
+                device->name, mr_page_shift, device->attrs.max_mr_size,
+                device->attrs.max_fast_reg_page_list_len,
                 srp_dev->max_pages_per_mr, srp_dev->mr_max_size);
 
        INIT_LIST_HEAD(&srp_dev->dev_list);
@@ -3517,17 +3506,13 @@ static void srp_add_one(struct ib_device *device)
        }
 
        ib_set_client_data(device, &srp_client, srp_dev);
-
-       goto free_attr;
+       return;
 
 err_pd:
        ib_dealloc_pd(srp_dev->pd);
 
 free_dev:
        kfree(srp_dev);
-
-free_attr:
-       kfree(dev_attr);
 }
 
 static void srp_remove_one(struct ib_device *device, void *client_data)
@@ -3587,8 +3572,6 @@ static int __init srp_init_module(void)
 {
        int ret;
 
-       BUILD_BUG_ON(FIELD_SIZEOF(struct ib_wc, wr_id) < sizeof(void *));
-
        if (srp_sg_tablesize) {
                pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
                if (!cmd_sg_entries)
index f6af531f9f32c990cd30c7139d247c643144e55d..9e05ce4a04fd08b0a450f5a546b7fb51901c5afe 100644 (file)
@@ -66,11 +66,6 @@ enum {
        SRP_TAG_TSK_MGMT        = 1U << 31,
 
        SRP_MAX_PAGES_PER_MR    = 512,
-
-       LOCAL_INV_WR_ID_MASK    = 1,
-       FAST_REG_WR_ID_MASK     = 2,
-
-       SRP_LAST_WR_ID          = 0xfffffffcU,
 };
 
 enum srp_target_state {
@@ -128,6 +123,7 @@ struct srp_request {
        struct srp_direct_buf  *indirect_desc;
        dma_addr_t              indirect_dma_addr;
        short                   nmdesc;
+       struct ib_cqe           reg_cqe;
 };
 
 /**
@@ -231,6 +227,7 @@ struct srp_iu {
        void                   *buf;
        size_t                  size;
        enum dma_data_direction direction;
+       struct ib_cqe           cqe;
 };
 
 /**
index bc5470c43d26080c5fa0f9d0e6427e848ae2771a..0c37fee363b1d60eb3dd63ebfdc6330b2e14b562 100644 (file)
@@ -93,6 +93,8 @@ MODULE_PARM_DESC(srpt_service_guid,
 static struct ib_client srpt_client;
 static void srpt_release_channel(struct srpt_rdma_ch *ch);
 static int srpt_queue_status(struct se_cmd *cmd);
+static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc);
+static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc);
 
 /**
  * opposite_dma_dir() - Swap DMA_TO_DEVICE and DMA_FROM_DEVICE.
@@ -341,10 +343,10 @@ static void srpt_get_ioc(struct srpt_port *sport, u32 slot,
        memset(iocp, 0, sizeof *iocp);
        strcpy(iocp->id_string, SRPT_ID_STRING);
        iocp->guid = cpu_to_be64(srpt_service_guid);
-       iocp->vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id);
-       iocp->device_id = cpu_to_be32(sdev->dev_attr.vendor_part_id);
-       iocp->device_version = cpu_to_be16(sdev->dev_attr.hw_ver);
-       iocp->subsys_vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id);
+       iocp->vendor_id = cpu_to_be32(sdev->device->attrs.vendor_id);
+       iocp->device_id = cpu_to_be32(sdev->device->attrs.vendor_part_id);
+       iocp->device_version = cpu_to_be16(sdev->device->attrs.hw_ver);
+       iocp->subsys_vendor_id = cpu_to_be32(sdev->device->attrs.vendor_id);
        iocp->subsys_device_id = 0x0;
        iocp->io_class = cpu_to_be16(SRP_REV16A_IB_IO_CLASS);
        iocp->io_subclass = cpu_to_be16(SRP_IO_SUBCLASS);
@@ -453,6 +455,7 @@ static void srpt_mad_send_handler(struct ib_mad_agent *mad_agent,
  * srpt_mad_recv_handler() - MAD reception callback function.
  */
 static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent,
+                                 struct ib_mad_send_buf *send_buf,
                                  struct ib_mad_recv_wc *mad_wc)
 {
        struct srpt_port *sport = (struct srpt_port *)mad_agent->context;
@@ -778,12 +781,12 @@ static int srpt_post_recv(struct srpt_device *sdev,
        struct ib_recv_wr wr, *bad_wr;
 
        BUG_ON(!sdev);
-       wr.wr_id = encode_wr_id(SRPT_RECV, ioctx->ioctx.index);
-
        list.addr = ioctx->ioctx.dma;
        list.length = srp_max_req_size;
        list.lkey = sdev->pd->local_dma_lkey;
 
+       ioctx->ioctx.cqe.done = srpt_recv_done;
+       wr.wr_cqe = &ioctx->ioctx.cqe;
        wr.next = NULL;
        wr.sg_list = &list;
        wr.num_sge = 1;
@@ -819,8 +822,9 @@ static int srpt_post_send(struct srpt_rdma_ch *ch,
        list.length = len;
        list.lkey = sdev->pd->local_dma_lkey;
 
+       ioctx->ioctx.cqe.done = srpt_send_done;
        wr.next = NULL;
-       wr.wr_id = encode_wr_id(SRPT_SEND, ioctx->ioctx.index);
+       wr.wr_cqe = &ioctx->ioctx.cqe;
        wr.sg_list = &list;
        wr.num_sge = 1;
        wr.opcode = IB_WR_SEND;
@@ -1052,13 +1056,13 @@ static void srpt_unmap_sg_to_ib_sge(struct srpt_rdma_ch *ch,
 
        BUG_ON(!ch);
        BUG_ON(!ioctx);
-       BUG_ON(ioctx->n_rdma && !ioctx->rdma_ius);
+       BUG_ON(ioctx->n_rdma && !ioctx->rdma_wrs);
 
        while (ioctx->n_rdma)
-               kfree(ioctx->rdma_ius[--ioctx->n_rdma].sge);
+               kfree(ioctx->rdma_wrs[--ioctx->n_rdma].wr.sg_list);
 
-       kfree(ioctx->rdma_ius);
-       ioctx->rdma_ius = NULL;
+       kfree(ioctx->rdma_wrs);
+       ioctx->rdma_wrs = NULL;
 
        if (ioctx->mapped_sg_count) {
                sg = ioctx->sg;
@@ -1082,7 +1086,7 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
        struct scatterlist *sg, *sg_orig;
        int sg_cnt;
        enum dma_data_direction dir;
-       struct rdma_iu *riu;
+       struct ib_rdma_wr *riu;
        struct srp_direct_buf *db;
        dma_addr_t dma_addr;
        struct ib_sge *sge;
@@ -1109,23 +1113,24 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
 
        ioctx->mapped_sg_count = count;
 
-       if (ioctx->rdma_ius && ioctx->n_rdma_ius)
-               nrdma = ioctx->n_rdma_ius;
+       if (ioctx->rdma_wrs && ioctx->n_rdma_wrs)
+               nrdma = ioctx->n_rdma_wrs;
        else {
                nrdma = (count + SRPT_DEF_SG_PER_WQE - 1) / SRPT_DEF_SG_PER_WQE
                        + ioctx->n_rbuf;
 
-               ioctx->rdma_ius = kzalloc(nrdma * sizeof *riu, GFP_KERNEL);
-               if (!ioctx->rdma_ius)
+               ioctx->rdma_wrs = kcalloc(nrdma, sizeof(*ioctx->rdma_wrs),
+                               GFP_KERNEL);
+               if (!ioctx->rdma_wrs)
                        goto free_mem;
 
-               ioctx->n_rdma_ius = nrdma;
+               ioctx->n_rdma_wrs = nrdma;
        }
 
        db = ioctx->rbufs;
        tsize = cmd->data_length;
        dma_len = ib_sg_dma_len(dev, &sg[0]);
-       riu = ioctx->rdma_ius;
+       riu = ioctx->rdma_wrs;
 
        /*
         * For each remote desc - calculate the #ib_sge.
@@ -1139,9 +1144,9 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
             j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
                rsize = be32_to_cpu(db->len);
                raddr = be64_to_cpu(db->va);
-               riu->raddr = raddr;
+               riu->remote_addr = raddr;
                riu->rkey = be32_to_cpu(db->key);
-               riu->sge_cnt = 0;
+               riu->wr.num_sge = 0;
 
                /* calculate how many sge required for this remote_buf */
                while (rsize > 0 && tsize > 0) {
@@ -1165,33 +1170,35 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
                                rsize = 0;
                        }
 
-                       ++riu->sge_cnt;
+                       ++riu->wr.num_sge;
 
-                       if (rsize > 0 && riu->sge_cnt == SRPT_DEF_SG_PER_WQE) {
+                       if (rsize > 0 &&
+                           riu->wr.num_sge == SRPT_DEF_SG_PER_WQE) {
                                ++ioctx->n_rdma;
-                               riu->sge =
-                                   kmalloc(riu->sge_cnt * sizeof *riu->sge,
-                                           GFP_KERNEL);
-                               if (!riu->sge)
+                               riu->wr.sg_list = kmalloc_array(riu->wr.num_sge,
+                                               sizeof(*riu->wr.sg_list),
+                                               GFP_KERNEL);
+                               if (!riu->wr.sg_list)
                                        goto free_mem;
 
                                ++riu;
-                               riu->sge_cnt = 0;
-                               riu->raddr = raddr;
+                               riu->wr.num_sge = 0;
+                               riu->remote_addr = raddr;
                                riu->rkey = be32_to_cpu(db->key);
                        }
                }
 
                ++ioctx->n_rdma;
-               riu->sge = kmalloc(riu->sge_cnt * sizeof *riu->sge,
-                                  GFP_KERNEL);
-               if (!riu->sge)
+               riu->wr.sg_list = kmalloc_array(riu->wr.num_sge,
+                                       sizeof(*riu->wr.sg_list),
+                                       GFP_KERNEL);
+               if (!riu->wr.sg_list)
                        goto free_mem;
        }
 
        db = ioctx->rbufs;
        tsize = cmd->data_length;
-       riu = ioctx->rdma_ius;
+       riu = ioctx->rdma_wrs;
        sg = sg_orig;
        dma_len = ib_sg_dma_len(dev, &sg[0]);
        dma_addr = ib_sg_dma_address(dev, &sg[0]);
@@ -1200,7 +1207,7 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
        for (i = 0, j = 0;
             j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
                rsize = be32_to_cpu(db->len);
-               sge = riu->sge;
+               sge = riu->wr.sg_list;
                k = 0;
 
                while (rsize > 0 && tsize > 0) {
@@ -1232,9 +1239,9 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
                        }
 
                        ++k;
-                       if (k == riu->sge_cnt && rsize > 0 && tsize > 0) {
+                       if (k == riu->wr.num_sge && rsize > 0 && tsize > 0) {
                                ++riu;
-                               sge = riu->sge;
+                               sge = riu->wr.sg_list;
                                k = 0;
                        } else if (rsize > 0 && tsize > 0)
                                ++sge;
@@ -1277,8 +1284,8 @@ static struct srpt_send_ioctx *srpt_get_send_ioctx(struct srpt_rdma_ch *ch)
        ioctx->n_rbuf = 0;
        ioctx->rbufs = NULL;
        ioctx->n_rdma = 0;
-       ioctx->n_rdma_ius = 0;
-       ioctx->rdma_ius = NULL;
+       ioctx->n_rdma_wrs = 0;
+       ioctx->rdma_wrs = NULL;
        ioctx->mapped_sg_count = 0;
        init_completion(&ioctx->tx_done);
        ioctx->queue_status_only = false;
@@ -1380,118 +1387,44 @@ out:
 }
 
 /**
- * srpt_handle_send_err_comp() - Process an IB_WC_SEND error completion.
- */
-static void srpt_handle_send_err_comp(struct srpt_rdma_ch *ch, u64 wr_id)
-{
-       struct srpt_send_ioctx *ioctx;
-       enum srpt_command_state state;
-       u32 index;
-
-       atomic_inc(&ch->sq_wr_avail);
-
-       index = idx_from_wr_id(wr_id);
-       ioctx = ch->ioctx_ring[index];
-       state = srpt_get_cmd_state(ioctx);
-
-       WARN_ON(state != SRPT_STATE_CMD_RSP_SENT
-               && state != SRPT_STATE_MGMT_RSP_SENT
-               && state != SRPT_STATE_NEED_DATA
-               && state != SRPT_STATE_DONE);
-
-       /* If SRP_RSP sending failed, undo the ch->req_lim change. */
-       if (state == SRPT_STATE_CMD_RSP_SENT
-           || state == SRPT_STATE_MGMT_RSP_SENT)
-               atomic_dec(&ch->req_lim);
-
-       srpt_abort_cmd(ioctx);
-}
-
-/**
- * srpt_handle_send_comp() - Process an IB send completion notification.
- */
-static void srpt_handle_send_comp(struct srpt_rdma_ch *ch,
-                                 struct srpt_send_ioctx *ioctx)
-{
-       enum srpt_command_state state;
-
-       atomic_inc(&ch->sq_wr_avail);
-
-       state = srpt_set_cmd_state(ioctx, SRPT_STATE_DONE);
-
-       if (WARN_ON(state != SRPT_STATE_CMD_RSP_SENT
-                   && state != SRPT_STATE_MGMT_RSP_SENT
-                   && state != SRPT_STATE_DONE))
-               pr_debug("state = %d\n", state);
-
-       if (state != SRPT_STATE_DONE) {
-               srpt_unmap_sg_to_ib_sge(ch, ioctx);
-               transport_generic_free_cmd(&ioctx->cmd, 0);
-       } else {
-               pr_err("IB completion has been received too late for"
-                      " wr_id = %u.\n", ioctx->ioctx.index);
-       }
-}
-
-/**
- * srpt_handle_rdma_comp() - Process an IB RDMA completion notification.
- *
  * XXX: what is now target_execute_cmd used to be asynchronous, and unmapping
  * the data that has been transferred via IB RDMA had to be postponed until the
  * check_stop_free() callback.  None of this is necessary anymore and needs to
  * be cleaned up.
  */
-static void srpt_handle_rdma_comp(struct srpt_rdma_ch *ch,
-                                 struct srpt_send_ioctx *ioctx,
-                                 enum srpt_opcode opcode)
+static void srpt_rdma_read_done(struct ib_cq *cq, struct ib_wc *wc)
 {
+       struct srpt_rdma_ch *ch = cq->cq_context;
+       struct srpt_send_ioctx *ioctx =
+               container_of(wc->wr_cqe, struct srpt_send_ioctx, rdma_cqe);
+
        WARN_ON(ioctx->n_rdma <= 0);
        atomic_add(ioctx->n_rdma, &ch->sq_wr_avail);
 
-       if (opcode == SRPT_RDMA_READ_LAST) {
-               if (srpt_test_and_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA,
-                                               SRPT_STATE_DATA_IN))
-                       target_execute_cmd(&ioctx->cmd);
-               else
-                       pr_err("%s[%d]: wrong state = %d\n", __func__,
-                              __LINE__, srpt_get_cmd_state(ioctx));
-       } else if (opcode == SRPT_RDMA_ABORT) {
-               ioctx->rdma_aborted = true;
-       } else {
-               WARN(true, "unexpected opcode %d\n", opcode);
+       if (unlikely(wc->status != IB_WC_SUCCESS)) {
+               pr_info("RDMA_READ for ioctx 0x%p failed with status %d\n",
+                       ioctx, wc->status);
+               srpt_abort_cmd(ioctx);
+               return;
        }
+
+       if (srpt_test_and_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA,
+                                       SRPT_STATE_DATA_IN))
+               target_execute_cmd(&ioctx->cmd);
+       else
+               pr_err("%s[%d]: wrong state = %d\n", __func__,
+                      __LINE__, srpt_get_cmd_state(ioctx));
 }
 
-/**
- * srpt_handle_rdma_err_comp() - Process an IB RDMA error completion.
- */
-static void srpt_handle_rdma_err_comp(struct srpt_rdma_ch *ch,
-                                     struct srpt_send_ioctx *ioctx,
-                                     enum srpt_opcode opcode)
+static void srpt_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
 {
-       enum srpt_command_state state;
+       struct srpt_send_ioctx *ioctx =
+               container_of(wc->wr_cqe, struct srpt_send_ioctx, rdma_cqe);
 
-       state = srpt_get_cmd_state(ioctx);
-       switch (opcode) {
-       case SRPT_RDMA_READ_LAST:
-               if (ioctx->n_rdma <= 0) {
-                       pr_err("Received invalid RDMA read"
-                              " error completion with idx %d\n",
-                              ioctx->ioctx.index);
-                       break;
-               }
-               atomic_add(ioctx->n_rdma, &ch->sq_wr_avail);
-               if (state == SRPT_STATE_NEED_DATA)
-                       srpt_abort_cmd(ioctx);
-               else
-                       pr_err("%s[%d]: wrong state = %d\n",
-                              __func__, __LINE__, state);
-               break;
-       case SRPT_RDMA_WRITE_LAST:
-               break;
-       default:
-               pr_err("%s[%d]: opcode = %u\n", __func__, __LINE__, opcode);
-               break;
+       if (unlikely(wc->status != IB_WC_SUCCESS)) {
+               pr_info("RDMA_WRITE for ioctx 0x%p failed with status %d\n",
+                       ioctx, wc->status);
+               srpt_abort_cmd(ioctx);
        }
 }
 
@@ -1926,32 +1859,26 @@ out:
        return;
 }
 
-static void srpt_process_rcv_completion(struct ib_cq *cq,
-                                       struct srpt_rdma_ch *ch,
-                                       struct ib_wc *wc)
+static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc)
 {
-       struct srpt_device *sdev = ch->sport->sdev;
-       struct srpt_recv_ioctx *ioctx;
-       u32 index;
+       struct srpt_rdma_ch *ch = cq->cq_context;
+       struct srpt_recv_ioctx *ioctx =
+               container_of(wc->wr_cqe, struct srpt_recv_ioctx, ioctx.cqe);
 
-       index = idx_from_wr_id(wc->wr_id);
        if (wc->status == IB_WC_SUCCESS) {
                int req_lim;
 
                req_lim = atomic_dec_return(&ch->req_lim);
                if (unlikely(req_lim < 0))
                        pr_err("req_lim = %d < 0\n", req_lim);
-               ioctx = sdev->ioctx_ring[index];
                srpt_handle_new_iu(ch, ioctx, NULL);
        } else {
-               pr_info("receiving failed for idx %u with status %d\n",
-                       index, wc->status);
+               pr_info("receiving failed for ioctx %p with status %d\n",
+                       ioctx, wc->status);
        }
 }
 
 /**
- * srpt_process_send_completion() - Process an IB send completion.
- *
  * Note: Although this has not yet been observed during tests, at least in
  * theory it is possible that the srpt_get_send_ioctx() call invoked by
  * srpt_handle_new_iu() fails. This is possible because the req_lim_delta
@@ -1964,108 +1891,51 @@ static void srpt_process_rcv_completion(struct ib_cq *cq,
  * are queued on cmd_wait_list. The code below processes these delayed
  * requests one at a time.
  */
-static void srpt_process_send_completion(struct ib_cq *cq,
-                                        struct srpt_rdma_ch *ch,
-                                        struct ib_wc *wc)
+static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc)
 {
-       struct srpt_send_ioctx *send_ioctx;
-       uint32_t index;
-       enum srpt_opcode opcode;
+       struct srpt_rdma_ch *ch = cq->cq_context;
+       struct srpt_send_ioctx *ioctx =
+               container_of(wc->wr_cqe, struct srpt_send_ioctx, ioctx.cqe);
+       enum srpt_command_state state;
 
-       index = idx_from_wr_id(wc->wr_id);
-       opcode = opcode_from_wr_id(wc->wr_id);
-       send_ioctx = ch->ioctx_ring[index];
-       if (wc->status == IB_WC_SUCCESS) {
-               if (opcode == SRPT_SEND)
-                       srpt_handle_send_comp(ch, send_ioctx);
-               else {
-                       WARN_ON(opcode != SRPT_RDMA_ABORT &&
-                               wc->opcode != IB_WC_RDMA_READ);
-                       srpt_handle_rdma_comp(ch, send_ioctx, opcode);
-               }
+       state = srpt_set_cmd_state(ioctx, SRPT_STATE_DONE);
+
+       WARN_ON(state != SRPT_STATE_CMD_RSP_SENT &&
+               state != SRPT_STATE_MGMT_RSP_SENT);
+
+       atomic_inc(&ch->sq_wr_avail);
+
+       if (wc->status != IB_WC_SUCCESS) {
+               pr_info("sending response for ioctx 0x%p failed"
+                       " with status %d\n", ioctx, wc->status);
+
+               atomic_dec(&ch->req_lim);
+               srpt_abort_cmd(ioctx);
+               goto out;
+       }
+
+       if (state != SRPT_STATE_DONE) {
+               srpt_unmap_sg_to_ib_sge(ch, ioctx);
+               transport_generic_free_cmd(&ioctx->cmd, 0);
        } else {
-               if (opcode == SRPT_SEND) {
-                       pr_info("sending response for idx %u failed"
-                               " with status %d\n", index, wc->status);
-                       srpt_handle_send_err_comp(ch, wc->wr_id);
-               } else if (opcode != SRPT_RDMA_MID) {
-                       pr_info("RDMA t %d for idx %u failed with"
-                               " status %d\n", opcode, index, wc->status);
-                       srpt_handle_rdma_err_comp(ch, send_ioctx, opcode);
-               }
+               pr_err("IB completion has been received too late for"
+                      " wr_id = %u.\n", ioctx->ioctx.index);
        }
 
-       while (unlikely(opcode == SRPT_SEND
-                       && !list_empty(&ch->cmd_wait_list)
-                       && srpt_get_ch_state(ch) == CH_LIVE
-                       && (send_ioctx = srpt_get_send_ioctx(ch)) != NULL)) {
+out:
+       while (!list_empty(&ch->cmd_wait_list) &&
+              srpt_get_ch_state(ch) == CH_LIVE &&
+              (ioctx = srpt_get_send_ioctx(ch)) != NULL) {
                struct srpt_recv_ioctx *recv_ioctx;
 
                recv_ioctx = list_first_entry(&ch->cmd_wait_list,
                                              struct srpt_recv_ioctx,
                                              wait_list);
                list_del(&recv_ioctx->wait_list);
-               srpt_handle_new_iu(ch, recv_ioctx, send_ioctx);
-       }
-}
-
-static void srpt_process_completion(struct ib_cq *cq, struct srpt_rdma_ch *ch)
-{
-       struct ib_wc *const wc = ch->wc;
-       int i, n;
-
-       WARN_ON(cq != ch->cq);
-
-       ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
-       while ((n = ib_poll_cq(cq, ARRAY_SIZE(ch->wc), wc)) > 0) {
-               for (i = 0; i < n; i++) {
-                       if (opcode_from_wr_id(wc[i].wr_id) == SRPT_RECV)
-                               srpt_process_rcv_completion(cq, ch, &wc[i]);
-                       else
-                               srpt_process_send_completion(cq, ch, &wc[i]);
-               }
+               srpt_handle_new_iu(ch, recv_ioctx, ioctx);
        }
 }
 
-/**
- * srpt_completion() - IB completion queue callback function.
- *
- * Notes:
- * - It is guaranteed that a completion handler will never be invoked
- *   concurrently on two different CPUs for the same completion queue. See also
- *   Documentation/infiniband/core_locking.txt and the implementation of
- *   handle_edge_irq() in kernel/irq/chip.c.
- * - When threaded IRQs are enabled, completion handlers are invoked in thread
- *   context instead of interrupt context.
- */
-static void srpt_completion(struct ib_cq *cq, void *ctx)
-{
-       struct srpt_rdma_ch *ch = ctx;
-
-       wake_up_interruptible(&ch->wait_queue);
-}
-
-static int srpt_compl_thread(void *arg)
-{
-       struct srpt_rdma_ch *ch;
-
-       /* Hibernation / freezing of the SRPT kernel thread is not supported. */
-       current->flags |= PF_NOFREEZE;
-
-       ch = arg;
-       BUG_ON(!ch);
-       pr_info("Session %s: kernel thread %s (PID %d) started\n",
-               ch->sess_name, ch->thread->comm, current->pid);
-       while (!kthread_should_stop()) {
-               wait_event_interruptible(ch->wait_queue,
-                       (srpt_process_completion(ch->cq, ch),
-                        kthread_should_stop()));
-       }
-       pr_info("Session %s: kernel thread %s (PID %d) stopped\n",
-               ch->sess_name, ch->thread->comm, current->pid);
-       return 0;
-}
-
 /**
  * srpt_create_ch_ib() - Create receive and send completion queues.
  */
@@ -2075,7 +1945,6 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
        struct srpt_port *sport = ch->sport;
        struct srpt_device *sdev = sport->sdev;
        u32 srp_sq_size = sport->port_attrib.srp_sq_size;
-       struct ib_cq_init_attr cq_attr = {};
        int ret;
 
        WARN_ON(ch->rq_size < 1);
@@ -2086,9 +1955,8 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
                goto out;
 
 retry:
-       cq_attr.cqe = ch->rq_size + srp_sq_size;
-       ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch,
-                             &cq_attr);
+       ch->cq = ib_alloc_cq(sdev->device, ch, ch->rq_size + srp_sq_size,
+                       0 /* XXX: spread CQs */, IB_POLL_WORKQUEUE);
        if (IS_ERR(ch->cq)) {
                ret = PTR_ERR(ch->cq);
                pr_err("failed to create CQ cqe= %d ret= %d\n",
@@ -2131,18 +1999,6 @@ retry:
        if (ret)
                goto err_destroy_qp;
 
-       init_waitqueue_head(&ch->wait_queue);
-
-       pr_debug("creating thread for session %s\n", ch->sess_name);
-
-       ch->thread = kthread_run(srpt_compl_thread, ch, "ib_srpt_compl");
-       if (IS_ERR(ch->thread)) {
-               pr_err("failed to create kernel thread %ld\n",
-                      PTR_ERR(ch->thread));
-               ch->thread = NULL;
-               goto err_destroy_qp;
-       }
-
 out:
        kfree(qp_init);
        return ret;
@@ -2150,17 +2006,14 @@ out:
 err_destroy_qp:
        ib_destroy_qp(ch->qp);
 err_destroy_cq:
-       ib_destroy_cq(ch->cq);
+       ib_free_cq(ch->cq);
        goto out;
 }
 
 static void srpt_destroy_ch_ib(struct srpt_rdma_ch *ch)
 {
-       if (ch->thread)
-               kthread_stop(ch->thread);
-
        ib_destroy_qp(ch->qp);
-       ib_destroy_cq(ch->cq);
+       ib_free_cq(ch->cq);
 }
 
 /**
@@ -2808,12 +2661,8 @@ static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
 static int srpt_perform_rdmas(struct srpt_rdma_ch *ch,
                              struct srpt_send_ioctx *ioctx)
 {
-       struct ib_rdma_wr wr;
        struct ib_send_wr *bad_wr;
-       struct rdma_iu *riu;
-       int i;
-       int ret;
-       int sq_wr_avail;
+       int sq_wr_avail, ret, i;
        enum dma_data_direction dir;
        const int n_rdma = ioctx->n_rdma;
 
@@ -2829,59 +2678,32 @@ static int srpt_perform_rdmas(struct srpt_rdma_ch *ch,
                }
        }
 
-       ioctx->rdma_aborted = false;
-       ret = 0;
-       riu = ioctx->rdma_ius;
-       memset(&wr, 0, sizeof wr);
-
-       for (i = 0; i < n_rdma; ++i, ++riu) {
-               if (dir == DMA_FROM_DEVICE) {
-                       wr.wr.opcode = IB_WR_RDMA_WRITE;
-                       wr.wr.wr_id = encode_wr_id(i == n_rdma - 1 ?
-                                               SRPT_RDMA_WRITE_LAST :
-                                               SRPT_RDMA_MID,
-                                               ioctx->ioctx.index);
-               } else {
-                       wr.wr.opcode = IB_WR_RDMA_READ;
-                       wr.wr.wr_id = encode_wr_id(i == n_rdma - 1 ?
-                                               SRPT_RDMA_READ_LAST :
-                                               SRPT_RDMA_MID,
-                                               ioctx->ioctx.index);
-               }
-               wr.wr.next = NULL;
-               wr.remote_addr = riu->raddr;
-               wr.rkey = riu->rkey;
-               wr.wr.num_sge = riu->sge_cnt;
-               wr.wr.sg_list = riu->sge;
+       for (i = 0; i < n_rdma; i++) {
+               struct ib_send_wr *wr = &ioctx->rdma_wrs[i].wr;
 
-               /* only get completion event for the last rdma write */
-               if (i == (n_rdma - 1) && dir == DMA_TO_DEVICE)
-                       wr.wr.send_flags = IB_SEND_SIGNALED;
+               wr->opcode = (dir == DMA_FROM_DEVICE) ?
+                               IB_WR_RDMA_WRITE : IB_WR_RDMA_READ;
 
-               ret = ib_post_send(ch->qp, &wr.wr, &bad_wr);
-               if (ret)
-                       break;
+               if (i == n_rdma - 1) {
+                       /* only get completion event for the last rdma read */
+                       if (dir == DMA_TO_DEVICE) {
+                               wr->send_flags = IB_SEND_SIGNALED;
+                               ioctx->rdma_cqe.done = srpt_rdma_read_done;
+                       } else {
+                               ioctx->rdma_cqe.done = srpt_rdma_write_done;
+                       }
+                       wr->wr_cqe = &ioctx->rdma_cqe;
+                       wr->next = NULL;
+               } else {
+                       wr->wr_cqe = NULL;
+                       wr->next = &ioctx->rdma_wrs[i + 1].wr;
+               }
        }
 
+       ret = ib_post_send(ch->qp, &ioctx->rdma_wrs->wr, &bad_wr);
        if (ret)
                pr_err("%s[%d]: ib_post_send() returned %d for %d/%d\n",
                                 __func__, __LINE__, ret, i, n_rdma);
-       if (ret && i > 0) {
-               wr.wr.num_sge = 0;
-               wr.wr.wr_id = encode_wr_id(SRPT_RDMA_ABORT, ioctx->ioctx.index);
-               wr.wr.send_flags = IB_SEND_SIGNALED;
-               while (ch->state == CH_LIVE &&
-                       ib_post_send(ch->qp, &wr.wr, &bad_wr) != 0) {
-                       pr_info("Trying to abort failed RDMA transfer [%d]\n",
-                               ioctx->ioctx.index);
-                       msleep(1000);
-               }
-               while (ch->state != CH_RELEASING && !ioctx->rdma_aborted) {
-                       pr_info("Waiting until RDMA abort finished [%d]\n",
-                               ioctx->ioctx.index);
-                       msleep(1000);
-               }
-       }
 out:
        if (unlikely(dir == DMA_TO_DEVICE && ret < 0))
                atomic_add(n_rdma, &ch->sq_wr_avail);
@@ -3190,14 +3012,11 @@ static void srpt_add_one(struct ib_device *device)
        init_waitqueue_head(&sdev->ch_releaseQ);
        spin_lock_init(&sdev->spinlock);
 
-       if (ib_query_device(device, &sdev->dev_attr))
-               goto free_dev;
-
        sdev->pd = ib_alloc_pd(device);
        if (IS_ERR(sdev->pd))
                goto free_dev;
 
-       sdev->srq_size = min(srpt_srq_size, sdev->dev_attr.max_srq_wr);
+       sdev->srq_size = min(srpt_srq_size, sdev->device->attrs.max_srq_wr);
 
        srq_attr.event_handler = srpt_srq_event;
        srq_attr.srq_context = (void *)sdev;
@@ -3211,7 +3030,7 @@ static void srpt_add_one(struct ib_device *device)
                goto err_pd;
 
        pr_debug("%s: create SRQ #wr= %d max_allow=%d dev= %s\n",
-                __func__, sdev->srq_size, sdev->dev_attr.max_srq_wr,
+                __func__, sdev->srq_size, sdev->device->attrs.max_srq_wr,
                 device->name);
 
        if (!srpt_service_guid)
index 5366e0a9fd6d42d298cf296ed70dd70c618e8de0..09037f2b0b51430d568e847e7ebd5f24e7fd906e 100644 (file)
@@ -128,36 +128,6 @@ enum {
        DEFAULT_MAX_RDMA_SIZE = 65536,
 };
 
-enum srpt_opcode {
-       SRPT_RECV,
-       SRPT_SEND,
-       SRPT_RDMA_MID,
-       SRPT_RDMA_ABORT,
-       SRPT_RDMA_READ_LAST,
-       SRPT_RDMA_WRITE_LAST,
-};
-
-static inline u64 encode_wr_id(u8 opcode, u32 idx)
-{
-       return ((u64)opcode << 32) | idx;
-}
-static inline enum srpt_opcode opcode_from_wr_id(u64 wr_id)
-{
-       return wr_id >> 32;
-}
-static inline u32 idx_from_wr_id(u64 wr_id)
-{
-       return (u32)wr_id;
-}
-
-struct rdma_iu {
-       u64             raddr;
-       u32             rkey;
-       struct ib_sge   *sge;
-       u32             sge_cnt;
-       int             mem_id;
-};
-
 /**
  * enum srpt_command_state - SCSI command state managed by SRPT.
  * @SRPT_STATE_NEW:           New command arrived and is being processed.
@@ -189,6 +159,7 @@ enum srpt_command_state {
  * @index: Index of the I/O context in its ioctx_ring array.
  */
 struct srpt_ioctx {
+       struct ib_cqe           cqe;
        void                    *buf;
        dma_addr_t              dma;
        uint32_t                index;
@@ -215,32 +186,30 @@ struct srpt_recv_ioctx {
  * @sg:          Pointer to sg-list associated with this I/O context.
  * @sg_cnt:      SG-list size.
  * @mapped_sg_count: ib_dma_map_sg() return value.
- * @n_rdma_ius:  Number of elements in the rdma_ius array.
- * @rdma_ius:    Array with information about the RDMA mapping.
+ * @n_rdma_wrs:  Number of elements in the rdma_wrs array.
+ * @rdma_wrs:    Array with information about the RDMA mapping.
  * @tag:         Tag of the received SRP information unit.
  * @spinlock:    Protects 'state'.
  * @state:       I/O context state.
- * @rdma_aborted: If initiating a multipart RDMA transfer failed, whether
- *              the already initiated transfers have finished.
  * @cmd:         Target core command data structure.
  * @sense_data:  SCSI sense data.
  */
 struct srpt_send_ioctx {
        struct srpt_ioctx       ioctx;
        struct srpt_rdma_ch     *ch;
-       struct rdma_iu          *rdma_ius;
+       struct ib_rdma_wr       *rdma_wrs;
+       struct ib_cqe           rdma_cqe;
        struct srp_direct_buf   *rbufs;
        struct srp_direct_buf   single_rbuf;
        struct scatterlist      *sg;
        struct list_head        free_list;
        spinlock_t              spinlock;
        enum srpt_command_state state;
-       bool                    rdma_aborted;
        struct se_cmd           cmd;
        struct completion       tx_done;
        int                     sg_cnt;
        int                     mapped_sg_count;
-       u16                     n_rdma_ius;
+       u16                     n_rdma_wrs;
        u8                      n_rdma;
        u8                      n_rbuf;
        bool                    queue_status_only;
@@ -267,9 +236,6 @@ enum rdma_ch_state {
 
 /**
  * struct srpt_rdma_ch - RDMA channel.
- * @wait_queue:    Allows the kernel thread to wait for more work.
- * @thread:        Kernel thread that processes the IB queues associated with
- *                 the channel.
  * @cm_id:         IB CM ID associated with the channel.
  * @qp:            IB queue pair used for communicating over this channel.
  * @cq:            IB completion queue for this channel.
@@ -288,7 +254,6 @@ enum rdma_ch_state {
  * @free_list:     Head of list with free send I/O contexts.
  * @state:         channel state. See also enum rdma_ch_state.
  * @ioctx_ring:    Send ring.
- * @wc:            IB work completion array for srpt_process_completion().
  * @list:          Node for insertion in the srpt_device.rch_list list.
  * @cmd_wait_list: List of SCSI commands that arrived before the RTU event. This
  *                 list contains struct srpt_ioctx elements and is protected
@@ -299,8 +264,6 @@ enum rdma_ch_state {
  * @release_done:  Enables waiting for srpt_release_channel() completion.
  */
 struct srpt_rdma_ch {
-       wait_queue_head_t       wait_queue;
-       struct task_struct      *thread;
        struct ib_cm_id         *cm_id;
        struct ib_qp            *qp;
        struct ib_cq            *cq;
@@ -317,7 +280,6 @@ struct srpt_rdma_ch {
        struct list_head        free_list;
        enum rdma_ch_state      state;
        struct srpt_send_ioctx  **ioctx_ring;
-       struct ib_wc            wc[16];
        struct list_head        list;
        struct list_head        cmd_wait_list;
        struct se_session       *sess;
@@ -377,8 +339,6 @@ struct srpt_port {
  * @mr:            L_Key (local key) with write access to all local memory.
  * @srq:           Per-HCA SRQ (shared receive queue).
  * @cm_id:         Connection identifier.
- * @dev_attr:      Attributes of the InfiniBand device as obtained during the
- *                 ib_client.add() callback.
  * @srq_size:      SRQ size.
  * @ioctx_ring:    Per-HCA SRQ.
  * @rch_list:      Per-device channel list -- see also srpt_rdma_ch.list.
@@ -393,7 +353,6 @@ struct srpt_device {
        struct ib_pd            *pd;
        struct ib_srq           *srq;
        struct ib_cm_id         *cm_id;
-       struct ib_device_attr   dev_attr;
        int                     srq_size;
        struct srpt_recv_ioctx  **ioctx_ring;
        struct list_head        rch_list;
index fd4100d56d8c5509986be23e40d2af137cd840b1..6727954ab74be9338e9c3d46e12a55fa3db7a6e7 100644 (file)
  */
 
 #include <linux/kernel.h>
+#include <linux/input.h>
+#include <linux/rcupdate.h>
 #include <linux/slab.h>
 #include <linux/stat.h>
 #include <linux/module.h>
 #include <linux/usb/input.h>
+#include <linux/usb/quirks.h>
 
 #define DRIVER_AUTHOR "Marko Friedemann <mfr@bmx-chemnitz.de>"
 #define DRIVER_DESC "X-Box pad driver"
@@ -125,7 +128,7 @@ static const struct xpad_device {
        { 0x045e, 0x0289, "Microsoft X-Box pad v2 (US)", 0, XTYPE_XBOX },
        { 0x045e, 0x028e, "Microsoft X-Box 360 pad", 0, XTYPE_XBOX360 },
        { 0x045e, 0x02d1, "Microsoft X-Box One pad", 0, XTYPE_XBOXONE },
-       { 0x045e, 0x02dd, "Microsoft X-Box One pad (Covert Forces)", 0, XTYPE_XBOXONE },
+       { 0x045e, 0x02dd, "Microsoft X-Box One pad (Firmware 2015)", 0, XTYPE_XBOXONE },
        { 0x045e, 0x0291, "Xbox 360 Wireless Receiver (XBOX)", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360W },
        { 0x045e, 0x0719, "Xbox 360 Wireless Receiver", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360W },
        { 0x044f, 0x0f07, "Thrustmaster, Inc. Controller", 0, XTYPE_XBOX },
@@ -317,21 +320,42 @@ static struct usb_device_id xpad_table[] = {
 
 MODULE_DEVICE_TABLE(usb, xpad_table);
 
+struct xpad_output_packet {
+       u8 data[XPAD_PKT_LEN];
+       u8 len;
+       bool pending;
+};
+
+#define XPAD_OUT_CMD_IDX       0
+#define XPAD_OUT_FF_IDX                1
+#define XPAD_OUT_LED_IDX       (1 + IS_ENABLED(CONFIG_JOYSTICK_XPAD_FF))
+#define XPAD_NUM_OUT_PACKETS   (1 + \
+                                IS_ENABLED(CONFIG_JOYSTICK_XPAD_FF) + \
+                                IS_ENABLED(CONFIG_JOYSTICK_XPAD_LEDS))
+
 struct usb_xpad {
        struct input_dev *dev;          /* input device interface */
+       struct input_dev __rcu *x360w_dev;
        struct usb_device *udev;        /* usb device */
        struct usb_interface *intf;     /* usb interface */
 
-       int pad_present;
+       bool pad_present;
+       bool input_created;
 
        struct urb *irq_in;             /* urb for interrupt in report */
        unsigned char *idata;           /* input data */
        dma_addr_t idata_dma;
 
        struct urb *irq_out;            /* urb for interrupt out report */
+       struct usb_anchor irq_out_anchor;
+       bool irq_out_active;            /* we must not use an active URB */
+       u8 odata_serial;                /* serial number for xbox one protocol */
        unsigned char *odata;           /* output data */
        dma_addr_t odata_dma;
-       struct mutex odata_mutex;
+       spinlock_t odata_lock;
+
+       struct xpad_output_packet out_packets[XPAD_NUM_OUT_PACKETS];
+       int last_out_packet;
 
 #if defined(CONFIG_JOYSTICK_XPAD_LEDS)
        struct xpad_led *led;
@@ -343,8 +367,12 @@ struct usb_xpad {
        int xtype;                      /* type of xbox device */
        int pad_nr;                     /* the order x360 pads were attached */
        const char *name;               /* name of the device */
+       struct work_struct work;        /* init/remove device from callback */
 };
 
+static int xpad_init_input(struct usb_xpad *xpad);
+static void xpad_deinit_input(struct usb_xpad *xpad);
+
 /*
  *     xpad_process_packet
  *
@@ -424,11 +452,9 @@ static void xpad_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char *d
  *             http://www.free60.org/wiki/Gamepad
  */
 
-static void xpad360_process_packet(struct usb_xpad *xpad,
+static void xpad360_process_packet(struct usb_xpad *xpad, struct input_dev *dev,
                                   u16 cmd, unsigned char *data)
 {
-       struct input_dev *dev = xpad->dev;
-
        /* digital pad */
        if (xpad->mapping & MAP_DPAD_TO_BUTTONS) {
                /* dpad as buttons (left, right, up, down) */
@@ -495,7 +521,30 @@ static void xpad360_process_packet(struct usb_xpad *xpad,
        input_sync(dev);
 }
 
-static void xpad_identify_controller(struct usb_xpad *xpad);
+static void xpad_presence_work(struct work_struct *work)
+{
+       struct usb_xpad *xpad = container_of(work, struct usb_xpad, work);
+       int error;
+
+       if (xpad->pad_present) {
+               error = xpad_init_input(xpad);
+               if (error) {
+                       /* complain only, not much else we can do here */
+                       dev_err(&xpad->dev->dev,
+                               "unable to init device: %d\n", error);
+               } else {
+                       rcu_assign_pointer(xpad->x360w_dev, xpad->dev);
+               }
+       } else {
+               RCU_INIT_POINTER(xpad->x360w_dev, NULL);
+               synchronize_rcu();
+               /*
+                * Now that we are sure xpad360w_process_packet is not
+                * using input device we can get rid of it.
+                */
+               xpad_deinit_input(xpad);
+       }
+}
 
 /*
  * xpad360w_process_packet
@@ -513,24 +562,28 @@ static void xpad_identify_controller(struct usb_xpad *xpad);
  */
 static void xpad360w_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char *data)
 {
+       struct input_dev *dev;
+       bool present;
+
        /* Presence change */
        if (data[0] & 0x08) {
-               if (data[1] & 0x80) {
-                       xpad->pad_present = 1;
-                       /*
-                        * Light up the segment corresponding to
-                        * controller number.
-                        */
-                       xpad_identify_controller(xpad);
-               } else
-                       xpad->pad_present = 0;
+               present = (data[1] & 0x80) != 0;
+
+               if (xpad->pad_present != present) {
+                       xpad->pad_present = present;
+                       schedule_work(&xpad->work);
+               }
        }
 
        /* Valid pad data */
-       if (!(data[1] & 0x1))
+       if (data[1] != 0x1)
                return;
 
-       xpad360_process_packet(xpad, cmd, &data[4]);
+       rcu_read_lock();
+       dev = rcu_dereference(xpad->x360w_dev);
+       if (dev)
+               xpad360_process_packet(xpad, dev, cmd, &data[4]);
+       rcu_read_unlock();
 }
 
 /*
@@ -659,7 +712,7 @@ static void xpad_irq_in(struct urb *urb)
 
        switch (xpad->xtype) {
        case XTYPE_XBOX360:
-               xpad360_process_packet(xpad, 0, xpad->idata);
+               xpad360_process_packet(xpad, xpad->dev, 0, xpad->idata);
                break;
        case XTYPE_XBOX360W:
                xpad360w_process_packet(xpad, 0, xpad->idata);
@@ -678,18 +731,73 @@ exit:
                        __func__, retval);
 }
 
+/* Callers must hold xpad->odata_lock spinlock */
+static bool xpad_prepare_next_out_packet(struct usb_xpad *xpad)
+{
+       struct xpad_output_packet *pkt, *packet = NULL;
+       int i;
+
+       for (i = 0; i < XPAD_NUM_OUT_PACKETS; i++) {
+               if (++xpad->last_out_packet >= XPAD_NUM_OUT_PACKETS)
+                       xpad->last_out_packet = 0;
+
+               pkt = &xpad->out_packets[xpad->last_out_packet];
+               if (pkt->pending) {
+                       dev_dbg(&xpad->intf->dev,
+                               "%s - found pending output packet %d\n",
+                               __func__, xpad->last_out_packet);
+                       packet = pkt;
+                       break;
+               }
+       }
+
+       if (packet) {
+               memcpy(xpad->odata, packet->data, packet->len);
+               xpad->irq_out->transfer_buffer_length = packet->len;
+               return true;
+       }
+
+       return false;
+}
+
+/* Callers must hold xpad->odata_lock spinlock */
+static int xpad_try_sending_next_out_packet(struct usb_xpad *xpad)
+{
+       int error;
+
+       if (!xpad->irq_out_active && xpad_prepare_next_out_packet(xpad)) {
+               usb_anchor_urb(xpad->irq_out, &xpad->irq_out_anchor);
+               error = usb_submit_urb(xpad->irq_out, GFP_ATOMIC);
+               if (error) {
+                       dev_err(&xpad->intf->dev,
+                               "%s - usb_submit_urb failed with result %d\n",
+                               __func__, error);
+                       usb_unanchor_urb(xpad->irq_out);
+                       return -EIO;
+               }
+
+               xpad->irq_out_active = true;
+       }
+
+       return 0;
+}
+
 static void xpad_irq_out(struct urb *urb)
 {
        struct usb_xpad *xpad = urb->context;
        struct device *dev = &xpad->intf->dev;
-       int retval, status;
+       int status = urb->status;
+       int error;
+       unsigned long flags;
 
-       status = urb->status;
+       spin_lock_irqsave(&xpad->odata_lock, flags);
 
        switch (status) {
        case 0:
                /* success */
-               return;
+               xpad->out_packets[xpad->last_out_packet].pending = false;
+               xpad->irq_out_active = xpad_prepare_next_out_packet(xpad);
+               break;
 
        case -ECONNRESET:
        case -ENOENT:
@@ -697,19 +805,28 @@ static void xpad_irq_out(struct urb *urb)
                /* this urb is terminated, clean up */
                dev_dbg(dev, "%s - urb shutting down with status: %d\n",
                        __func__, status);
-               return;
+               xpad->irq_out_active = false;
+               break;
 
        default:
                dev_dbg(dev, "%s - nonzero urb status received: %d\n",
                        __func__, status);
-               goto exit;
+               break;
        }
 
-exit:
-       retval = usb_submit_urb(urb, GFP_ATOMIC);
-       if (retval)
-               dev_err(dev, "%s - usb_submit_urb failed with result %d\n",
-                       __func__, retval);
+       if (xpad->irq_out_active) {
+               usb_anchor_urb(urb, &xpad->irq_out_anchor);
+               error = usb_submit_urb(urb, GFP_ATOMIC);
+               if (error) {
+                       dev_err(dev,
+                               "%s - usb_submit_urb failed with result %d\n",
+                               __func__, error);
+                       usb_unanchor_urb(urb);
+                       xpad->irq_out_active = false;
+               }
+       }
+
+       spin_unlock_irqrestore(&xpad->odata_lock, flags);
 }
 
 static int xpad_init_output(struct usb_interface *intf, struct usb_xpad *xpad)
@@ -721,6 +838,8 @@ static int xpad_init_output(struct usb_interface *intf, struct usb_xpad *xpad)
        if (xpad->xtype == XTYPE_UNKNOWN)
                return 0;
 
+       init_usb_anchor(&xpad->irq_out_anchor);
+
        xpad->odata = usb_alloc_coherent(xpad->udev, XPAD_PKT_LEN,
                                         GFP_KERNEL, &xpad->odata_dma);
        if (!xpad->odata) {
@@ -728,7 +847,7 @@ static int xpad_init_output(struct usb_interface *intf, struct usb_xpad *xpad)
                goto fail1;
        }
 
-       mutex_init(&xpad->odata_mutex);
+       spin_lock_init(&xpad->odata_lock);
 
        xpad->irq_out = usb_alloc_urb(0, GFP_KERNEL);
        if (!xpad->irq_out) {
@@ -755,8 +874,14 @@ static int xpad_init_output(struct usb_interface *intf, struct usb_xpad *xpad)
 
 static void xpad_stop_output(struct usb_xpad *xpad)
 {
-       if (xpad->xtype != XTYPE_UNKNOWN)
-               usb_kill_urb(xpad->irq_out);
+       if (xpad->xtype != XTYPE_UNKNOWN) {
+               if (!usb_wait_anchor_empty_timeout(&xpad->irq_out_anchor,
+                                                  5000)) {
+                       dev_warn(&xpad->intf->dev,
+                                "timed out waiting for output URB to complete, killing\n");
+                       usb_kill_anchored_urbs(&xpad->irq_out_anchor);
+               }
+       }
 }
 
 static void xpad_deinit_output(struct usb_xpad *xpad)
@@ -770,27 +895,60 @@ static void xpad_deinit_output(struct usb_xpad *xpad)
 
 static int xpad_inquiry_pad_presence(struct usb_xpad *xpad)
 {
+       struct xpad_output_packet *packet =
+                       &xpad->out_packets[XPAD_OUT_CMD_IDX];
+       unsigned long flags;
        int retval;
 
-       mutex_lock(&xpad->odata_mutex);
+       spin_lock_irqsave(&xpad->odata_lock, flags);
+
+       packet->data[0] = 0x08;
+       packet->data[1] = 0x00;
+       packet->data[2] = 0x0F;
+       packet->data[3] = 0xC0;
+       packet->data[4] = 0x00;
+       packet->data[5] = 0x00;
+       packet->data[6] = 0x00;
+       packet->data[7] = 0x00;
+       packet->data[8] = 0x00;
+       packet->data[9] = 0x00;
+       packet->data[10] = 0x00;
+       packet->data[11] = 0x00;
+       packet->len = 12;
+       packet->pending = true;
+
+       /* Reset the sequence so we send out presence first */
+       xpad->last_out_packet = -1;
+       retval = xpad_try_sending_next_out_packet(xpad);
+
+       spin_unlock_irqrestore(&xpad->odata_lock, flags);
 
-       xpad->odata[0] = 0x08;
-       xpad->odata[1] = 0x00;
-       xpad->odata[2] = 0x0F;
-       xpad->odata[3] = 0xC0;
-       xpad->odata[4] = 0x00;
-       xpad->odata[5] = 0x00;
-       xpad->odata[6] = 0x00;
-       xpad->odata[7] = 0x00;
-       xpad->odata[8] = 0x00;
-       xpad->odata[9] = 0x00;
-       xpad->odata[10] = 0x00;
-       xpad->odata[11] = 0x00;
-       xpad->irq_out->transfer_buffer_length = 12;
+       return retval;
+}
+
+static int xpad_start_xbox_one(struct usb_xpad *xpad)
+{
+       struct xpad_output_packet *packet =
+                       &xpad->out_packets[XPAD_OUT_CMD_IDX];
+       unsigned long flags;
+       int retval;
 
-       retval = usb_submit_urb(xpad->irq_out, GFP_KERNEL);
+       spin_lock_irqsave(&xpad->odata_lock, flags);
 
-       mutex_unlock(&xpad->odata_mutex);
+       /* Xbox one controller needs to be initialized. */
+       packet->data[0] = 0x05;
+       packet->data[1] = 0x20;
+       packet->data[2] = xpad->odata_serial++; /* packet serial */
+       packet->data[3] = 0x01; /* rumble bit enable?  */
+       packet->data[4] = 0x00;
+       packet->len = 5;
+       packet->pending = true;
+
+       /* Reset the sequence so we send out start packet first */
+       xpad->last_out_packet = -1;
+       retval = xpad_try_sending_next_out_packet(xpad);
+
+       spin_unlock_irqrestore(&xpad->odata_lock, flags);
 
        return retval;
 }
@@ -799,8 +957,11 @@ static int xpad_inquiry_pad_presence(struct usb_xpad *xpad)
 static int xpad_play_effect(struct input_dev *dev, void *data, struct ff_effect *effect)
 {
        struct usb_xpad *xpad = input_get_drvdata(dev);
+       struct xpad_output_packet *packet = &xpad->out_packets[XPAD_OUT_FF_IDX];
        __u16 strong;
        __u16 weak;
+       int retval;
+       unsigned long flags;
 
        if (effect->type != FF_RUMBLE)
                return 0;
@@ -808,69 +969,81 @@ static int xpad_play_effect(struct input_dev *dev, void *data, struct ff_effect
        strong = effect->u.rumble.strong_magnitude;
        weak = effect->u.rumble.weak_magnitude;
 
+       spin_lock_irqsave(&xpad->odata_lock, flags);
+
        switch (xpad->xtype) {
        case XTYPE_XBOX:
-               xpad->odata[0] = 0x00;
-               xpad->odata[1] = 0x06;
-               xpad->odata[2] = 0x00;
-               xpad->odata[3] = strong / 256;  /* left actuator */
-               xpad->odata[4] = 0x00;
-               xpad->odata[5] = weak / 256;    /* right actuator */
-               xpad->irq_out->transfer_buffer_length = 6;
+               packet->data[0] = 0x00;
+               packet->data[1] = 0x06;
+               packet->data[2] = 0x00;
+               packet->data[3] = strong / 256; /* left actuator */
+               packet->data[4] = 0x00;
+               packet->data[5] = weak / 256;   /* right actuator */
+               packet->len = 6;
+               packet->pending = true;
                break;
 
        case XTYPE_XBOX360:
-               xpad->odata[0] = 0x00;
-               xpad->odata[1] = 0x08;
-               xpad->odata[2] = 0x00;
-               xpad->odata[3] = strong / 256;  /* left actuator? */
-               xpad->odata[4] = weak / 256;    /* right actuator? */
-               xpad->odata[5] = 0x00;
-               xpad->odata[6] = 0x00;
-               xpad->odata[7] = 0x00;
-               xpad->irq_out->transfer_buffer_length = 8;
+               packet->data[0] = 0x00;
+               packet->data[1] = 0x08;
+               packet->data[2] = 0x00;
+               packet->data[3] = strong / 256;  /* left actuator? */
+               packet->data[4] = weak / 256;   /* right actuator? */
+               packet->data[5] = 0x00;
+               packet->data[6] = 0x00;
+               packet->data[7] = 0x00;
+               packet->len = 8;
+               packet->pending = true;
                break;
 
        case XTYPE_XBOX360W:
-               xpad->odata[0] = 0x00;
-               xpad->odata[1] = 0x01;
-               xpad->odata[2] = 0x0F;
-               xpad->odata[3] = 0xC0;
-               xpad->odata[4] = 0x00;
-               xpad->odata[5] = strong / 256;
-               xpad->odata[6] = weak / 256;
-               xpad->odata[7] = 0x00;
-               xpad->odata[8] = 0x00;
-               xpad->odata[9] = 0x00;
-               xpad->odata[10] = 0x00;
-               xpad->odata[11] = 0x00;
-               xpad->irq_out->transfer_buffer_length = 12;
+               packet->data[0] = 0x00;
+               packet->data[1] = 0x01;
+               packet->data[2] = 0x0F;
+               packet->data[3] = 0xC0;
+               packet->data[4] = 0x00;
+               packet->data[5] = strong / 256;
+               packet->data[6] = weak / 256;
+               packet->data[7] = 0x00;
+               packet->data[8] = 0x00;
+               packet->data[9] = 0x00;
+               packet->data[10] = 0x00;
+               packet->data[11] = 0x00;
+               packet->len = 12;
+               packet->pending = true;
                break;
 
        case XTYPE_XBOXONE:
-               xpad->odata[0] = 0x09; /* activate rumble */
-               xpad->odata[1] = 0x08;
-               xpad->odata[2] = 0x00;
-               xpad->odata[3] = 0x08; /* continuous effect */
-               xpad->odata[4] = 0x00; /* simple rumble mode */
-               xpad->odata[5] = 0x03; /* L and R actuator only */
-               xpad->odata[6] = 0x00; /* TODO: LT actuator */
-               xpad->odata[7] = 0x00; /* TODO: RT actuator */
-               xpad->odata[8] = strong / 256;  /* left actuator */
-               xpad->odata[9] = weak / 256;    /* right actuator */
-               xpad->odata[10] = 0x80; /* length of pulse */
-               xpad->odata[11] = 0x00; /* stop period of pulse */
-               xpad->irq_out->transfer_buffer_length = 12;
+               packet->data[0] = 0x09; /* activate rumble */
+               packet->data[1] = 0x08;
+               packet->data[2] = xpad->odata_serial++;
+               packet->data[3] = 0x08; /* continuous effect */
+               packet->data[4] = 0x00; /* simple rumble mode */
+               packet->data[5] = 0x03; /* L and R actuator only */
+               packet->data[6] = 0x00; /* TODO: LT actuator */
+               packet->data[7] = 0x00; /* TODO: RT actuator */
+               packet->data[8] = strong / 512; /* left actuator */
+               packet->data[9] = weak / 512;   /* right actuator */
+               packet->data[10] = 0x80;        /* length of pulse */
+               packet->data[11] = 0x00;        /* stop period of pulse */
+               packet->data[12] = 0x00;
+               packet->len = 13;
+               packet->pending = true;
                break;
 
        default:
                dev_dbg(&xpad->dev->dev,
                        "%s - rumble command sent to unsupported xpad type: %d\n",
                        __func__, xpad->xtype);
-               return -EINVAL;
+               retval = -EINVAL;
+               goto out;
        }
 
-       return usb_submit_urb(xpad->irq_out, GFP_ATOMIC);
+       retval = xpad_try_sending_next_out_packet(xpad);
+
+out:
+       spin_unlock_irqrestore(&xpad->odata_lock, flags);
+       return retval;
 }
 
 static int xpad_init_ff(struct usb_xpad *xpad)
@@ -921,36 +1094,44 @@ struct xpad_led {
  */
 static void xpad_send_led_command(struct usb_xpad *xpad, int command)
 {
+       struct xpad_output_packet *packet =
+                       &xpad->out_packets[XPAD_OUT_LED_IDX];
+       unsigned long flags;
+
        command %= 16;
 
-       mutex_lock(&xpad->odata_mutex);
+       spin_lock_irqsave(&xpad->odata_lock, flags);
 
        switch (xpad->xtype) {
        case XTYPE_XBOX360:
-               xpad->odata[0] = 0x01;
-               xpad->odata[1] = 0x03;
-               xpad->odata[2] = command;
-               xpad->irq_out->transfer_buffer_length = 3;
+               packet->data[0] = 0x01;
+               packet->data[1] = 0x03;
+               packet->data[2] = command;
+               packet->len = 3;
+               packet->pending = true;
                break;
+
        case XTYPE_XBOX360W:
-               xpad->odata[0] = 0x00;
-               xpad->odata[1] = 0x00;
-               xpad->odata[2] = 0x08;
-               xpad->odata[3] = 0x40 + command;
-               xpad->odata[4] = 0x00;
-               xpad->odata[5] = 0x00;
-               xpad->odata[6] = 0x00;
-               xpad->odata[7] = 0x00;
-               xpad->odata[8] = 0x00;
-               xpad->odata[9] = 0x00;
-               xpad->odata[10] = 0x00;
-               xpad->odata[11] = 0x00;
-               xpad->irq_out->transfer_buffer_length = 12;
+               packet->data[0] = 0x00;
+               packet->data[1] = 0x00;
+               packet->data[2] = 0x08;
+               packet->data[3] = 0x40 + command;
+               packet->data[4] = 0x00;
+               packet->data[5] = 0x00;
+               packet->data[6] = 0x00;
+               packet->data[7] = 0x00;
+               packet->data[8] = 0x00;
+               packet->data[9] = 0x00;
+               packet->data[10] = 0x00;
+               packet->data[11] = 0x00;
+               packet->len = 12;
+               packet->pending = true;
                break;
        }
 
-       usb_submit_urb(xpad->irq_out, GFP_KERNEL);
-       mutex_unlock(&xpad->odata_mutex);
+       xpad_try_sending_next_out_packet(xpad);
+
+       spin_unlock_irqrestore(&xpad->odata_lock, flags);
 }
 
 /*
@@ -959,7 +1140,7 @@ static void xpad_send_led_command(struct usb_xpad *xpad, int command)
  */
 static void xpad_identify_controller(struct usb_xpad *xpad)
 {
-       xpad_send_led_command(xpad, (xpad->pad_nr % 4) + 2);
+       led_set_brightness(&xpad->led->led_cdev, (xpad->pad_nr % 4) + 2);
 }
 
 static void xpad_led_set(struct led_classdev *led_cdev,
@@ -1001,14 +1182,7 @@ static int xpad_led_probe(struct usb_xpad *xpad)
        if (error)
                goto err_free_id;
 
-       if (xpad->xtype == XTYPE_XBOX360) {
-               /*
-                * Light up the segment corresponding to controller
-                * number on wired devices. On wireless we'll do that
-                * when they respond to "presence" packet.
-                */
-               xpad_identify_controller(xpad);
-       }
+       xpad_identify_controller(xpad);
 
        return 0;
 
@@ -1036,37 +1210,73 @@ static void xpad_led_disconnect(struct usb_xpad *xpad) { }
 static void xpad_identify_controller(struct usb_xpad *xpad) { }
 #endif
 
-static int xpad_open(struct input_dev *dev)
+static int xpad_start_input(struct usb_xpad *xpad)
 {
-       struct usb_xpad *xpad = input_get_drvdata(dev);
-
-       /* URB was submitted in probe */
-       if (xpad->xtype == XTYPE_XBOX360W)
-               return 0;
+       int error;
 
-       xpad->irq_in->dev = xpad->udev;
        if (usb_submit_urb(xpad->irq_in, GFP_KERNEL))
                return -EIO;
 
        if (xpad->xtype == XTYPE_XBOXONE) {
-               /* Xbox one controller needs to be initialized. */
-               xpad->odata[0] = 0x05;
-               xpad->odata[1] = 0x20;
-               xpad->irq_out->transfer_buffer_length = 2;
-               return usb_submit_urb(xpad->irq_out, GFP_KERNEL);
+               error = xpad_start_xbox_one(xpad);
+               if (error) {
+                       usb_kill_urb(xpad->irq_in);
+                       return error;
+               }
        }
 
        return 0;
 }
 
-static void xpad_close(struct input_dev *dev)
+static void xpad_stop_input(struct usb_xpad *xpad)
 {
-       struct usb_xpad *xpad = input_get_drvdata(dev);
+       usb_kill_urb(xpad->irq_in);
+}
+
+static int xpad360w_start_input(struct usb_xpad *xpad)
+{
+       int error;
 
-       if (xpad->xtype != XTYPE_XBOX360W)
+       error = usb_submit_urb(xpad->irq_in, GFP_KERNEL);
+       if (error)
+               return -EIO;
+
+       /*
+        * Send presence packet.
+        * This will force the controller to resend connection packets.
+        * This is useful in the case we activate the module after the
+        * adapter has been plugged in, as it won't automatically
+        * send us info about the controllers.
+        */
+       error = xpad_inquiry_pad_presence(xpad);
+       if (error) {
                usb_kill_urb(xpad->irq_in);
+               return error;
+       }
 
-       xpad_stop_output(xpad);
+       return 0;
+}
+
+static void xpad360w_stop_input(struct usb_xpad *xpad)
+{
+       usb_kill_urb(xpad->irq_in);
+
+       /* Make sure we are done with presence work if it was scheduled */
+       flush_work(&xpad->work);
+}
+
+static int xpad_open(struct input_dev *dev)
+{
+       struct usb_xpad *xpad = input_get_drvdata(dev);
+
+       return xpad_start_input(xpad);
+}
+
+static void xpad_close(struct input_dev *dev)
+{
+       struct usb_xpad *xpad = input_get_drvdata(dev);
+
+       xpad_stop_input(xpad);
 }
 
 static void xpad_set_up_abs(struct input_dev *input_dev, signed short abs)
@@ -1097,8 +1307,11 @@ static void xpad_set_up_abs(struct input_dev *input_dev, signed short abs)
 
 static void xpad_deinit_input(struct usb_xpad *xpad)
 {
-       xpad_led_disconnect(xpad);
-       input_unregister_device(xpad->dev);
+       if (xpad->input_created) {
+               xpad->input_created = false;
+               xpad_led_disconnect(xpad);
+               input_unregister_device(xpad->dev);
+       }
 }
 
 static int xpad_init_input(struct usb_xpad *xpad)
@@ -1118,8 +1331,10 @@ static int xpad_init_input(struct usb_xpad *xpad)
 
        input_set_drvdata(input_dev, xpad);
 
-       input_dev->open = xpad_open;
-       input_dev->close = xpad_close;
+       if (xpad->xtype != XTYPE_XBOX360W) {
+               input_dev->open = xpad_open;
+               input_dev->close = xpad_close;
+       }
 
        __set_bit(EV_KEY, input_dev->evbit);
 
@@ -1181,6 +1396,7 @@ static int xpad_init_input(struct usb_xpad *xpad)
        if (error)
                goto err_disconnect_led;
 
+       xpad->input_created = true;
        return 0;
 
 err_disconnect_led:
@@ -1241,6 +1457,7 @@ static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id
        xpad->mapping = xpad_device[i].mapping;
        xpad->xtype = xpad_device[i].xtype;
        xpad->name = xpad_device[i].name;
+       INIT_WORK(&xpad->work, xpad_presence_work);
 
        if (xpad->xtype == XTYPE_UNKNOWN) {
                if (intf->cur_altsetting->desc.bInterfaceClass == USB_CLASS_VENDOR_SPEC) {
@@ -1277,10 +1494,6 @@ static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id
 
        usb_set_intfdata(intf, xpad);
 
-       error = xpad_init_input(xpad);
-       if (error)
-               goto err_deinit_output;
-
        if (xpad->xtype == XTYPE_XBOX360W) {
                /*
                 * Submit the int URB immediately rather than waiting for open
@@ -1289,28 +1502,24 @@ static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id
                 * exactly the message that a controller has arrived that
                 * we're waiting for.
                 */
-               xpad->irq_in->dev = xpad->udev;
-               error = usb_submit_urb(xpad->irq_in, GFP_KERNEL);
+               error = xpad360w_start_input(xpad);
                if (error)
-                       goto err_deinit_input;
-
+                       goto err_deinit_output;
                /*
-                * Send presence packet.
-                * This will force the controller to resend connection packets.
-                * This is useful in the case we activate the module after the
-                * adapter has been plugged in, as it won't automatically
-                * send us info about the controllers.
+                * Wireless controllers require RESET_RESUME to work properly
+                * after suspend. Ideally this quirk should be in usb core
+                * quirk list, but we have too many vendors producing these
+                * controllers and we'd need to maintain 2 identical lists
+                * here in this driver and in usb core.
                 */
-               error = xpad_inquiry_pad_presence(xpad);
+               udev->quirks |= USB_QUIRK_RESET_RESUME;
+       } else {
+               error = xpad_init_input(xpad);
                if (error)
-                       goto err_kill_in_urb;
+                       goto err_deinit_output;
        }
        return 0;
 
-err_kill_in_urb:
-       usb_kill_urb(xpad->irq_in);
-err_deinit_input:
-       xpad_deinit_input(xpad);
 err_deinit_output:
        xpad_deinit_output(xpad);
 err_free_in_urb:
@@ -1320,19 +1529,24 @@ err_free_idata:
 err_free_mem:
        kfree(xpad);
        return error;
-
 }
 
 static void xpad_disconnect(struct usb_interface *intf)
 {
-       struct usb_xpad *xpad = usb_get_intfdata (intf);
+       struct usb_xpad *xpad = usb_get_intfdata(intf);
+
+       if (xpad->xtype == XTYPE_XBOX360W)
+               xpad360w_stop_input(xpad);
 
        xpad_deinit_input(xpad);
-       xpad_deinit_output(xpad);
 
-       if (xpad->xtype == XTYPE_XBOX360W) {
-               usb_kill_urb(xpad->irq_in);
-       }
+       /*
+        * Now that both input device and LED device are gone we can
+        * stop output URB.
+        */
+       xpad_stop_output(xpad);
+
+       xpad_deinit_output(xpad);
 
        usb_free_urb(xpad->irq_in);
        usb_free_coherent(xpad->udev, XPAD_PKT_LEN,
@@ -1343,10 +1557,55 @@ static void xpad_disconnect(struct usb_interface *intf)
        usb_set_intfdata(intf, NULL);
 }
 
+static int xpad_suspend(struct usb_interface *intf, pm_message_t message)
+{
+       struct usb_xpad *xpad = usb_get_intfdata(intf);
+       struct input_dev *input = xpad->dev;
+
+       if (xpad->xtype == XTYPE_XBOX360W) {
+               /*
+                * Wireless controllers always listen to input so
+                * they are notified when controller shows up
+                * or goes away.
+                */
+               xpad360w_stop_input(xpad);
+       } else {
+               mutex_lock(&input->mutex);
+               if (input->users)
+                       xpad_stop_input(xpad);
+               mutex_unlock(&input->mutex);
+       }
+
+       xpad_stop_output(xpad);
+
+       return 0;
+}
+
+static int xpad_resume(struct usb_interface *intf)
+{
+       struct usb_xpad *xpad = usb_get_intfdata(intf);
+       struct input_dev *input = xpad->dev;
+       int retval = 0;
+
+       if (xpad->xtype == XTYPE_XBOX360W) {
+               retval = xpad360w_start_input(xpad);
+       } else {
+               mutex_lock(&input->mutex);
+               if (input->users)
+                       retval = xpad_start_input(xpad);
+               mutex_unlock(&input->mutex);
+       }
+
+       return retval;
+}
+
 static struct usb_driver xpad_driver = {
        .name           = "xpad",
        .probe          = xpad_probe,
        .disconnect     = xpad_disconnect,
+       .suspend        = xpad_suspend,
+       .resume         = xpad_resume,
+       .reset_resume   = xpad_resume,
        .id_table       = xpad_table,
 };
 
index b9f01bd1b7ef1b76a7b4c0d46f71fc3e4ace1818..29093657f2ef8233c667aace8406790a7d859f1d 100644 (file)
@@ -630,7 +630,7 @@ gpio_keys_get_devtree_pdata(struct device *dev)
        if (!node)
                return ERR_PTR(-ENODEV);
 
-       nbuttons = of_get_child_count(node);
+       nbuttons = of_get_available_child_count(node);
        if (nbuttons == 0)
                return ERR_PTR(-ENODEV);
 
@@ -645,8 +645,10 @@ gpio_keys_get_devtree_pdata(struct device *dev)
 
        pdata->rep = !!of_get_property(node, "autorepeat", NULL);
 
+       of_property_read_string(node, "label", &pdata->name);
+
        i = 0;
-       for_each_child_of_node(node, pp) {
+       for_each_available_child_of_node(node, pp) {
                enum of_gpio_flags flags;
 
                button = &pdata->buttons[i++];
index 2d5794ec338b72a1cea0367bccdaf9654db025d4..2160512e861af57d289fb5873ef80b81c2fbf7c9 100644 (file)
@@ -113,8 +113,8 @@ struct t7_config {
 #define MXT_T9_DETECT          (1 << 7)
 
 struct t9_range {
-       u16 x;
-       u16 y;
+       __le16 x;
+       __le16 y;
 } __packed;
 
 /* MXT_TOUCH_MULTI_T9 orient */
@@ -216,6 +216,7 @@ struct mxt_data {
        unsigned int irq;
        unsigned int max_x;
        unsigned int max_y;
+       bool xy_switch;
        bool in_bootloader;
        u16 mem_size;
        u8 t100_aux_ampl;
@@ -1665,8 +1666,8 @@ static int mxt_read_t9_resolution(struct mxt_data *data)
        if (error)
                return error;
 
-       le16_to_cpus(&range.x);
-       le16_to_cpus(&range.y);
+       data->max_x = get_unaligned_le16(&range.x);
+       data->max_y = get_unaligned_le16(&range.y);
 
        error =  __mxt_read_reg(client,
                                object->start_address + MXT_T9_ORIENT,
@@ -1674,23 +1675,7 @@ static int mxt_read_t9_resolution(struct mxt_data *data)
        if (error)
                return error;
 
-       /* Handle default values */
-       if (range.x == 0)
-               range.x = 1023;
-
-       if (range.y == 0)
-               range.y = 1023;
-
-       if (orient & MXT_T9_ORIENT_SWITCH) {
-               data->max_x = range.y;
-               data->max_y = range.x;
-       } else {
-               data->max_x = range.x;
-               data->max_y = range.y;
-       }
-
-       dev_dbg(&client->dev,
-               "Touchscreen size X%uY%u\n", data->max_x, data->max_y);
+       data->xy_switch = orient & MXT_T9_ORIENT_SWITCH;
 
        return 0;
 }
@@ -1708,13 +1693,14 @@ static int mxt_read_t100_config(struct mxt_data *data)
        if (!object)
                return -EINVAL;
 
+       /* read touchscreen dimensions */
        error = __mxt_read_reg(client,
                               object->start_address + MXT_T100_XRANGE,
                               sizeof(range_x), &range_x);
        if (error)
                return error;
 
-       le16_to_cpus(&range_x);
+       data->max_x = get_unaligned_le16(&range_x);
 
        error = __mxt_read_reg(client,
                               object->start_address + MXT_T100_YRANGE,
@@ -1722,36 +1708,24 @@ static int mxt_read_t100_config(struct mxt_data *data)
        if (error)
                return error;
 
-       le16_to_cpus(&range_y);
+       data->max_y = get_unaligned_le16(&range_y);
 
+       /* read orientation config */
        error =  __mxt_read_reg(client,
                                object->start_address + MXT_T100_CFG1,
                                1, &cfg);
        if (error)
                return error;
 
+       data->xy_switch = cfg & MXT_T100_CFG_SWITCHXY;
+
+       /* allocate aux bytes */
        error =  __mxt_read_reg(client,
                                object->start_address + MXT_T100_TCHAUX,
                                1, &tchaux);
        if (error)
                return error;
 
-       /* Handle default values */
-       if (range_x == 0)
-               range_x = 1023;
-
-       if (range_y == 0)
-               range_y = 1023;
-
-       if (cfg & MXT_T100_CFG_SWITCHXY) {
-               data->max_x = range_y;
-               data->max_y = range_x;
-       } else {
-               data->max_x = range_x;
-               data->max_y = range_y;
-       }
-
-       /* allocate aux bytes */
        aux = 6;
 
        if (tchaux & MXT_T100_TCHAUX_VECT)
@@ -1767,9 +1741,6 @@ static int mxt_read_t100_config(struct mxt_data *data)
                "T100 aux mappings vect:%u ampl:%u area:%u\n",
                data->t100_aux_vect, data->t100_aux_ampl, data->t100_aux_area);
 
-       dev_info(&client->dev,
-                "T100 Touchscreen size X%uY%u\n", data->max_x, data->max_y);
-
        return 0;
 }
 
@@ -1828,6 +1799,19 @@ static int mxt_initialize_input_device(struct mxt_data *data)
                return -EINVAL;
        }
 
+       /* Handle default values and orientation switch */
+       if (data->max_x == 0)
+               data->max_x = 1023;
+
+       if (data->max_y == 0)
+               data->max_y = 1023;
+
+       if (data->xy_switch)
+               swap(data->max_x, data->max_y);
+
+       dev_info(dev, "Touchscreen size X%uY%u\n", data->max_x, data->max_y);
+
+       /* Register input device */
        input_dev = input_allocate_device();
        if (!input_dev) {
                dev_err(dev, "Failed to allocate memory\n");
index 539b0dea8034b5d5b61e93f5927b353448197c67..e5e223938eecc9f013e33977b1de50bbb3cfae3d 100644 (file)
@@ -2049,7 +2049,7 @@ static void do_attach(struct iommu_dev_data *dev_data,
        /* Update device table */
        set_dte_entry(dev_data->devid, domain, ats);
        if (alias != dev_data->devid)
-               set_dte_entry(dev_data->devid, domain, ats);
+               set_dte_entry(alias, domain, ats);
 
        device_flush_dte(dev_data);
 }
index ac7387686ddc7b2a7c7757f2cb3fbd003c8a23af..986a53e3eb96b4bb56faedfdb36b4bd658aacb99 100644 (file)
@@ -1489,7 +1489,7 @@ static void iommu_disable_dev_iotlb(struct device_domain_info *info)
 {
        struct pci_dev *pdev;
 
-       if (dev_is_pci(info->dev))
+       if (!dev_is_pci(info->dev))
                return;
 
        pdev = to_pci_dev(info->dev);
index 8bbcbfe7695cf82aabda359b7a9036527e4951b3..381ca5a37a7b7de9b338236b28202c6c46f306df 100644 (file)
@@ -25,6 +25,7 @@
 #include <linux/sizes.h>
 #include <linux/slab.h>
 #include <linux/types.h>
+#include <linux/dma-mapping.h>
 
 #include <asm/barrier.h>
 
index 11fc2a27fa2ea90da2b65ef282a5db139c594c71..fb50911b3940c0bd7234a1312e320ccff020f492 100644 (file)
@@ -130,6 +130,11 @@ config ORION_IRQCHIP
        select IRQ_DOMAIN
        select MULTI_IRQ_HANDLER
 
+config PIC32_EVIC
+       bool
+       select GENERIC_IRQ_CHIP
+       select IRQ_DOMAIN
+
 config RENESAS_INTC_IRQPIN
        bool
        select IRQ_DOMAIN
@@ -154,6 +159,7 @@ config TB10X_IRQC
 config TS4800_IRQ
        tristate "TS-4800 IRQ controller"
        select IRQ_DOMAIN
+       depends on HAS_IOMEM
        help
          Support for the TS-4800 FPGA IRQ controller
 
index d4c2e4ebc30809fcb21b9f705f416e9c8dbb7ee3..18caacb60d581e953407ac6cae347c97bf7c4ce7 100644 (file)
@@ -58,3 +58,4 @@ obj-$(CONFIG_RENESAS_H8S_INTC)                += irq-renesas-h8s.o
 obj-$(CONFIG_ARCH_SA1100)              += irq-sa11x0.o
 obj-$(CONFIG_INGENIC_IRQ)              += irq-ingenic.o
 obj-$(CONFIG_IMX_GPCV2)                        += irq-imx-gpcv2.o
+obj-$(CONFIG_PIC32_EVIC)               += irq-pic32-evic.o
index b12a5d58546f922b460d6aac34c073ff363ef1b8..37199b9b2cfa260659a98da3eb1a48053e7bddfb 100644 (file)
@@ -86,7 +86,7 @@ int aic_common_set_priority(int priority, unsigned *val)
            priority > AT91_AIC_IRQ_MAX_PRIORITY)
                return -EINVAL;
 
-       *val &= AT91_AIC_PRIOR;
+       *val &= ~AT91_AIC_PRIOR;
        *val |= priority;
 
        return 0;
index e23d1d18f9d6a39731bdd34633e2bbc9bc473525..3447549fcc9306a37e7695d27e06589d45d3d0e4 100644 (file)
@@ -875,6 +875,7 @@ static int its_alloc_tables(const char *node_name, struct its_node *its)
                }
 
                alloc_size = (1 << order) * PAGE_SIZE;
+retry_alloc_baser:
                alloc_pages = (alloc_size / psz);
                if (alloc_pages > GITS_BASER_PAGES_MAX) {
                        alloc_pages = GITS_BASER_PAGES_MAX;
@@ -938,13 +939,16 @@ retry_baser:
                         * size and retry. If we reach 4K, then
                         * something is horribly wrong...
                         */
+                       free_pages((unsigned long)base, order);
+                       its->tables[i] = NULL;
+
                        switch (psz) {
                        case SZ_16K:
                                psz = SZ_4K;
-                               goto retry_baser;
+                               goto retry_alloc_baser;
                        case SZ_64K:
                                psz = SZ_16K;
-                               goto retry_baser;
+                               goto retry_alloc_baser;
                        }
                }
 
index c22e2d40cb302452624c29efd44c483e577f5333..efe50845939d91fee149acb085912697031dd9a0 100644 (file)
@@ -241,6 +241,7 @@ static int __init asm9260_of_init(struct device_node *np,
                writel(0, icoll_priv.intr + i);
 
        icoll_add_domain(np, ASM9260_NUM_IRQS);
+       set_handle_irq(icoll_handle_irq);
 
        return 0;
 }
diff --git a/drivers/irqchip/irq-pic32-evic.c b/drivers/irqchip/irq-pic32-evic.c
new file mode 100644 (file)
index 0000000..e7155db
--- /dev/null
@@ -0,0 +1,324 @@
+/*
+ * Cristian Birsan <cristian.birsan@microchip.com>
+ * Joshua Henderson <joshua.henderson@microchip.com>
+ * Copyright (C) 2016 Microchip Technology Inc.  All rights reserved.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
+#include <linux/of_address.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <linux/irqchip.h>
+#include <linux/irq.h>
+
+#include <asm/irq.h>
+#include <asm/traps.h>
+#include <asm/mach-pic32/pic32.h>
+
+#define REG_INTCON     0x0000
+#define REG_INTSTAT    0x0020
+#define REG_IFS_OFFSET 0x0040
+#define REG_IEC_OFFSET 0x00C0
+#define REG_IPC_OFFSET 0x0140
+#define REG_OFF_OFFSET 0x0540
+
+#define MAJPRI_MASK    0x07
+#define SUBPRI_MASK    0x03
+#define PRIORITY_MASK  0x1F
+
+#define PIC32_INT_PRI(pri, subpri)                             \
+       ((((pri) & MAJPRI_MASK) << 2) | ((subpri) & SUBPRI_MASK))
+
+struct evic_chip_data {
+       u32 irq_types[NR_IRQS];
+       u32 ext_irqs[8];
+};
+
+static struct irq_domain *evic_irq_domain;
+static void __iomem *evic_base;
+
+asmlinkage void __weak plat_irq_dispatch(void)
+{
+       unsigned int irq, hwirq;
+
+       hwirq = readl(evic_base + REG_INTSTAT) & 0xFF;
+       irq = irq_linear_revmap(evic_irq_domain, hwirq);
+       do_IRQ(irq);
+}
+
+static struct evic_chip_data *irqd_to_priv(struct irq_data *data)
+{
+       return (struct evic_chip_data *)data->domain->host_data;
+}
+
+static int pic32_set_ext_polarity(int bit, u32 type)
+{
+       /*
+        * External interrupts can be either edge rising or edge falling,
+        * but not both.
+        */
+       switch (type) {
+       case IRQ_TYPE_EDGE_RISING:
+               writel(BIT(bit), evic_base + PIC32_SET(REG_INTCON));
+               break;
+       case IRQ_TYPE_EDGE_FALLING:
+               writel(BIT(bit), evic_base + PIC32_CLR(REG_INTCON));
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static int pic32_set_type_edge(struct irq_data *data,
+                              unsigned int flow_type)
+{
+       struct evic_chip_data *priv = irqd_to_priv(data);
+       int ret;
+       int i;
+
+       if (!(flow_type & IRQ_TYPE_EDGE_BOTH))
+               return -EBADR;
+
+       /* set polarity for external interrupts only */
+       for (i = 0; i < ARRAY_SIZE(priv->ext_irqs); i++) {
+               if (priv->ext_irqs[i] == data->hwirq) {
+                       ret = pic32_set_ext_polarity(i + 1, flow_type);
+                       if (ret)
+                               return ret;
+               }
+       }
+
+       irqd_set_trigger_type(data, flow_type);
+
+       return IRQ_SET_MASK_OK;
+}
+
+static void pic32_bind_evic_interrupt(int irq, int set)
+{
+       writel(set, evic_base + REG_OFF_OFFSET + irq * 4);
+}
+
+static void pic32_set_irq_priority(int irq, int priority)
+{
+       u32 reg, shift;
+
+       reg = irq / 4;
+       shift = (irq % 4) * 8;
+
+       writel(PRIORITY_MASK << shift,
+               evic_base + PIC32_CLR(REG_IPC_OFFSET + reg * 0x10));
+       writel(priority << shift,
+               evic_base + PIC32_SET(REG_IPC_OFFSET + reg * 0x10));
+}
+
+#define IRQ_REG_MASK(_hwirq, _reg, _mask)                     \
+       do {                                                   \
+               _reg = _hwirq / 32;                            \
+               _mask = 1 << (_hwirq % 32);                    \
+       } while (0)
+
+static int pic32_irq_domain_map(struct irq_domain *d, unsigned int virq,
+                               irq_hw_number_t hw)
+{
+       struct evic_chip_data *priv = d->host_data;
+       struct irq_data *data;
+       int ret;
+       u32 iecclr, ifsclr;
+       u32 reg, mask;
+
+       ret = irq_map_generic_chip(d, virq, hw);
+       if (ret)
+               return ret;
+
+       /*
+        * Piggyback on xlate function to move to an alternate chip as necessary
+        * at time of mapping instead of allowing the flow handler/chip to be
+        * changed later. This requires all interrupts to be configured through
+        * DT.
+        */
+       if (priv->irq_types[hw] & IRQ_TYPE_SENSE_MASK) {
+               data = irq_domain_get_irq_data(d, virq);
+               irqd_set_trigger_type(data, priv->irq_types[hw]);
+               irq_setup_alt_chip(data, priv->irq_types[hw]);
+       }
+
+       IRQ_REG_MASK(hw, reg, mask);
+
+       iecclr = PIC32_CLR(REG_IEC_OFFSET + reg * 0x10);
+       ifsclr = PIC32_CLR(REG_IFS_OFFSET + reg * 0x10);
+
+       /* mask and clear flag */
+       writel(mask, evic_base + iecclr);
+       writel(mask, evic_base + ifsclr);
+
+       /* default priority is required */
+       pic32_set_irq_priority(hw, PIC32_INT_PRI(2, 0));
+
+       return ret;
+}
+
+int pic32_irq_domain_xlate(struct irq_domain *d, struct device_node *ctrlr,
+                          const u32 *intspec, unsigned int intsize,
+                          irq_hw_number_t *out_hwirq, unsigned int *out_type)
+{
+       struct evic_chip_data *priv = d->host_data;
+
+       if (WARN_ON(intsize < 2))
+               return -EINVAL;
+
+       if (WARN_ON(intspec[0] >= NR_IRQS))
+               return -EINVAL;
+
+       *out_hwirq = intspec[0];
+       *out_type = intspec[1] & IRQ_TYPE_SENSE_MASK;
+
+       priv->irq_types[intspec[0]] = intspec[1] & IRQ_TYPE_SENSE_MASK;
+
+       return 0;
+}
+
+static const struct irq_domain_ops pic32_irq_domain_ops = {
+       .map    = pic32_irq_domain_map,
+       .xlate  = pic32_irq_domain_xlate,
+};
+
+static void __init pic32_ext_irq_of_init(struct irq_domain *domain)
+{
+       struct device_node *node = irq_domain_get_of_node(domain);
+       struct evic_chip_data *priv = domain->host_data;
+       struct property *prop;
+       const __le32 *p;
+       u32 hwirq;
+       int i = 0;
+       const char *pname = "microchip,external-irqs";
+
+       of_property_for_each_u32(node, pname, prop, p, hwirq) {
+               if (i >= ARRAY_SIZE(priv->ext_irqs)) {
+                       pr_warn("More than %d external irq, skip rest\n",
+                               ARRAY_SIZE(priv->ext_irqs));
+                       break;
+               }
+
+               priv->ext_irqs[i] = hwirq;
+               i++;
+       }
+}
+
+static int __init pic32_of_init(struct device_node *node,
+                               struct device_node *parent)
+{
+       struct irq_chip_generic *gc;
+       struct evic_chip_data *priv;
+       unsigned int clr = IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_NOAUTOEN;
+       int nchips, ret;
+       int i;
+
+       nchips = DIV_ROUND_UP(NR_IRQS, 32);
+
+       evic_base = of_iomap(node, 0);
+       if (!evic_base)
+               return -ENOMEM;
+
+       priv = kcalloc(nchips, sizeof(*priv), GFP_KERNEL);
+       if (!priv) {
+               ret = -ENOMEM;
+               goto err_iounmap;
+       }
+
+       evic_irq_domain = irq_domain_add_linear(node, nchips * 32,
+                                               &pic32_irq_domain_ops,
+                                               priv);
+       if (!evic_irq_domain) {
+               ret = -ENOMEM;
+               goto err_free_priv;
+       }
+
+       /*
+        * The PIC32 EVIC has a linear list of irqs and the type of each
+        * irq is determined by the hardware peripheral the EVIC is arbitrating.
+        * These irq types are defined in the datasheet as "persistent" and
+        * "non-persistent" which are mapped here to level and edge
+        * respectively. To manage the different flow handler requirements of
+        * each irq type, different chip_types are used.
+        */
+       ret = irq_alloc_domain_generic_chips(evic_irq_domain, 32, 2,
+                                            "evic-level", handle_level_irq,
+                                            clr, 0, 0);
+       if (ret)
+               goto err_domain_remove;
+
+       board_bind_eic_interrupt = &pic32_bind_evic_interrupt;
+
+       for (i = 0; i < nchips; i++) {
+               u32 ifsclr = PIC32_CLR(REG_IFS_OFFSET + (i * 0x10));
+               u32 iec = REG_IEC_OFFSET + (i * 0x10);
+
+               gc = irq_get_domain_generic_chip(evic_irq_domain, i * 32);
+
+               gc->reg_base = evic_base;
+               gc->unused = 0;
+
+               /*
+                * Level/persistent interrupts have a special requirement that
+                * the condition generating the interrupt be cleared before the
+                * interrupt flag (ifs) can be cleared. chip.irq_eoi is used to
+                * complete the interrupt with an ack.
+                */
+               gc->chip_types[0].type                  = IRQ_TYPE_LEVEL_MASK;
+               gc->chip_types[0].handler               = handle_fasteoi_irq;
+               gc->chip_types[0].regs.ack              = ifsclr;
+               gc->chip_types[0].regs.mask             = iec;
+               gc->chip_types[0].chip.name             = "evic-level";
+               gc->chip_types[0].chip.irq_eoi          = irq_gc_ack_set_bit;
+               gc->chip_types[0].chip.irq_mask         = irq_gc_mask_clr_bit;
+               gc->chip_types[0].chip.irq_unmask       = irq_gc_mask_set_bit;
+               gc->chip_types[0].chip.flags            = IRQCHIP_SKIP_SET_WAKE;
+
+               /* Edge interrupts */
+               gc->chip_types[1].type                  = IRQ_TYPE_EDGE_BOTH;
+               gc->chip_types[1].handler               = handle_edge_irq;
+               gc->chip_types[1].regs.ack              = ifsclr;
+               gc->chip_types[1].regs.mask             = iec;
+               gc->chip_types[1].chip.name             = "evic-edge";
+               gc->chip_types[1].chip.irq_ack          = irq_gc_ack_set_bit;
+               gc->chip_types[1].chip.irq_mask         = irq_gc_mask_clr_bit;
+               gc->chip_types[1].chip.irq_unmask       = irq_gc_mask_set_bit;
+               gc->chip_types[1].chip.irq_set_type     = pic32_set_type_edge;
+               gc->chip_types[1].chip.flags            = IRQCHIP_SKIP_SET_WAKE;
+
+               gc->private = &priv[i];
+       }
+
+       irq_set_default_host(evic_irq_domain);
+
+       /*
+        * External interrupts have software configurable edge polarity. These
+        * interrupts are defined in DT allowing polarity to be configured only
+        * for these interrupts when requested.
+        */
+       pic32_ext_irq_of_init(evic_irq_domain);
+
+       return 0;
+
+err_domain_remove:
+       irq_domain_remove(evic_irq_domain);
+
+err_free_priv:
+       kfree(priv);
+
+err_iounmap:
+       iounmap(evic_base);
+
+       return ret;
+}
+
+IRQCHIP_DECLARE(pic32_evic, "microchip,pic32mzda-evic", pic32_of_init);
index c71914e8f596c3700d8c5a4da2448b32acf8c41f..5dc5a760c7236971a81dfcf70cd78542abadf8f4 100644 (file)
@@ -605,7 +605,7 @@ err:
        return ERR_PTR(ret);
 }
 
-static struct s3c_irq_data init_eint[32] = {
+static struct s3c_irq_data __maybe_unused init_eint[32] = {
        { .type = S3C_IRQTYPE_NONE, }, /* reserved */
        { .type = S3C_IRQTYPE_NONE, }, /* reserved */
        { .type = S3C_IRQTYPE_NONE, }, /* reserved */
index 546d05f4358a7c452ee80366b3ae782e685fd7ef..b2bbe8659beda5ad8182b5f4ff7bbefeda970459 100644 (file)
@@ -81,6 +81,7 @@ config STI_MBOX
 config MAILBOX_TEST
        tristate "Mailbox Test Client"
        depends on OF
+       depends on HAS_IOMEM
        help
          Test client to help with testing new Controller driver
          implementations.
index 45d85aea9955404d0c4567f33863e0e3f500674f..8f779a1ec99c4b248c9721fceef19b579432539b 100644 (file)
@@ -81,16 +81,10 @@ static struct mbox_controller pcc_mbox_ctrl = {};
  */
 static struct mbox_chan *get_pcc_channel(int id)
 {
-       struct mbox_chan *pcc_chan;
-
        if (id < 0 || id > pcc_mbox_ctrl.num_chans)
                return ERR_PTR(-ENOENT);
 
-       pcc_chan = (struct mbox_chan *)
-               (unsigned long) pcc_mbox_channels +
-               (id * sizeof(*pcc_chan));
-
-       return pcc_chan;
+       return &pcc_mbox_channels[id];
 }
 
 /**
index 4f22e919787aba1bcab7b8f6799a3c22a6b7ce28..d80cce499a56e595d1f2679170124ca5fc620315 100644 (file)
@@ -210,10 +210,6 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
        struct block_device *bdev;
        struct mddev *mddev = bitmap->mddev;
        struct bitmap_storage *store = &bitmap->storage;
-       int node_offset = 0;
-
-       if (mddev_is_clustered(bitmap->mddev))
-               node_offset = bitmap->cluster_slot * store->file_pages;
 
        while ((rdev = next_active_rdev(rdev, mddev)) != NULL) {
                int size = PAGE_SIZE;
index 4a8e15058e8b56a8a9d89d9f0db50135df44c6d3..685aa2d77e2526935f8f2f416ad8d8681b6c7b14 100644 (file)
@@ -170,7 +170,7 @@ static void add_sector(struct faulty_conf *conf, sector_t start, int mode)
                conf->nfaults = n+1;
 }
 
-static void make_request(struct mddev *mddev, struct bio *bio)
+static void faulty_make_request(struct mddev *mddev, struct bio *bio)
 {
        struct faulty_conf *conf = mddev->private;
        int failit = 0;
@@ -226,7 +226,7 @@ static void make_request(struct mddev *mddev, struct bio *bio)
        generic_make_request(bio);
 }
 
-static void status(struct seq_file *seq, struct mddev *mddev)
+static void faulty_status(struct seq_file *seq, struct mddev *mddev)
 {
        struct faulty_conf *conf = mddev->private;
        int n;
@@ -259,7 +259,7 @@ static void status(struct seq_file *seq, struct mddev *mddev)
 }
 
 
-static int reshape(struct mddev *mddev)
+static int faulty_reshape(struct mddev *mddev)
 {
        int mode = mddev->new_layout & ModeMask;
        int count = mddev->new_layout >> ModeShift;
@@ -299,7 +299,7 @@ static sector_t faulty_size(struct mddev *mddev, sector_t sectors, int raid_disk
        return sectors;
 }
 
-static int run(struct mddev *mddev)
+static int faulty_run(struct mddev *mddev)
 {
        struct md_rdev *rdev;
        int i;
@@ -327,7 +327,7 @@ static int run(struct mddev *mddev)
        md_set_array_sectors(mddev, faulty_size(mddev, 0, 0));
        mddev->private = conf;
 
-       reshape(mddev);
+       faulty_reshape(mddev);
 
        return 0;
 }
@@ -344,11 +344,11 @@ static struct md_personality faulty_personality =
        .name           = "faulty",
        .level          = LEVEL_FAULTY,
        .owner          = THIS_MODULE,
-       .make_request   = make_request,
-       .run            = run,
+       .make_request   = faulty_make_request,
+       .run            = faulty_run,
        .free           = faulty_free,
-       .status         = status,
-       .check_reshape  = reshape,
+       .status         = faulty_status,
+       .check_reshape  = faulty_reshape,
        .size           = faulty_size,
 };
 
index 0ded8e97751d270dbfdae0aac73a792043f10621..dd97d42458226b4b284b488983e218e01c814889 100644 (file)
@@ -293,6 +293,7 @@ static void recover_bitmaps(struct md_thread *thread)
 dlm_unlock:
                dlm_unlock_sync(bm_lockres);
 clear_bit:
+               lockres_free(bm_lockres);
                clear_bit(slot, &cinfo->recovery_map);
        }
 }
@@ -682,8 +683,10 @@ static int gather_all_resync_info(struct mddev *mddev, int total_slots)
                bm_lockres = lockres_init(mddev, str, NULL, 1);
                if (!bm_lockres)
                        return -ENOMEM;
-               if (i == (cinfo->slot_number - 1))
+               if (i == (cinfo->slot_number - 1)) {
+                       lockres_free(bm_lockres);
                        continue;
+               }
 
                bm_lockres->flags |= DLM_LKF_NOQUEUE;
                ret = dlm_lock_sync(bm_lockres, DLM_LOCK_PW);
@@ -858,6 +861,7 @@ static int leave(struct mddev *mddev)
        lockres_free(cinfo->token_lockres);
        lockres_free(cinfo->ack_lockres);
        lockres_free(cinfo->no_new_dev_lockres);
+       lockres_free(cinfo->resync_lockres);
        lockres_free(cinfo->bitmap_lockres);
        unlock_all_bitmaps(mddev);
        dlm_release_lockspace(cinfo->lockspace, 2);
index c4b9134092260f22939027a0a82945b146a4ebf9..4e3843f7d24592cc596df1fc6d6c4577a9a6f57f 100644 (file)
@@ -1044,7 +1044,7 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule)
        kfree(plug);
 }
 
-static void make_request(struct mddev *mddev, struct bio * bio)
+static void raid1_make_request(struct mddev *mddev, struct bio * bio)
 {
        struct r1conf *conf = mddev->private;
        struct raid1_info *mirror;
@@ -1422,7 +1422,7 @@ read_again:
        wake_up(&conf->wait_barrier);
 }
 
-static void status(struct seq_file *seq, struct mddev *mddev)
+static void raid1_status(struct seq_file *seq, struct mddev *mddev)
 {
        struct r1conf *conf = mddev->private;
        int i;
@@ -1439,7 +1439,7 @@ static void status(struct seq_file *seq, struct mddev *mddev)
        seq_printf(seq, "]");
 }
 
-static void error(struct mddev *mddev, struct md_rdev *rdev)
+static void raid1_error(struct mddev *mddev, struct md_rdev *rdev)
 {
        char b[BDEVNAME_SIZE];
        struct r1conf *conf = mddev->private;
@@ -2472,7 +2472,8 @@ static int init_resync(struct r1conf *conf)
  * that can be installed to exclude normal IO requests.
  */
 
-static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipped)
+static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
+                                  int *skipped)
 {
        struct r1conf *conf = mddev->private;
        struct r1bio *r1_bio;
@@ -2890,7 +2891,7 @@ static struct r1conf *setup_conf(struct mddev *mddev)
 }
 
 static void raid1_free(struct mddev *mddev, void *priv);
-static int run(struct mddev *mddev)
+static int raid1_run(struct mddev *mddev)
 {
        struct r1conf *conf;
        int i;
@@ -3170,15 +3171,15 @@ static struct md_personality raid1_personality =
        .name           = "raid1",
        .level          = 1,
        .owner          = THIS_MODULE,
-       .make_request   = make_request,
-       .run            = run,
+       .make_request   = raid1_make_request,
+       .run            = raid1_run,
        .free           = raid1_free,
-       .status         = status,
-       .error_handler  = error,
+       .status         = raid1_status,
+       .error_handler  = raid1_error,
        .hot_add_disk   = raid1_add_disk,
        .hot_remove_disk= raid1_remove_disk,
        .spare_active   = raid1_spare_active,
-       .sync_request   = sync_request,
+       .sync_request   = raid1_sync_request,
        .resize         = raid1_resize,
        .size           = raid1_size,
        .check_reshape  = raid1_reshape,
index ce959b4ae4dfd9a09d33828f5706d3a6aadd7f71..1c1447dd3417cd3e27f7c097a004b5e9b17a10d5 100644 (file)
@@ -1442,7 +1442,7 @@ retry_write:
        one_write_done(r10_bio);
 }
 
-static void make_request(struct mddev *mddev, struct bio *bio)
+static void raid10_make_request(struct mddev *mddev, struct bio *bio)
 {
        struct r10conf *conf = mddev->private;
        sector_t chunk_mask = (conf->geo.chunk_mask & conf->prev.chunk_mask);
@@ -1484,7 +1484,7 @@ static void make_request(struct mddev *mddev, struct bio *bio)
        wake_up(&conf->wait_barrier);
 }
 
-static void status(struct seq_file *seq, struct mddev *mddev)
+static void raid10_status(struct seq_file *seq, struct mddev *mddev)
 {
        struct r10conf *conf = mddev->private;
        int i;
@@ -1562,7 +1562,7 @@ static int enough(struct r10conf *conf, int ignore)
                _enough(conf, 1, ignore);
 }
 
-static void error(struct mddev *mddev, struct md_rdev *rdev)
+static void raid10_error(struct mddev *mddev, struct md_rdev *rdev)
 {
        char b[BDEVNAME_SIZE];
        struct r10conf *conf = mddev->private;
@@ -2802,7 +2802,7 @@ static int init_resync(struct r10conf *conf)
  *
  */
 
-static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
+static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
                             int *skipped)
 {
        struct r10conf *conf = mddev->private;
@@ -3523,7 +3523,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
        return ERR_PTR(err);
 }
 
-static int run(struct mddev *mddev)
+static int raid10_run(struct mddev *mddev)
 {
        struct r10conf *conf;
        int i, disk_idx, chunk_size;
@@ -4617,15 +4617,15 @@ static struct md_personality raid10_personality =
        .name           = "raid10",
        .level          = 10,
        .owner          = THIS_MODULE,
-       .make_request   = make_request,
-       .run            = run,
+       .make_request   = raid10_make_request,
+       .run            = raid10_run,
        .free           = raid10_free,
-       .status         = status,
-       .error_handler  = error,
+       .status         = raid10_status,
+       .error_handler  = raid10_error,
        .hot_add_disk   = raid10_add_disk,
        .hot_remove_disk= raid10_remove_disk,
        .spare_active   = raid10_spare_active,
-       .sync_request   = sync_request,
+       .sync_request   = raid10_sync_request,
        .quiesce        = raid10_quiesce,
        .size           = raid10_size,
        .resize         = raid10_resize,
index a086014dcd49915d5dad8b2c57b9bbfa30b39e5c..b4f02c9959f23c1bb5e8feccb2b9bf6e1c59e862 100644 (file)
@@ -2496,7 +2496,7 @@ static void raid5_build_block(struct stripe_head *sh, int i, int previous)
        dev->sector = raid5_compute_blocknr(sh, i, previous);
 }
 
-static void error(struct mddev *mddev, struct md_rdev *rdev)
+static void raid5_error(struct mddev *mddev, struct md_rdev *rdev)
 {
        char b[BDEVNAME_SIZE];
        struct r5conf *conf = mddev->private;
@@ -2958,7 +2958,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx,
         * If several bio share a stripe. The bio bi_phys_segments acts as a
         * reference count to avoid race. The reference count should already be
         * increased before this function is called (for example, in
-        * make_request()), so other bio sharing this stripe will not free the
+        * raid5_make_request()), so other bio sharing this stripe will not free the
         * stripe. If a stripe is owned by one stripe, the stripe lock will
         * protect it.
         */
@@ -5135,7 +5135,7 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi)
        }
 }
 
-static void make_request(struct mddev *mddev, struct bio * bi)
+static void raid5_make_request(struct mddev *mddev, struct bio * bi)
 {
        struct r5conf *conf = mddev->private;
        int dd_idx;
@@ -5225,7 +5225,7 @@ static void make_request(struct mddev *mddev, struct bio * bi)
                new_sector = raid5_compute_sector(conf, logical_sector,
                                                  previous,
                                                  &dd_idx, NULL);
-               pr_debug("raid456: make_request, sector %llu logical %llu\n",
+               pr_debug("raid456: raid5_make_request, sector %llu logical %llu\n",
                        (unsigned long long)new_sector,
                        (unsigned long long)logical_sector);
 
@@ -5575,7 +5575,8 @@ ret:
        return retn;
 }
 
-static inline sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipped)
+static inline sector_t raid5_sync_request(struct mddev *mddev, sector_t sector_nr,
+                                         int *skipped)
 {
        struct r5conf *conf = mddev->private;
        struct stripe_head *sh;
@@ -6674,7 +6675,7 @@ static int only_parity(int raid_disk, int algo, int raid_disks, int max_degraded
        return 0;
 }
 
-static int run(struct mddev *mddev)
+static int raid5_run(struct mddev *mddev)
 {
        struct r5conf *conf;
        int working_disks = 0;
@@ -7048,7 +7049,7 @@ static void raid5_free(struct mddev *mddev, void *priv)
        mddev->to_remove = &raid5_attrs_group;
 }
 
-static void status(struct seq_file *seq, struct mddev *mddev)
+static void raid5_status(struct seq_file *seq, struct mddev *mddev)
 {
        struct r5conf *conf = mddev->private;
        int i;
@@ -7864,15 +7865,15 @@ static struct md_personality raid6_personality =
        .name           = "raid6",
        .level          = 6,
        .owner          = THIS_MODULE,
-       .make_request   = make_request,
-       .run            = run,
+       .make_request   = raid5_make_request,
+       .run            = raid5_run,
        .free           = raid5_free,
-       .status         = status,
-       .error_handler  = error,
+       .status         = raid5_status,
+       .error_handler  = raid5_error,
        .hot_add_disk   = raid5_add_disk,
        .hot_remove_disk= raid5_remove_disk,
        .spare_active   = raid5_spare_active,
-       .sync_request   = sync_request,
+       .sync_request   = raid5_sync_request,
        .resize         = raid5_resize,
        .size           = raid5_size,
        .check_reshape  = raid6_check_reshape,
@@ -7887,15 +7888,15 @@ static struct md_personality raid5_personality =
        .name           = "raid5",
        .level          = 5,
        .owner          = THIS_MODULE,
-       .make_request   = make_request,
-       .run            = run,
+       .make_request   = raid5_make_request,
+       .run            = raid5_run,
        .free           = raid5_free,
-       .status         = status,
-       .error_handler  = error,
+       .status         = raid5_status,
+       .error_handler  = raid5_error,
        .hot_add_disk   = raid5_add_disk,
        .hot_remove_disk= raid5_remove_disk,
        .spare_active   = raid5_spare_active,
-       .sync_request   = sync_request,
+       .sync_request   = raid5_sync_request,
        .resize         = raid5_resize,
        .size           = raid5_size,
        .check_reshape  = raid5_check_reshape,
@@ -7911,15 +7912,15 @@ static struct md_personality raid4_personality =
        .name           = "raid4",
        .level          = 4,
        .owner          = THIS_MODULE,
-       .make_request   = make_request,
-       .run            = run,
+       .make_request   = raid5_make_request,
+       .run            = raid5_run,
        .free           = raid5_free,
-       .status         = status,
-       .error_handler  = error,
+       .status         = raid5_status,
+       .error_handler  = raid5_error,
        .hot_add_disk   = raid5_add_disk,
        .hot_remove_disk= raid5_remove_disk,
        .spare_active   = raid5_spare_active,
-       .sync_request   = sync_request,
+       .sync_request   = raid5_sync_request,
        .resize         = raid5_resize,
        .size           = raid5_size,
        .check_reshape  = raid5_check_reshape,
index 0e209b56c76c1c487007121e336e0438944acb93..c6abeb4fba9db51827fc815fde920b1a30faa1fa 100644 (file)
@@ -903,9 +903,18 @@ static int tda1004x_get_fe(struct dvb_frontend *fe)
 {
        struct dtv_frontend_properties *fe_params = &fe->dtv_property_cache;
        struct tda1004x_state* state = fe->demodulator_priv;
+       int status;
 
        dprintk("%s\n", __func__);
 
+       status = tda1004x_read_byte(state, TDA1004X_STATUS_CD);
+       if (status == -1)
+               return -EIO;
+
+       /* Only update the properties cache if device is locked */
+       if (!(status & 8))
+               return 0;
+
        // inversion status
        fe_params->inversion = INVERSION_OFF;
        if (tda1004x_read_byte(state, TDA1004X_CONFC1) & 0x20)
index 830491960add2d239d817e46db7c4f0d2dc8d00a..bf82726fd3f44f684ac5677286764255055cc9a7 100644 (file)
@@ -478,7 +478,6 @@ static const struct i2c_device_id ir_kbd_id[] = {
        { "ir_rx_z8f0811_hdpvr", 0 },
        { }
 };
-MODULE_DEVICE_TABLE(i2c, ir_kbd_id);
 
 static struct i2c_driver ir_kbd_driver = {
        .driver = {
index b9e43ffa50859caeb4ce2b8343f660a598d249ff..cbe4711e9b31acfca212a8ef0a96867c03bb45fb 100644 (file)
@@ -144,8 +144,7 @@ static int s5k6a3_set_fmt(struct v4l2_subdev *sd,
        mf = __s5k6a3_get_format(sensor, cfg, fmt->pad, fmt->which);
        if (mf) {
                mutex_lock(&sensor->lock);
-               if (fmt->which == V4L2_SUBDEV_FORMAT_ACTIVE)
-                       *mf = fmt->format;
+               *mf = fmt->format;
                mutex_unlock(&sensor->lock);
        }
        return 0;
index 1d2c310ce838e5fb415ebd4d3b533ac11b8c4b9b..94f8162444071ca3168af652b0c260acaeeab021 100644 (file)
@@ -1211,6 +1211,8 @@ static int alsa_device_init(struct saa7134_dev *dev)
 
 static int alsa_device_exit(struct saa7134_dev *dev)
 {
+       if (!snd_saa7134_cards[dev->nr])
+               return 1;
 
        snd_card_free(snd_saa7134_cards[dev->nr]);
        snd_saa7134_cards[dev->nr] = NULL;
@@ -1260,7 +1262,8 @@ static void saa7134_alsa_exit(void)
        int idx;
 
        for (idx = 0; idx < SNDRV_CARDS; idx++) {
-               snd_card_free(snd_saa7134_cards[idx]);
+               if (snd_saa7134_cards[idx])
+                       snd_card_free(snd_saa7134_cards[idx]);
        }
 
        saa7134_dmasound_init = NULL;
index 526359447ff90fda699b032e0dbed4d6a06e577e..8b89ebe16d94ace645d7401ece918b83cf92add6 100644 (file)
@@ -215,6 +215,7 @@ config VIDEO_SAMSUNG_EXYNOS_GSC
 config VIDEO_STI_BDISP
        tristate "STMicroelectronics BDISP 2D blitter driver"
        depends on VIDEO_DEV && VIDEO_V4L2
+       depends on HAS_DMA
        depends on ARCH_STI || COMPILE_TEST
        select VIDEOBUF2_DMA_CONTIG
        select V4L2_MEM2MEM_DEV
index 40423c6c5324a8e1cf2573b2acffeb566ad74655..57d42c6172c576757b7724906852c7ec9ac34911 100644 (file)
@@ -1,6 +1,6 @@
 
 config VIDEO_SAMSUNG_EXYNOS4_IS
-       bool "Samsung S5P/EXYNOS4 SoC series Camera Subsystem driver"
+       tristate "Samsung S5P/EXYNOS4 SoC series Camera Subsystem driver"
        depends on VIDEO_V4L2 && VIDEO_V4L2_SUBDEV_API
        depends on ARCH_S5PV210 || ARCH_EXYNOS || COMPILE_TEST
        depends on OF && COMMON_CLK
index 49658ca39e5100d70ed9011eea6adb4cd02c3c55..979c388ebf60cdd1a497aac2570aab05c5e800a6 100644 (file)
@@ -631,6 +631,12 @@ static int fimc_is_hw_open_sensor(struct fimc_is *is,
 
        fimc_is_mem_barrier();
 
+       /*
+        * Some user space use cases hang up here without this
+        * empirically chosen delay.
+        */
+       udelay(100);
+
        mcuctl_write(HIC_OPEN_SENSOR, is, MCUCTL_REG_ISSR(0));
        mcuctl_write(is->sensor_index, is, MCUCTL_REG_ISSR(1));
        mcuctl_write(sensor->drvdata->id, is, MCUCTL_REG_ISSR(2));
index bf9261eb57a15c37a8b82fb3197a94649e4cc9c6..c0816728cbfe1d2be7c6eb5c987a4ac3b8213a74 100644 (file)
@@ -218,8 +218,8 @@ static void isp_video_capture_buffer_queue(struct vb2_buffer *vb)
                                                        ivb->dma_addr[i];
 
                        isp_dbg(2, &video->ve.vdev,
-                               "dma_buf %pad (%d/%d/%d) addr: %pad\n",
-                               &buf_index, ivb->index, i, vb->index,
+                               "dma_buf %d (%d/%d/%d) addr: %pad\n",
+                               buf_index, ivb->index, i, vb->index,
                                &ivb->dma_addr[i]);
                }
 
index f3b2dd30ec7769b3199da9ed2694df5886004616..e79ddbb1e14fc0bd8c1b56504183a927edcdec83 100644 (file)
@@ -185,6 +185,37 @@ error:
        return ret;
 }
 
+/**
+ * __fimc_pipeline_enable - enable power of all pipeline subdevs
+ *                         and the sensor clock
+ * @ep: video pipeline structure
+ * @fmd: fimc media device
+ *
+ * Called with the graph mutex held.
+ */
+static int __fimc_pipeline_enable(struct exynos_media_pipeline *ep,
+                                 struct fimc_md *fmd)
+{
+       struct fimc_pipeline *p = to_fimc_pipeline(ep);
+       int ret;
+
+       /* Enable PXLASYNC clock if this pipeline includes FIMC-IS */
+       if (!IS_ERR(fmd->wbclk[CLK_IDX_WB_B]) && p->subdevs[IDX_IS_ISP]) {
+               ret = clk_prepare_enable(fmd->wbclk[CLK_IDX_WB_B]);
+               if (ret < 0)
+                       return ret;
+       }
+
+       ret = fimc_pipeline_s_power(p, 1);
+       if (!ret)
+               return 0;
+
+       if (!IS_ERR(fmd->wbclk[CLK_IDX_WB_B]) && p->subdevs[IDX_IS_ISP])
+               clk_disable_unprepare(fmd->wbclk[CLK_IDX_WB_B]);
+
+       return ret;
+}
+
 /**
  * __fimc_pipeline_open - update the pipeline information, enable power
  *                        of all pipeline subdevs and the sensor clock
@@ -199,7 +230,6 @@ static int __fimc_pipeline_open(struct exynos_media_pipeline *ep,
        struct fimc_md *fmd = entity_to_fimc_mdev(me);
        struct fimc_pipeline *p = to_fimc_pipeline(ep);
        struct v4l2_subdev *sd;
-       int ret;
 
        if (WARN_ON(p == NULL || me == NULL))
                return -EINVAL;
@@ -208,24 +238,16 @@ static int __fimc_pipeline_open(struct exynos_media_pipeline *ep,
                fimc_pipeline_prepare(p, me);
 
        sd = p->subdevs[IDX_SENSOR];
-       if (sd == NULL)
-               return -EINVAL;
-
-       /* Disable PXLASYNC clock if this pipeline includes FIMC-IS */
-       if (!IS_ERR(fmd->wbclk[CLK_IDX_WB_B]) && p->subdevs[IDX_IS_ISP]) {
-               ret = clk_prepare_enable(fmd->wbclk[CLK_IDX_WB_B]);
-               if (ret < 0)
-                       return ret;
-       }
-
-       ret = fimc_pipeline_s_power(p, 1);
-       if (!ret)
+       if (sd == NULL) {
+               pr_warn("%s(): No sensor subdev\n", __func__);
+               /*
+                * Pipeline open cannot fail so as to make it possible
+                * for the user space to configure the pipeline.
+                */
                return 0;
+       }
 
-       if (!IS_ERR(fmd->wbclk[CLK_IDX_WB_B]) && p->subdevs[IDX_IS_ISP])
-               clk_disable_unprepare(fmd->wbclk[CLK_IDX_WB_B]);
-
-       return ret;
+       return __fimc_pipeline_enable(ep, fmd);
 }
 
 /**
@@ -269,10 +291,43 @@ static int __fimc_pipeline_s_stream(struct exynos_media_pipeline *ep, bool on)
                { IDX_CSIS, IDX_FLITE, IDX_FIMC, IDX_SENSOR, IDX_IS_ISP },
        };
        struct fimc_pipeline *p = to_fimc_pipeline(ep);
+       struct fimc_md *fmd = entity_to_fimc_mdev(&p->subdevs[IDX_CSIS]->entity);
+       enum fimc_subdev_index sd_id;
        int i, ret = 0;
 
-       if (p->subdevs[IDX_SENSOR] == NULL)
-               return -ENODEV;
+       if (p->subdevs[IDX_SENSOR] == NULL) {
+               if (!fmd->user_subdev_api) {
+                       /*
+                        * Sensor must be already discovered if we
+                        * aren't in the user_subdev_api mode
+                        */
+                       return -ENODEV;
+               }
+
+               /* Get pipeline sink entity */
+               if (p->subdevs[IDX_FIMC])
+                       sd_id = IDX_FIMC;
+               else if (p->subdevs[IDX_IS_ISP])
+                       sd_id = IDX_IS_ISP;
+               else if (p->subdevs[IDX_FLITE])
+                       sd_id = IDX_FLITE;
+               else
+                       return -ENODEV;
+
+               /*
+                * Sensor could have been linked between open and STREAMON -
+                * check if this is the case.
+                */
+               fimc_pipeline_prepare(p, &p->subdevs[sd_id]->entity);
+
+               if (p->subdevs[IDX_SENSOR] == NULL)
+                       return -ENODEV;
+
+               ret = __fimc_pipeline_enable(ep, fmd);
+               if (ret < 0)
+                       return ret;
+
+       }
 
        for (i = 0; i < IDX_MAX; i++) {
                unsigned int idx = seq[on][i];
@@ -282,8 +337,10 @@ static int __fimc_pipeline_s_stream(struct exynos_media_pipeline *ep, bool on)
                if (ret < 0 && ret != -ENOIOCTLCMD && ret != -ENODEV)
                        goto error;
        }
+
        return 0;
 error:
+       fimc_pipeline_s_power(p, !on);
        for (; i >= 0; i--) {
                unsigned int idx = seq[on][i];
                v4l2_subdev_call(p->subdevs[idx], video, s_stream, !on);
index c398b285180cda757ec291bb6cc0c004c3c27de4..1af779ee3c747f31aff5d79d7d3db66d65a41b03 100644 (file)
@@ -795,7 +795,7 @@ static int isi_camera_get_formats(struct soc_camera_device *icd,
                        xlate->host_fmt = &isi_camera_formats[i];
                        xlate->code     = code.code;
                        dev_dbg(icd->parent, "Providing format %s using code %d\n",
-                               isi_camera_formats[0].name, code.code);
+                               xlate->host_fmt->name, xlate->code);
                }
                break;
        default:
index cc84c6d6a701ce249722010ed514c1e4a4630def..46c7186f78679a5ca09a01d8709cbb77cf1332bc 100644 (file)
@@ -1493,6 +1493,8 @@ static void soc_camera_async_unbind(struct v4l2_async_notifier *notifier,
                                        struct soc_camera_async_client, notifier);
        struct soc_camera_device *icd = platform_get_drvdata(sasc->pdev);
 
+       icd->control = NULL;
+
        if (icd->clk) {
                v4l2_clk_unregister(icd->clk);
                icd->clk = NULL;
index 42dff9d020afaf66c70e528b58d2f005a53203f3..533bc796391ed3053a5f21c92e94eba92cdc46a4 100644 (file)
@@ -256,7 +256,7 @@ static int vsp1_create_entities(struct vsp1_device *vsp1)
 
        /* Create links. */
        list_for_each_entry(entity, &vsp1->entities, list_dev) {
-               if (entity->type == VSP1_ENTITY_LIF) {
+               if (entity->type == VSP1_ENTITY_WPF) {
                        ret = vsp1_wpf_create_links(vsp1, entity);
                        if (ret < 0)
                                goto done;
@@ -264,7 +264,10 @@ static int vsp1_create_entities(struct vsp1_device *vsp1)
                        ret = vsp1_rpf_create_links(vsp1, entity);
                        if (ret < 0)
                                goto done;
-               } else {
+               }
+
+               if (entity->type != VSP1_ENTITY_LIF &&
+                   entity->type != VSP1_ENTITY_RPF) {
                        ret = vsp1_create_links(vsp1, entity);
                        if (ret < 0)
                                goto done;
index 637d0d6f79fba5fb511b6fba37fb24875d913dbd..b4dca57d1ae3f6d78a3417251089939a7da15f77 100644 (file)
@@ -515,7 +515,7 @@ static bool vsp1_pipeline_stopped(struct vsp1_pipeline *pipe)
        bool stopped;
 
        spin_lock_irqsave(&pipe->irqlock, flags);
-       stopped = pipe->state == VSP1_PIPELINE_STOPPED,
+       stopped = pipe->state == VSP1_PIPELINE_STOPPED;
        spin_unlock_irqrestore(&pipe->irqlock, flags);
 
        return stopped;
index c5d49d7a0d76d09c1ac5598a21ce7726d03190c1..ff8953ae52d142e654f30548d5ddf7bf08c45690 100644 (file)
@@ -1063,8 +1063,11 @@ EXPORT_SYMBOL_GPL(vb2_discard_done);
  */
 static int __qbuf_mmap(struct vb2_buffer *vb, const void *pb)
 {
-       int ret = call_bufop(vb->vb2_queue, fill_vb2_buffer,
-                       vb, pb, vb->planes);
+       int ret = 0;
+
+       if (pb)
+               ret = call_bufop(vb->vb2_queue, fill_vb2_buffer,
+                                vb, pb, vb->planes);
        return ret ? ret : call_vb_qop(vb, buf_prepare, vb);
 }
 
@@ -1077,14 +1080,16 @@ static int __qbuf_userptr(struct vb2_buffer *vb, const void *pb)
        struct vb2_queue *q = vb->vb2_queue;
        void *mem_priv;
        unsigned int plane;
-       int ret;
+       int ret = 0;
        enum dma_data_direction dma_dir =
                q->is_output ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
        bool reacquired = vb->planes[0].mem_priv == NULL;
 
        memset(planes, 0, sizeof(planes[0]) * vb->num_planes);
        /* Copy relevant information provided by the userspace */
-       ret = call_bufop(vb->vb2_queue, fill_vb2_buffer, vb, pb, planes);
+       if (pb)
+               ret = call_bufop(vb->vb2_queue, fill_vb2_buffer,
+                                vb, pb, planes);
        if (ret)
                return ret;
 
@@ -1192,14 +1197,16 @@ static int __qbuf_dmabuf(struct vb2_buffer *vb, const void *pb)
        struct vb2_queue *q = vb->vb2_queue;
        void *mem_priv;
        unsigned int plane;
-       int ret;
+       int ret = 0;
        enum dma_data_direction dma_dir =
                q->is_output ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
        bool reacquired = vb->planes[0].mem_priv == NULL;
 
        memset(planes, 0, sizeof(planes[0]) * vb->num_planes);
        /* Copy relevant information provided by the userspace */
-       ret = call_bufop(vb->vb2_queue, fill_vb2_buffer, vb, pb, planes);
+       if (pb)
+               ret = call_bufop(vb->vb2_queue, fill_vb2_buffer,
+                                vb, pb, planes);
        if (ret)
                return ret;
 
@@ -1520,7 +1527,8 @@ int vb2_core_qbuf(struct vb2_queue *q, unsigned int index, void *pb)
        q->waiting_for_buffers = false;
        vb->state = VB2_BUF_STATE_QUEUED;
 
-       call_void_bufop(q, copy_timestamp, vb, pb);
+       if (pb)
+               call_void_bufop(q, copy_timestamp, vb, pb);
 
        trace_vb2_qbuf(q, vb);
 
@@ -1532,7 +1540,8 @@ int vb2_core_qbuf(struct vb2_queue *q, unsigned int index, void *pb)
                __enqueue_in_driver(vb);
 
        /* Fill buffer information for the userspace */
-       call_void_bufop(q, fill_user_buffer, vb, pb);
+       if (pb)
+               call_void_bufop(q, fill_user_buffer, vb, pb);
 
        /*
         * If streamon has been called, and we haven't yet called
@@ -1731,7 +1740,8 @@ static void __vb2_dqbuf(struct vb2_buffer *vb)
  * The return values from this function are intended to be directly returned
  * from vidioc_dqbuf handler in driver.
  */
-int vb2_core_dqbuf(struct vb2_queue *q, void *pb, bool nonblocking)
+int vb2_core_dqbuf(struct vb2_queue *q, unsigned int *pindex, void *pb,
+                  bool nonblocking)
 {
        struct vb2_buffer *vb = NULL;
        int ret;
@@ -1754,8 +1764,12 @@ int vb2_core_dqbuf(struct vb2_queue *q, void *pb, bool nonblocking)
 
        call_void_vb_qop(vb, buf_finish, vb);
 
+       if (pindex)
+               *pindex = vb->index;
+
        /* Fill buffer information for the userspace */
-       call_void_bufop(q, fill_user_buffer, vb, pb);
+       if (pb)
+               call_void_bufop(q, fill_user_buffer, vb, pb);
 
        /* Remove from videobuf queue */
        list_del(&vb->queued_entry);
@@ -1828,7 +1842,7 @@ static void __vb2_queue_cancel(struct vb2_queue *q)
         * that's done in dqbuf, but that's not going to happen when we
         * cancel the whole queue. Note: this code belongs here, not in
         * __vb2_dqbuf() since in vb2_internal_dqbuf() there is a critical
-        * call to __fill_v4l2_buffer() after buf_finish(). That order can't
+        * call to __fill_user_buffer() after buf_finish(). That order can't
         * be changed, so we can't move the buf_finish() to __vb2_dqbuf().
         */
        for (i = 0; i < q->num_buffers; ++i) {
@@ -2357,7 +2371,6 @@ struct vb2_fileio_data {
        unsigned int count;
        unsigned int type;
        unsigned int memory;
-       struct vb2_buffer *b;
        struct vb2_fileio_buf bufs[VB2_MAX_FRAME];
        unsigned int cur_index;
        unsigned int initial_index;
@@ -2410,12 +2423,6 @@ static int __vb2_init_fileio(struct vb2_queue *q, int read)
        if (fileio == NULL)
                return -ENOMEM;
 
-       fileio->b = kzalloc(q->buf_struct_size, GFP_KERNEL);
-       if (fileio->b == NULL) {
-               kfree(fileio);
-               return -ENOMEM;
-       }
-
        fileio->read_once = q->fileio_read_once;
        fileio->write_immediately = q->fileio_write_immediately;
 
@@ -2460,13 +2467,7 @@ static int __vb2_init_fileio(struct vb2_queue *q, int read)
                 * Queue all buffers.
                 */
                for (i = 0; i < q->num_buffers; i++) {
-                       struct vb2_buffer *b = fileio->b;
-
-                       memset(b, 0, q->buf_struct_size);
-                       b->type = q->type;
-                       b->memory = q->memory;
-                       b->index = i;
-                       ret = vb2_core_qbuf(q, i, b);
+                       ret = vb2_core_qbuf(q, i, NULL);
                        if (ret)
                                goto err_reqbufs;
                        fileio->bufs[i].queued = 1;
@@ -2511,7 +2512,6 @@ static int __vb2_cleanup_fileio(struct vb2_queue *q)
                q->fileio = NULL;
                fileio->count = 0;
                vb2_core_reqbufs(q, fileio->memory, &fileio->count);
-               kfree(fileio->b);
                kfree(fileio);
                dprintk(3, "file io emulator closed\n");
        }
@@ -2539,7 +2539,8 @@ static size_t __vb2_perform_fileio(struct vb2_queue *q, char __user *data, size_
         * else is able to provide this information with the write() operation.
         */
        bool copy_timestamp = !read && q->copy_timestamp;
-       int ret, index;
+       unsigned index;
+       int ret;
 
        dprintk(3, "mode %s, offset %ld, count %zd, %sblocking\n",
                read ? "read" : "write", (long)*ppos, count,
@@ -2564,22 +2565,20 @@ static size_t __vb2_perform_fileio(struct vb2_queue *q, char __user *data, size_
         */
        index = fileio->cur_index;
        if (index >= q->num_buffers) {
-               struct vb2_buffer *b = fileio->b;
+               struct vb2_buffer *b;
 
                /*
                 * Call vb2_dqbuf to get buffer back.
                 */
-               memset(b, 0, q->buf_struct_size);
-               b->type = q->type;
-               b->memory = q->memory;
-               ret = vb2_core_dqbuf(q, b, nonblock);
+               ret = vb2_core_dqbuf(q, &index, NULL, nonblock);
                dprintk(5, "vb2_dqbuf result: %d\n", ret);
                if (ret)
                        return ret;
                fileio->dq_count += 1;
 
-               fileio->cur_index = index = b->index;
+               fileio->cur_index = index;
                buf = &fileio->bufs[index];
+               b = q->bufs[index];
 
                /*
                 * Get number of bytes filled by the driver
@@ -2630,7 +2629,7 @@ static size_t __vb2_perform_fileio(struct vb2_queue *q, char __user *data, size_
         * Queue next buffer if required.
         */
        if (buf->pos == buf->size || (!read && fileio->write_immediately)) {
-               struct vb2_buffer *b = fileio->b;
+               struct vb2_buffer *b = q->bufs[index];
 
                /*
                 * Check if this is the last buffer to read.
@@ -2643,15 +2642,11 @@ static size_t __vb2_perform_fileio(struct vb2_queue *q, char __user *data, size_
                /*
                 * Call vb2_qbuf and give buffer to the driver.
                 */
-               memset(b, 0, q->buf_struct_size);
-               b->type = q->type;
-               b->memory = q->memory;
-               b->index = index;
                b->planes[0].bytesused = buf->pos;
 
                if (copy_timestamp)
                        b->timestamp = ktime_get_ns();
-               ret = vb2_core_qbuf(q, index, b);
+               ret = vb2_core_qbuf(q, index, NULL);
                dprintk(5, "vb2_dbuf result: %d\n", ret);
                if (ret)
                        return ret;
@@ -2713,10 +2708,9 @@ static int vb2_thread(void *data)
 {
        struct vb2_queue *q = data;
        struct vb2_threadio_data *threadio = q->threadio;
-       struct vb2_fileio_data *fileio = q->fileio;
        bool copy_timestamp = false;
-       int prequeue = 0;
-       int index = 0;
+       unsigned prequeue = 0;
+       unsigned index = 0;
        int ret = 0;
 
        if (q->is_output) {
@@ -2728,37 +2722,34 @@ static int vb2_thread(void *data)
 
        for (;;) {
                struct vb2_buffer *vb;
-               struct vb2_buffer *b = fileio->b;
 
                /*
                 * Call vb2_dqbuf to get buffer back.
                 */
-               memset(b, 0, q->buf_struct_size);
-               b->type = q->type;
-               b->memory = q->memory;
                if (prequeue) {
-                       b->index = index++;
+                       vb = q->bufs[index++];
                        prequeue--;
                } else {
                        call_void_qop(q, wait_finish, q);
                        if (!threadio->stop)
-                               ret = vb2_core_dqbuf(q, b, 0);
+                               ret = vb2_core_dqbuf(q, &index, NULL, 0);
                        call_void_qop(q, wait_prepare, q);
                        dprintk(5, "file io: vb2_dqbuf result: %d\n", ret);
+                       if (!ret)
+                               vb = q->bufs[index];
                }
                if (ret || threadio->stop)
                        break;
                try_to_freeze();
 
-               vb = q->bufs[b->index];
-               if (b->state == VB2_BUF_STATE_DONE)
+               if (vb->state != VB2_BUF_STATE_ERROR)
                        if (threadio->fnc(vb, threadio->priv))
                                break;
                call_void_qop(q, wait_finish, q);
                if (copy_timestamp)
-                       b->timestamp = ktime_get_ns();;
+                       vb->timestamp = ktime_get_ns();;
                if (!threadio->stop)
-                       ret = vb2_core_qbuf(q, b->index, b);
+                       ret = vb2_core_qbuf(q, vb->index, NULL);
                call_void_qop(q, wait_prepare, q);
                if (ret || threadio->stop)
                        break;
index c9a28605511a71166af35a4ef11fb95ff0eb8fbd..91f552124050d2b3b91d2190af16fbdf2f9d7243 100644 (file)
@@ -625,7 +625,7 @@ static int vb2_internal_dqbuf(struct vb2_queue *q, struct v4l2_buffer *b,
                return -EINVAL;
        }
 
-       ret = vb2_core_dqbuf(q, b, nonblocking);
+       ret = vb2_core_dqbuf(q, NULL, b, nonblocking);
 
        return ret;
 }
index 154aced0b91b63e33cb95a9ba1a82e386395392d..65cc0ac9b82d2b9e1eb928131c7a1dea074f2d02 100644 (file)
@@ -170,7 +170,7 @@ static int mmc_ios_show(struct seq_file *s, void *data)
                str = "invalid";
                break;
        }
-       seq_printf(s, "signal voltage:\t%u (%s)\n", ios->chip_select, str);
+       seq_printf(s, "signal voltage:\t%u (%s)\n", ios->signal_voltage, str);
 
        switch (ios->drv_type) {
        case MMC_SET_DRIVER_TYPE_A:
index 2b16263458af001723636f7770bbf4906b16b07c..aba786daebcadd9e193ffabe71e3a0124dd8ec62 100644 (file)
@@ -29,15 +29,18 @@ struct mmc_pwrseq_simple {
 static void mmc_pwrseq_simple_set_gpios_value(struct mmc_pwrseq_simple *pwrseq,
                                              int value)
 {
-       int i;
        struct gpio_descs *reset_gpios = pwrseq->reset_gpios;
-       int values[reset_gpios->ndescs];
 
-       for (i = 0; i < reset_gpios->ndescs; i++)
-               values[i] = value;
+       if (!IS_ERR(reset_gpios)) {
+               int i;
+               int values[reset_gpios->ndescs];
 
-       gpiod_set_array_value_cansleep(reset_gpios->ndescs, reset_gpios->desc,
-                                      values);
+               for (i = 0; i < reset_gpios->ndescs; i++)
+                       values[i] = value;
+
+               gpiod_set_array_value_cansleep(
+                       reset_gpios->ndescs, reset_gpios->desc, values);
+       }
 }
 
 static void mmc_pwrseq_simple_pre_power_on(struct mmc_host *host)
@@ -79,7 +82,8 @@ static void mmc_pwrseq_simple_free(struct mmc_host *host)
        struct mmc_pwrseq_simple *pwrseq = container_of(host->pwrseq,
                                        struct mmc_pwrseq_simple, pwrseq);
 
-       gpiod_put_array(pwrseq->reset_gpios);
+       if (!IS_ERR(pwrseq->reset_gpios))
+               gpiod_put_array(pwrseq->reset_gpios);
 
        if (!IS_ERR(pwrseq->ext_clk))
                clk_put(pwrseq->ext_clk);
@@ -112,7 +116,9 @@ struct mmc_pwrseq *mmc_pwrseq_simple_alloc(struct mmc_host *host,
        }
 
        pwrseq->reset_gpios = gpiod_get_array(dev, "reset", GPIOD_OUT_HIGH);
-       if (IS_ERR(pwrseq->reset_gpios)) {
+       if (IS_ERR(pwrseq->reset_gpios) &&
+           PTR_ERR(pwrseq->reset_gpios) != -ENOENT &&
+           PTR_ERR(pwrseq->reset_gpios) != -ENOSYS) {
                ret = PTR_ERR(pwrseq->reset_gpios);
                goto clk_put;
        }
index f2b164b214ae289dd313a0945cbbd5dd7e1dc532..bb39a29b2db68d72c5e34d7e408cce0105c110a7 100644 (file)
@@ -329,6 +329,7 @@ static int mmc_read_switch(struct mmc_card *card)
                card->sw_caps.sd3_bus_mode = status[13];
                /* Driver Strengths supported by the card */
                card->sw_caps.sd3_drv_type = status[9];
+               card->sw_caps.sd3_curr_limit = status[7] | status[6] << 8;
        }
 
 out:
@@ -545,14 +546,25 @@ static int sd_set_current_limit(struct mmc_card *card, u8 *status)
         * when we set current limit to 200ma, the card will draw 200ma, and
         * when we set current limit to 400/600/800ma, the card will draw its
         * maximum 300ma from the host.
+        *
+        * The above is incorrect: if we try to set a current limit that is
+        * not supported by the card, the card can rightfully error out the
+        * attempt, and remain at the default current limit.  This results
+        * in a 300mA card being limited to 200mA even though the host
+        * supports 800mA. Failures seen with SanDisk 8GB UHS cards with
+        * an iMX6 host. --rmk
         */
-       if (max_current >= 800)
+       if (max_current >= 800 &&
+           card->sw_caps.sd3_curr_limit & SD_MAX_CURRENT_800)
                current_limit = SD_SET_CURRENT_LIMIT_800;
-       else if (max_current >= 600)
+       else if (max_current >= 600 &&
+                card->sw_caps.sd3_curr_limit & SD_MAX_CURRENT_600)
                current_limit = SD_SET_CURRENT_LIMIT_600;
-       else if (max_current >= 400)
+       else if (max_current >= 400 &&
+                card->sw_caps.sd3_curr_limit & SD_MAX_CURRENT_400)
                current_limit = SD_SET_CURRENT_LIMIT_400;
-       else if (max_current >= 200)
+       else if (max_current >= 200 &&
+                card->sw_caps.sd3_curr_limit & SD_MAX_CURRENT_200)
                current_limit = SD_SET_CURRENT_LIMIT_200;
 
        if (current_limit != SD_SET_CURRENT_NO_CHANGE) {
@@ -626,9 +638,9 @@ static int mmc_sd_init_uhs_card(struct mmc_card *card)
         * SDR104 mode SD-cards. Note that tuning is mandatory for SDR104.
         */
        if (!mmc_host_is_spi(card->host) &&
-               (card->sd_bus_speed == UHS_SDR50_BUS_SPEED ||
-                card->sd_bus_speed == UHS_DDR50_BUS_SPEED ||
-                card->sd_bus_speed == UHS_SDR104_BUS_SPEED)) {
+               (card->host->ios.timing == MMC_TIMING_UHS_SDR50 ||
+                card->host->ios.timing == MMC_TIMING_UHS_DDR50 ||
+                card->host->ios.timing == MMC_TIMING_UHS_SDR104)) {
                err = mmc_execute_tuning(card);
 
                /*
@@ -638,7 +650,7 @@ static int mmc_sd_init_uhs_card(struct mmc_card *card)
                 * difference between v3.00 and 3.01 spec means that CMD19
                 * tuning is also available for DDR50 mode.
                 */
-               if (err && card->sd_bus_speed == UHS_DDR50_BUS_SPEED) {
+               if (err && card->host->ios.timing == MMC_TIMING_UHS_DDR50) {
                        pr_warn("%s: ddr50 tuning failed\n",
                                mmc_hostname(card->host));
                        err = 0;
index d61ba1a0495ebe828ba8c110da3d9053b506079b..467b3cf80c44549c704080d78540c46599eb6882 100644 (file)
@@ -535,8 +535,8 @@ static int mmc_sdio_init_uhs_card(struct mmc_card *card)
         * SDR104 mode SD-cards. Note that tuning is mandatory for SDR104.
         */
        if (!mmc_host_is_spi(card->host) &&
-           ((card->sw_caps.sd3_bus_mode & SD_MODE_UHS_SDR50) ||
-            (card->sw_caps.sd3_bus_mode & SD_MODE_UHS_SDR104)))
+           ((card->host->ios.timing == MMC_TIMING_UHS_SDR50) ||
+             (card->host->ios.timing == MMC_TIMING_UHS_SDR104)))
                err = mmc_execute_tuning(card);
 out:
        return err;
index 8e94e555b788d4bc56364c488f1af0894e4db50a..6f6fc527a263384817316afe478803634429633a 100644 (file)
@@ -223,6 +223,7 @@ static const struct cis_tpl cis_tpl_list[] = {
        {       0x20,   4,      cistpl_manfid           },
        {       0x21,   2,      /* cistpl_funcid */     },
        {       0x22,   0,      cistpl_funce            },
+       {       0x91,   2,      /* cistpl_sdio_std */   },
 };
 
 static int sdio_read_cis(struct mmc_card *card, struct sdio_func *func)
index fb266745f8240d603956b8b791370ac4764f5a23..0d6ca4116f3dca7fe3170e53809e5e300193cca0 100644 (file)
@@ -151,6 +151,7 @@ static struct variant_data variant_nomadik = {
        .fifosize               = 16 * 4,
        .fifohalfsize           = 8 * 4,
        .clkreg                 = MCI_CLK_ENABLE,
+       .clkreg_8bit_bus_enable = MCI_ST_8BIT_BUS,
        .datalength_bits        = 24,
        .datactrl_mask_sdio     = MCI_ST_DPSM_SDIOEN,
        .st_sdio                = true,
@@ -1886,7 +1887,7 @@ static struct amba_id mmci_ids[] = {
        {
                .id     = 0x00280180,
                .mask   = 0x00ffffff,
-               .data   = &variant_u300,
+               .data   = &variant_nomadik,
        },
        {
                .id     = 0x00480180,
index e4b05dbb9ca822f003f566d07a60c508721bc68c..4a0d6b80eaa374e7749081c7f966ce85379ad3e2 100644 (file)
@@ -94,9 +94,9 @@ static void tmio_mmc_start_dma_rx(struct tmio_mmc_host *host)
                        desc = NULL;
                        ret = cookie;
                }
+               dev_dbg(&host->pdev->dev, "%s(): mapped %d -> %d, cookie %d, rq %p\n",
+                       __func__, host->sg_len, ret, cookie, host->mrq);
        }
-       dev_dbg(&host->pdev->dev, "%s(): mapped %d -> %d, cookie %d, rq %p\n",
-               __func__, host->sg_len, ret, cookie, host->mrq);
 
 pio:
        if (!desc) {
@@ -116,8 +116,8 @@ pio:
                         "DMA failed: %d, falling back to PIO\n", ret);
        }
 
-       dev_dbg(&host->pdev->dev, "%s(): desc %p, cookie %d, sg[%d]\n", __func__,
-               desc, cookie, host->sg_len);
+       dev_dbg(&host->pdev->dev, "%s(): desc %p, sg[%d]\n", __func__,
+               desc, host->sg_len);
 }
 
 static void tmio_mmc_start_dma_tx(struct tmio_mmc_host *host)
@@ -174,9 +174,9 @@ static void tmio_mmc_start_dma_tx(struct tmio_mmc_host *host)
                        desc = NULL;
                        ret = cookie;
                }
+               dev_dbg(&host->pdev->dev, "%s(): mapped %d -> %d, cookie %d, rq %p\n",
+                       __func__, host->sg_len, ret, cookie, host->mrq);
        }
-       dev_dbg(&host->pdev->dev, "%s(): mapped %d -> %d, cookie %d, rq %p\n",
-               __func__, host->sg_len, ret, cookie, host->mrq);
 
 pio:
        if (!desc) {
@@ -196,8 +196,7 @@ pio:
                         "DMA failed: %d, falling back to PIO\n", ret);
        }
 
-       dev_dbg(&host->pdev->dev, "%s(): desc %p, cookie %d\n", __func__,
-               desc, cookie);
+       dev_dbg(&host->pdev->dev, "%s(): desc %p\n", __func__, desc);
 }
 
 void tmio_mmc_start_dma(struct tmio_mmc_host *host,
index 44093699859321ce4164ed63d368ec8b72c5d49b..cec3188a170d6a504aad659c45cdd9613002c394 100644 (file)
@@ -24,6 +24,7 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <linux/bcm963xx_tag.h>
 #include <linux/crc32.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/mtd/partitions.h>
 
 #include <asm/mach-bcm63xx/bcm63xx_nvram.h>
-#include <asm/mach-bcm63xx/bcm963xx_tag.h>
 #include <asm/mach-bcm63xx/board_bcm963xx.h>
 
-#define BCM63XX_EXTENDED_SIZE  0xBFC00000      /* Extended flash address */
-
 #define BCM63XX_CFE_BLOCK_SIZE SZ_64K          /* always at least 64KiB */
 
 #define BCM63XX_CFE_MAGIC_OFFSET 0x4e0
@@ -123,8 +121,8 @@ static int bcm63xx_parse_cfe_partitions(struct mtd_info *master,
                pr_info("CFE boot tag found with version %s and board type %s\n",
                        tagversion, boardid);
 
-               kerneladdr = kerneladdr - BCM63XX_EXTENDED_SIZE;
-               rootfsaddr = rootfsaddr - BCM63XX_EXTENDED_SIZE;
+               kerneladdr = kerneladdr - BCM963XX_EXTENDED_SIZE;
+               rootfsaddr = rootfsaddr - BCM963XX_EXTENDED_SIZE;
                spareaddr = roundup(totallen, master->erasesize) + cfelen;
 
                if (rootfsaddr < kerneladdr) {
index 54e056d3be0250910f464732bacc13bfd7eee50b..ee2b74d1d1b5e388a7d60880e5ef0e1f07676e70 100644 (file)
@@ -174,9 +174,9 @@ static int vol_cdev_fsync(struct file *file, loff_t start, loff_t end,
        struct ubi_device *ubi = desc->vol->ubi;
        struct inode *inode = file_inode(file);
        int err;
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        err = ubi_sync(ubi->ubi_num);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return err;
 }
 
index 9fe33fc3c2b9a54ddbd0db4e8a03d750553b6d7c..cf34681af4f625d07bc43c77d528d632325bdc0d 100644 (file)
@@ -1532,7 +1532,7 @@ int mv88e6xxx_port_vlan_add(struct dsa_switch *ds, int port,
 
        /* no PVID with ranges, otherwise it's a bug */
        if (pvid)
-               err = _mv88e6xxx_port_pvid_set(ds, port, vid);
+               err = _mv88e6xxx_port_pvid_set(ds, port, vlan->vid_end);
 unlock:
        mutex_unlock(&ps->smi_mutex);
 
@@ -2163,7 +2163,8 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port)
         * database, and allow every port to egress frames on all other ports.
         */
        reg = BIT(ps->num_ports) - 1; /* all ports */
-       ret = _mv88e6xxx_port_vlan_map_set(ds, port, reg & ~port);
+       reg &= ~BIT(port); /* except itself */
+       ret = _mv88e6xxx_port_vlan_map_set(ds, port, reg);
        if (ret)
                goto abort;
 
index a4799c1fc7d4de55c38a0e1a99c1aa42985d5143..5eb9b20c0eeab09954a909290ac5bd9bf3fdcb06 100644 (file)
@@ -628,6 +628,7 @@ static int xgene_enet_register_irq(struct net_device *ndev)
        int ret;
 
        ring = pdata->rx_ring;
+       irq_set_status_flags(ring->irq, IRQ_DISABLE_UNLAZY);
        ret = devm_request_irq(dev, ring->irq, xgene_enet_rx_irq,
                               IRQF_SHARED, ring->irq_name, ring);
        if (ret)
@@ -635,6 +636,7 @@ static int xgene_enet_register_irq(struct net_device *ndev)
 
        if (pdata->cq_cnt) {
                ring = pdata->tx_ring->cp_ring;
+               irq_set_status_flags(ring->irq, IRQ_DISABLE_UNLAZY);
                ret = devm_request_irq(dev, ring->irq, xgene_enet_rx_irq,
                                       IRQF_SHARED, ring->irq_name, ring);
                if (ret) {
@@ -649,15 +651,19 @@ static int xgene_enet_register_irq(struct net_device *ndev)
 static void xgene_enet_free_irq(struct net_device *ndev)
 {
        struct xgene_enet_pdata *pdata;
+       struct xgene_enet_desc_ring *ring;
        struct device *dev;
 
        pdata = netdev_priv(ndev);
        dev = ndev_to_dev(ndev);
-       devm_free_irq(dev, pdata->rx_ring->irq, pdata->rx_ring);
+       ring = pdata->rx_ring;
+       irq_clear_status_flags(ring->irq, IRQ_DISABLE_UNLAZY);
+       devm_free_irq(dev, ring->irq, ring);
 
        if (pdata->cq_cnt) {
-               devm_free_irq(dev, pdata->tx_ring->cp_ring->irq,
-                             pdata->tx_ring->cp_ring);
+               ring = pdata->tx_ring->cp_ring;
+               irq_clear_status_flags(ring->irq, IRQ_DISABLE_UNLAZY);
+               devm_free_irq(dev, ring->irq, ring);
        }
 }
 
index 70d5b62c125a489ceb5fafaf21b96988cecaab5c..248dfc40a7611a0b298336c0aa552665da165ba7 100644 (file)
@@ -25,6 +25,7 @@
 #include <linux/acpi.h>
 #include <linux/clk.h>
 #include <linux/efi.h>
+#include <linux/irq.h>
 #include <linux/io.h>
 #include <linux/of_platform.h>
 #include <linux/of_net.h>
index ecc4a334c50727c7a93b07576b540fc4b5b53d58..f71ab2647a3bc1f62f6b7450b34dd325c710549d 100644 (file)
@@ -302,7 +302,7 @@ static int nb8800_poll(struct napi_struct *napi, int budget)
        nb8800_tx_done(dev);
 
 again:
-       while (work < budget) {
+       do {
                struct nb8800_rx_buf *rxb;
                unsigned int len;
 
@@ -330,7 +330,7 @@ again:
                rxd->report = 0;
                last = next;
                work++;
-       }
+       } while (work < budget);
 
        if (work) {
                priv->rx_descs[last].desc.config |= DESC_EOC;
index 8550df189ceb709290c8b1caad4a3132dcb6a810..19f7cd02e08527ec234b1e8de426bad1fd37cadb 100644 (file)
@@ -151,8 +151,11 @@ config BNX2X_VXLAN
 
 config BGMAC
        tristate "BCMA bus GBit core support"
-       depends on BCMA_HOST_SOC && HAS_DMA && (BCM47XX || ARCH_BCM_5301X)
+       depends on BCMA && BCMA_HOST_SOC
+       depends on HAS_DMA
+       depends on BCM47XX || ARCH_BCM_5301X || COMPILE_TEST
        select PHYLIB
+       select FIXED_PHY
        ---help---
          This driver supports GBit MAC and BCM4706 GBit MAC cores on BCMA bus.
          They can be found on BCM47xx SoCs and provide gigabit ethernet.
index df835f5e46d85f374cb1e0d9adf1fd8e16cb40f3..5dc89e527e7deefe04c831d9ec556ed76ac40d26 100644 (file)
@@ -1490,10 +1490,11 @@ static void bnxt_free_tx_skbs(struct bnxt *bp)
 
                        last = tx_buf->nr_frags;
                        j += 2;
-                       for (k = 0; k < last; k++, j = NEXT_TX(j)) {
+                       for (k = 0; k < last; k++, j++) {
+                               int ring_idx = j & bp->tx_ring_mask;
                                skb_frag_t *frag = &skb_shinfo(skb)->frags[k];
 
-                               tx_buf = &txr->tx_buf_ring[j];
+                               tx_buf = &txr->tx_buf_ring[ring_idx];
                                dma_unmap_page(
                                        &pdev->dev,
                                        dma_unmap_addr(tx_buf, mapping),
@@ -3406,7 +3407,7 @@ static int hwrm_ring_free_send_msg(struct bnxt *bp,
        struct hwrm_ring_free_output *resp = bp->hwrm_cmd_resp_addr;
        u16 error_code;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_RING_FREE, -1, -1);
+       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_RING_FREE, cmpl_ring_id, -1);
        req.ring_type = ring_type;
        req.ring_id = cpu_to_le16(ring->fw_ring_id);
 
@@ -4819,8 +4820,6 @@ bnxt_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
 
                stats->multicast += le64_to_cpu(hw_stats->rx_mcast_pkts);
 
-               stats->rx_dropped += le64_to_cpu(hw_stats->rx_drop_pkts);
-
                stats->tx_dropped += le64_to_cpu(hw_stats->tx_drop_pkts);
        }
 
index 0d775964b06016bfcc66352c99c0164c6957d773..457c3bc8cfff49654b45e879495e5cbfa88428d6 100644 (file)
@@ -401,7 +401,7 @@ int bcmgenet_mii_probe(struct net_device *dev)
         * Ethernet MAC ISRs
         */
        if (priv->internal_phy)
-               priv->mii_bus->irq[phydev->mdio.addr] = PHY_IGNORE_INTERRUPT;
+               priv->phydev->irq = PHY_IGNORE_INTERRUPT;
 
        return 0;
 }
index 9293675df7ba99c44b2f77c739fbe12da0e86093..49eea8981332d679b31614aa4d284e258abd97d6 100644 (file)
@@ -12016,7 +12016,7 @@ static int tg3_set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom,
        int ret;
        u32 offset, len, b_offset, odd_len;
        u8 *buf;
-       __be32 start, end;
+       __be32 start = 0, end;
 
        if (tg3_flag(tp, NO_NVRAM) ||
            eeprom->magic != TG3_EEPROM_MAGIC)
index 9d9984a87d4227f0b79c01e5ff1a3877da41b7e3..50c94104f19cc5d90b5fb3d4643bae1d4624e127 100644 (file)
@@ -2823,7 +2823,7 @@ static int macb_probe(struct platform_device *pdev)
        struct device_node *np = pdev->dev.of_node;
        struct device_node *phy_node;
        const struct macb_config *macb_config = NULL;
-       struct clk *pclk, *hclk, *tx_clk;
+       struct clk *pclk, *hclk = NULL, *tx_clk = NULL;
        unsigned int queue_mask, num_queues;
        struct macb_platform_data *pdata;
        bool native_io;
index b89504405b7207f12663aee0d6d3ecf78d392235..872765527081ab3d48d7701f8a9d8705553640fc 100644 (file)
@@ -1526,7 +1526,6 @@ static int liquidio_ptp_gettime(struct ptp_clock_info *ptp,
                                struct timespec64 *ts)
 {
        u64 ns;
-       u32 remainder;
        unsigned long flags;
        struct lio *lio = container_of(ptp, struct lio, ptp_info);
        struct octeon_device *oct = (struct octeon_device *)lio->oct_dev;
@@ -1536,8 +1535,7 @@ static int liquidio_ptp_gettime(struct ptp_clock_info *ptp,
        ns += lio->ptp_adjust;
        spin_unlock_irqrestore(&lio->ptp_lock, flags);
 
-       ts->tv_sec = div_u64_rem(ns, 1000000000ULL, &remainder);
-       ts->tv_nsec = remainder;
+       *ts = ns_to_timespec64(ns);
 
        return 0;
 }
index 48ecbc8aaaea2983fdce3b2c9bb08d0578059f41..b423ad380b6a3ff0b31621cf98152143030c2ed6 100644 (file)
@@ -18,6 +18,7 @@ if NET_VENDOR_EZCHIP
 config EZCHIP_NPS_MANAGEMENT_ENET
        tristate "EZchip NPS management enet support"
        depends on OF_IRQ && OF_NET
+       depends on HAS_IOMEM
        ---help---
          Simple LAN device for debug or management purposes.
          Device supports interrupts for RX and TX(completion).
index 4097c58d17a7527a3654fc6664e575d97575d4fe..cbe21dc7e37ee41dab11425d673ba0d20409074a 100644 (file)
@@ -4,6 +4,9 @@
 
 obj-$(CONFIG_FEC) += fec.o
 fec-objs :=fec_main.o fec_ptp.o
+CFLAGS_fec_main.o := -D__CHECK_ENDIAN__
+CFLAGS_fec_ptp.o := -D__CHECK_ENDIAN__
+
 obj-$(CONFIG_FEC_MPC52xx) += fec_mpc52xx.o
 ifeq ($(CONFIG_FEC_MPC52xx_MDIO),y)
        obj-$(CONFIG_FEC_MPC52xx) += fec_mpc52xx_phy.o
index 99d33e2d35e6c2c219fbd06f34546619a01a586d..2106d72c91dc610fdf7b368e6dbf0a99c98c0a3d 100644 (file)
@@ -19,8 +19,7 @@
 #include <linux/timecounter.h>
 
 #if defined(CONFIG_M523x) || defined(CONFIG_M527x) || defined(CONFIG_M528x) || \
-    defined(CONFIG_M520x) || defined(CONFIG_M532x) || \
-    defined(CONFIG_ARCH_MXC) || defined(CONFIG_SOC_IMX28)
+    defined(CONFIG_M520x) || defined(CONFIG_M532x) || defined(CONFIG_ARM)
 /*
  *     Just figures, Motorola would have to change the offsets for
  *     registers in the same peripheral device on different models
 
 /*
  *     Define the buffer descriptor structure.
+ *
+ *     Evidently, ARM SoCs have the FEC block generated in a
+ *     little endian mode so adjust endianness accordingly.
  */
-#if defined(CONFIG_ARCH_MXC) || defined(CONFIG_SOC_IMX28)
+#if defined(CONFIG_ARM)
+#define fec32_to_cpu le32_to_cpu
+#define fec16_to_cpu le16_to_cpu
+#define cpu_to_fec32 cpu_to_le32
+#define cpu_to_fec16 cpu_to_le16
+#define __fec32 __le32
+#define __fec16 __le16
+
 struct bufdesc {
-       unsigned short cbd_datlen;      /* Data length */
-       unsigned short cbd_sc;  /* Control and status info */
-       unsigned long cbd_bufaddr;      /* Buffer address */
+       __fec16 cbd_datlen;     /* Data length */
+       __fec16 cbd_sc;         /* Control and status info */
+       __fec32 cbd_bufaddr;    /* Buffer address */
 };
 #else
+#define fec32_to_cpu be32_to_cpu
+#define fec16_to_cpu be16_to_cpu
+#define cpu_to_fec32 cpu_to_be32
+#define cpu_to_fec16 cpu_to_be16
+#define __fec32 __be32
+#define __fec16 __be16
+
 struct bufdesc {
-       unsigned short  cbd_sc;                 /* Control and status info */
-       unsigned short  cbd_datlen;             /* Data length */
-       unsigned long   cbd_bufaddr;            /* Buffer address */
+       __fec16 cbd_sc;         /* Control and status info */
+       __fec16 cbd_datlen;     /* Data length */
+       __fec32 cbd_bufaddr;    /* Buffer address */
 };
 #endif
 
 struct bufdesc_ex {
        struct bufdesc desc;
-       unsigned long cbd_esc;
-       unsigned long cbd_prot;
-       unsigned long cbd_bdu;
-       unsigned long ts;
-       unsigned short res0[4];
+       __fec32 cbd_esc;
+       __fec32 cbd_prot;
+       __fec32 cbd_bdu;
+       __fec32 ts;
+       __fec16 res0[4];
 };
 
 /*
index 502da6f48f95e5e52a43ce5666a5c265fcd9cfe3..41c81f6ec630a289c2c280ae068f429a8da9e21e 100644 (file)
@@ -332,11 +332,13 @@ static void fec_dump(struct net_device *ndev)
        bdp = txq->tx_bd_base;
 
        do {
-               pr_info("%3u %c%c 0x%04x 0x%08lx %4u %p\n",
+               pr_info("%3u %c%c 0x%04x 0x%08x %4u %p\n",
                        index,
                        bdp == txq->cur_tx ? 'S' : ' ',
                        bdp == txq->dirty_tx ? 'H' : ' ',
-                       bdp->cbd_sc, bdp->cbd_bufaddr, bdp->cbd_datlen,
+                       fec16_to_cpu(bdp->cbd_sc),
+                       fec32_to_cpu(bdp->cbd_bufaddr),
+                       fec16_to_cpu(bdp->cbd_datlen),
                        txq->tx_skbuff[index]);
                bdp = fec_enet_get_nextdesc(bdp, fep, 0);
                index++;
@@ -389,7 +391,7 @@ fec_enet_txq_submit_frag_skb(struct fec_enet_priv_tx_q *txq,
                bdp = fec_enet_get_nextdesc(bdp, fep, queue);
                ebdp = (struct bufdesc_ex *)bdp;
 
-               status = bdp->cbd_sc;
+               status = fec16_to_cpu(bdp->cbd_sc);
                status &= ~BD_ENET_TX_STATS;
                status |= (BD_ENET_TX_TC | BD_ENET_TX_READY);
                frag_len = skb_shinfo(skb)->frags[frag].size;
@@ -411,7 +413,7 @@ fec_enet_txq_submit_frag_skb(struct fec_enet_priv_tx_q *txq,
                        if (skb->ip_summed == CHECKSUM_PARTIAL)
                                estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
                        ebdp->cbd_bdu = 0;
-                       ebdp->cbd_esc = estatus;
+                       ebdp->cbd_esc = cpu_to_fec32(estatus);
                }
 
                bufaddr = page_address(this_frag->page.p) + this_frag->page_offset;
@@ -435,9 +437,9 @@ fec_enet_txq_submit_frag_skb(struct fec_enet_priv_tx_q *txq,
                        goto dma_mapping_error;
                }
 
-               bdp->cbd_bufaddr = addr;
-               bdp->cbd_datlen = frag_len;
-               bdp->cbd_sc = status;
+               bdp->cbd_bufaddr = cpu_to_fec32(addr);
+               bdp->cbd_datlen = cpu_to_fec16(frag_len);
+               bdp->cbd_sc = cpu_to_fec16(status);
        }
 
        return bdp;
@@ -445,8 +447,8 @@ dma_mapping_error:
        bdp = txq->cur_tx;
        for (i = 0; i < frag; i++) {
                bdp = fec_enet_get_nextdesc(bdp, fep, queue);
-               dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr,
-                               bdp->cbd_datlen, DMA_TO_DEVICE);
+               dma_unmap_single(&fep->pdev->dev, fec32_to_cpu(bdp->cbd_bufaddr),
+                                fec16_to_cpu(bdp->cbd_datlen), DMA_TO_DEVICE);
        }
        return ERR_PTR(-ENOMEM);
 }
@@ -483,7 +485,7 @@ static int fec_enet_txq_submit_skb(struct fec_enet_priv_tx_q *txq,
        /* Fill in a Tx ring entry */
        bdp = txq->cur_tx;
        last_bdp = bdp;
-       status = bdp->cbd_sc;
+       status = fec16_to_cpu(bdp->cbd_sc);
        status &= ~BD_ENET_TX_STATS;
 
        /* Set buffer length and buffer pointer */
@@ -539,21 +541,21 @@ static int fec_enet_txq_submit_skb(struct fec_enet_priv_tx_q *txq,
                        estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
 
                ebdp->cbd_bdu = 0;
-               ebdp->cbd_esc = estatus;
+               ebdp->cbd_esc = cpu_to_fec32(estatus);
        }
 
        index = fec_enet_get_bd_index(txq->tx_bd_base, last_bdp, fep);
        /* Save skb pointer */
        txq->tx_skbuff[index] = skb;
 
-       bdp->cbd_datlen = buflen;
-       bdp->cbd_bufaddr = addr;
+       bdp->cbd_datlen = cpu_to_fec16(buflen);
+       bdp->cbd_bufaddr = cpu_to_fec32(addr);
 
        /* Send it on its way.  Tell FEC it's ready, interrupt when done,
         * it's the last BD of the frame, and to put the CRC on the end.
         */
        status |= (BD_ENET_TX_READY | BD_ENET_TX_TC);
-       bdp->cbd_sc = status;
+       bdp->cbd_sc = cpu_to_fec16(status);
 
        /* If this was the last BD in the ring, start at the beginning again. */
        bdp = fec_enet_get_nextdesc(last_bdp, fep, queue);
@@ -585,7 +587,7 @@ fec_enet_txq_put_data_tso(struct fec_enet_priv_tx_q *txq, struct sk_buff *skb,
        unsigned int estatus = 0;
        dma_addr_t addr;
 
-       status = bdp->cbd_sc;
+       status = fec16_to_cpu(bdp->cbd_sc);
        status &= ~BD_ENET_TX_STATS;
 
        status |= (BD_ENET_TX_TC | BD_ENET_TX_READY);
@@ -607,8 +609,8 @@ fec_enet_txq_put_data_tso(struct fec_enet_priv_tx_q *txq, struct sk_buff *skb,
                return NETDEV_TX_BUSY;
        }
 
-       bdp->cbd_datlen = size;
-       bdp->cbd_bufaddr = addr;
+       bdp->cbd_datlen = cpu_to_fec16(size);
+       bdp->cbd_bufaddr = cpu_to_fec32(addr);
 
        if (fep->bufdesc_ex) {
                if (fep->quirks & FEC_QUIRK_HAS_AVB)
@@ -616,7 +618,7 @@ fec_enet_txq_put_data_tso(struct fec_enet_priv_tx_q *txq, struct sk_buff *skb,
                if (skb->ip_summed == CHECKSUM_PARTIAL)
                        estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
                ebdp->cbd_bdu = 0;
-               ebdp->cbd_esc = estatus;
+               ebdp->cbd_esc = cpu_to_fec32(estatus);
        }
 
        /* Handle the last BD specially */
@@ -625,10 +627,10 @@ fec_enet_txq_put_data_tso(struct fec_enet_priv_tx_q *txq, struct sk_buff *skb,
        if (is_last) {
                status |= BD_ENET_TX_INTR;
                if (fep->bufdesc_ex)
-                       ebdp->cbd_esc |= BD_ENET_TX_INT;
+                       ebdp->cbd_esc |= cpu_to_fec32(BD_ENET_TX_INT);
        }
 
-       bdp->cbd_sc = status;
+       bdp->cbd_sc = cpu_to_fec16(status);
 
        return 0;
 }
@@ -647,7 +649,7 @@ fec_enet_txq_put_hdr_tso(struct fec_enet_priv_tx_q *txq,
        unsigned short status;
        unsigned int estatus = 0;
 
-       status = bdp->cbd_sc;
+       status = fec16_to_cpu(bdp->cbd_sc);
        status &= ~BD_ENET_TX_STATS;
        status |= (BD_ENET_TX_TC | BD_ENET_TX_READY);
 
@@ -671,8 +673,8 @@ fec_enet_txq_put_hdr_tso(struct fec_enet_priv_tx_q *txq,
                }
        }
 
-       bdp->cbd_bufaddr = dmabuf;
-       bdp->cbd_datlen = hdr_len;
+       bdp->cbd_bufaddr = cpu_to_fec32(dmabuf);
+       bdp->cbd_datlen = cpu_to_fec16(hdr_len);
 
        if (fep->bufdesc_ex) {
                if (fep->quirks & FEC_QUIRK_HAS_AVB)
@@ -680,10 +682,10 @@ fec_enet_txq_put_hdr_tso(struct fec_enet_priv_tx_q *txq,
                if (skb->ip_summed == CHECKSUM_PARTIAL)
                        estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
                ebdp->cbd_bdu = 0;
-               ebdp->cbd_esc = estatus;
+               ebdp->cbd_esc = cpu_to_fec32(estatus);
        }
 
-       bdp->cbd_sc = status;
+       bdp->cbd_sc = cpu_to_fec16(status);
 
        return 0;
 }
@@ -823,15 +825,15 @@ static void fec_enet_bd_init(struct net_device *dev)
 
                        /* Initialize the BD for every fragment in the page. */
                        if (bdp->cbd_bufaddr)
-                               bdp->cbd_sc = BD_ENET_RX_EMPTY;
+                               bdp->cbd_sc = cpu_to_fec16(BD_ENET_RX_EMPTY);
                        else
-                               bdp->cbd_sc = 0;
+                               bdp->cbd_sc = cpu_to_fec16(0);
                        bdp = fec_enet_get_nextdesc(bdp, fep, q);
                }
 
                /* Set the last buffer to wrap */
                bdp = fec_enet_get_prevdesc(bdp, fep, q);
-               bdp->cbd_sc |= BD_SC_WRAP;
+               bdp->cbd_sc |= cpu_to_fec16(BD_SC_WRAP);
 
                rxq->cur_rx = rxq->rx_bd_base;
        }
@@ -844,18 +846,18 @@ static void fec_enet_bd_init(struct net_device *dev)
 
                for (i = 0; i < txq->tx_ring_size; i++) {
                        /* Initialize the BD for every fragment in the page. */
-                       bdp->cbd_sc = 0;
+                       bdp->cbd_sc = cpu_to_fec16(0);
                        if (txq->tx_skbuff[i]) {
                                dev_kfree_skb_any(txq->tx_skbuff[i]);
                                txq->tx_skbuff[i] = NULL;
                        }
-                       bdp->cbd_bufaddr = 0;
+                       bdp->cbd_bufaddr = cpu_to_fec32(0);
                        bdp = fec_enet_get_nextdesc(bdp, fep, q);
                }
 
                /* Set the last buffer to wrap */
                bdp = fec_enet_get_prevdesc(bdp, fep, q);
-               bdp->cbd_sc |= BD_SC_WRAP;
+               bdp->cbd_sc |= cpu_to_fec16(BD_SC_WRAP);
                txq->dirty_tx = bdp;
        }
 }
@@ -947,8 +949,10 @@ fec_restart(struct net_device *ndev)
         */
        if (fep->quirks & FEC_QUIRK_ENET_MAC) {
                memcpy(&temp_mac, ndev->dev_addr, ETH_ALEN);
-               writel(cpu_to_be32(temp_mac[0]), fep->hwp + FEC_ADDR_LOW);
-               writel(cpu_to_be32(temp_mac[1]), fep->hwp + FEC_ADDR_HIGH);
+               writel((__force u32)cpu_to_be32(temp_mac[0]),
+                      fep->hwp + FEC_ADDR_LOW);
+               writel((__force u32)cpu_to_be32(temp_mac[1]),
+                      fep->hwp + FEC_ADDR_HIGH);
        }
 
        /* Clear any outstanding interrupt. */
@@ -1222,7 +1226,7 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id)
        while (bdp != READ_ONCE(txq->cur_tx)) {
                /* Order the load of cur_tx and cbd_sc */
                rmb();
-               status = READ_ONCE(bdp->cbd_sc);
+               status = fec16_to_cpu(READ_ONCE(bdp->cbd_sc));
                if (status & BD_ENET_TX_READY)
                        break;
 
@@ -1230,10 +1234,12 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id)
 
                skb = txq->tx_skbuff[index];
                txq->tx_skbuff[index] = NULL;
-               if (!IS_TSO_HEADER(txq, bdp->cbd_bufaddr))
-                       dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr,
-                                       bdp->cbd_datlen, DMA_TO_DEVICE);
-               bdp->cbd_bufaddr = 0;
+               if (!IS_TSO_HEADER(txq, fec32_to_cpu(bdp->cbd_bufaddr)))
+                       dma_unmap_single(&fep->pdev->dev,
+                                        fec32_to_cpu(bdp->cbd_bufaddr),
+                                        fec16_to_cpu(bdp->cbd_datlen),
+                                        DMA_TO_DEVICE);
+               bdp->cbd_bufaddr = cpu_to_fec32(0);
                if (!skb) {
                        bdp = fec_enet_get_nextdesc(bdp, fep, queue_id);
                        continue;
@@ -1264,7 +1270,7 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id)
                        struct skb_shared_hwtstamps shhwtstamps;
                        struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
 
-                       fec_enet_hwtstamp(fep, ebdp->ts, &shhwtstamps);
+                       fec_enet_hwtstamp(fep, fec32_to_cpu(ebdp->ts), &shhwtstamps);
                        skb_tstamp_tx(skb, &shhwtstamps);
                }
 
@@ -1324,10 +1330,8 @@ fec_enet_new_rxbdp(struct net_device *ndev, struct bufdesc *bdp, struct sk_buff
        if (off)
                skb_reserve(skb, fep->rx_align + 1 - off);
 
-       bdp->cbd_bufaddr = dma_map_single(&fep->pdev->dev, skb->data,
-                                         FEC_ENET_RX_FRSIZE - fep->rx_align,
-                                         DMA_FROM_DEVICE);
-       if (dma_mapping_error(&fep->pdev->dev, bdp->cbd_bufaddr)) {
+       bdp->cbd_bufaddr = cpu_to_fec32(dma_map_single(&fep->pdev->dev, skb->data, FEC_ENET_RX_FRSIZE - fep->rx_align, DMA_FROM_DEVICE));
+       if (dma_mapping_error(&fep->pdev->dev, fec32_to_cpu(bdp->cbd_bufaddr))) {
                if (net_ratelimit())
                        netdev_err(ndev, "Rx DMA memory map failed\n");
                return -ENOMEM;
@@ -1349,7 +1353,8 @@ static bool fec_enet_copybreak(struct net_device *ndev, struct sk_buff **skb,
        if (!new_skb)
                return false;
 
-       dma_sync_single_for_cpu(&fep->pdev->dev, bdp->cbd_bufaddr,
+       dma_sync_single_for_cpu(&fep->pdev->dev,
+                               fec32_to_cpu(bdp->cbd_bufaddr),
                                FEC_ENET_RX_FRSIZE - fep->rx_align,
                                DMA_FROM_DEVICE);
        if (!swap)
@@ -1396,7 +1401,7 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id)
         */
        bdp = rxq->cur_rx;
 
-       while (!((status = bdp->cbd_sc) & BD_ENET_RX_EMPTY)) {
+       while (!((status = fec16_to_cpu(bdp->cbd_sc)) & BD_ENET_RX_EMPTY)) {
 
                if (pkt_received >= budget)
                        break;
@@ -1438,7 +1443,7 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id)
 
                /* Process the incoming frame. */
                ndev->stats.rx_packets++;
-               pkt_len = bdp->cbd_datlen;
+               pkt_len = fec16_to_cpu(bdp->cbd_datlen);
                ndev->stats.rx_bytes += pkt_len;
 
                index = fec_enet_get_bd_index(rxq->rx_bd_base, bdp, fep);
@@ -1456,7 +1461,8 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id)
                                ndev->stats.rx_dropped++;
                                goto rx_processing_done;
                        }
-                       dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr,
+                       dma_unmap_single(&fep->pdev->dev,
+                                        fec32_to_cpu(bdp->cbd_bufaddr),
                                         FEC_ENET_RX_FRSIZE - fep->rx_align,
                                         DMA_FROM_DEVICE);
                }
@@ -1475,7 +1481,8 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id)
                /* If this is a VLAN packet remove the VLAN Tag */
                vlan_packet_rcvd = false;
                if ((ndev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
-                       fep->bufdesc_ex && (ebdp->cbd_esc & BD_ENET_RX_VLAN)) {
+                   fep->bufdesc_ex &&
+                   (ebdp->cbd_esc & cpu_to_fec32(BD_ENET_RX_VLAN))) {
                        /* Push and remove the vlan tag */
                        struct vlan_hdr *vlan_header =
                                        (struct vlan_hdr *) (data + ETH_HLEN);
@@ -1491,12 +1498,12 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id)
 
                /* Get receive timestamp from the skb */
                if (fep->hwts_rx_en && fep->bufdesc_ex)
-                       fec_enet_hwtstamp(fep, ebdp->ts,
+                       fec_enet_hwtstamp(fep, fec32_to_cpu(ebdp->ts),
                                          skb_hwtstamps(skb));
 
                if (fep->bufdesc_ex &&
                    (fep->csum_flags & FLAG_RX_CSUM_ENABLED)) {
-                       if (!(ebdp->cbd_esc & FLAG_RX_CSUM_ERROR)) {
+                       if (!(ebdp->cbd_esc & cpu_to_fec32(FLAG_RX_CSUM_ERROR))) {
                                /* don't check it */
                                skb->ip_summed = CHECKSUM_UNNECESSARY;
                        } else {
@@ -1513,7 +1520,8 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id)
                napi_gro_receive(&fep->napi, skb);
 
                if (is_copybreak) {
-                       dma_sync_single_for_device(&fep->pdev->dev, bdp->cbd_bufaddr,
+                       dma_sync_single_for_device(&fep->pdev->dev,
+                                                  fec32_to_cpu(bdp->cbd_bufaddr),
                                                   FEC_ENET_RX_FRSIZE - fep->rx_align,
                                                   DMA_FROM_DEVICE);
                } else {
@@ -1527,12 +1535,12 @@ rx_processing_done:
 
                /* Mark the buffer empty */
                status |= BD_ENET_RX_EMPTY;
-               bdp->cbd_sc = status;
+               bdp->cbd_sc = cpu_to_fec16(status);
 
                if (fep->bufdesc_ex) {
                        struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
 
-                       ebdp->cbd_esc = BD_ENET_RX_INT;
+                       ebdp->cbd_esc = cpu_to_fec32(BD_ENET_RX_INT);
                        ebdp->cbd_prot = 0;
                        ebdp->cbd_bdu = 0;
                }
@@ -2145,8 +2153,7 @@ static int fec_enet_get_regs_len(struct net_device *ndev)
 
 /* List of registers that can be safety be read to dump them with ethtool */
 #if defined(CONFIG_M523x) || defined(CONFIG_M527x) || defined(CONFIG_M528x) || \
-       defined(CONFIG_M520x) || defined(CONFIG_M532x) ||               \
-       defined(CONFIG_ARCH_MXC) || defined(CONFIG_SOC_IMX28)
+       defined(CONFIG_M520x) || defined(CONFIG_M532x) || defined(CONFIG_ARM)
 static u32 fec_enet_register_offset[] = {
        FEC_IEVENT, FEC_IMASK, FEC_R_DES_ACTIVE_0, FEC_X_DES_ACTIVE_0,
        FEC_ECNTRL, FEC_MII_DATA, FEC_MII_SPEED, FEC_MIB_CTRLSTAT, FEC_R_CNTRL,
@@ -2662,7 +2669,7 @@ static void fec_enet_free_buffers(struct net_device *ndev)
                        rxq->rx_skbuff[i] = NULL;
                        if (skb) {
                                dma_unmap_single(&fep->pdev->dev,
-                                                bdp->cbd_bufaddr,
+                                                fec32_to_cpu(bdp->cbd_bufaddr),
                                                 FEC_ENET_RX_FRSIZE - fep->rx_align,
                                                 DMA_FROM_DEVICE);
                                dev_kfree_skb(skb);
@@ -2777,11 +2784,11 @@ fec_enet_alloc_rxq_buffers(struct net_device *ndev, unsigned int queue)
                }
 
                rxq->rx_skbuff[i] = skb;
-               bdp->cbd_sc = BD_ENET_RX_EMPTY;
+               bdp->cbd_sc = cpu_to_fec16(BD_ENET_RX_EMPTY);
 
                if (fep->bufdesc_ex) {
                        struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
-                       ebdp->cbd_esc = BD_ENET_RX_INT;
+                       ebdp->cbd_esc = cpu_to_fec32(BD_ENET_RX_INT);
                }
 
                bdp = fec_enet_get_nextdesc(bdp, fep, queue);
@@ -2789,7 +2796,7 @@ fec_enet_alloc_rxq_buffers(struct net_device *ndev, unsigned int queue)
 
        /* Set the last buffer to wrap. */
        bdp = fec_enet_get_prevdesc(bdp, fep, queue);
-       bdp->cbd_sc |= BD_SC_WRAP;
+       bdp->cbd_sc |= cpu_to_fec16(BD_SC_WRAP);
        return 0;
 
  err_alloc:
@@ -2812,12 +2819,12 @@ fec_enet_alloc_txq_buffers(struct net_device *ndev, unsigned int queue)
                if (!txq->tx_bounce[i])
                        goto err_alloc;
 
-               bdp->cbd_sc = 0;
-               bdp->cbd_bufaddr = 0;
+               bdp->cbd_sc = cpu_to_fec16(0);
+               bdp->cbd_bufaddr = cpu_to_fec32(0);
 
                if (fep->bufdesc_ex) {
                        struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
-                       ebdp->cbd_esc = BD_ENET_TX_INT;
+                       ebdp->cbd_esc = cpu_to_fec32(BD_ENET_TX_INT);
                }
 
                bdp = fec_enet_get_nextdesc(bdp, fep, queue);
@@ -2825,7 +2832,7 @@ fec_enet_alloc_txq_buffers(struct net_device *ndev, unsigned int queue)
 
        /* Set the last buffer to wrap. */
        bdp = fec_enet_get_prevdesc(bdp, fep, queue);
-       bdp->cbd_sc |= BD_SC_WRAP;
+       bdp->cbd_sc |= cpu_to_fec16(BD_SC_WRAP);
 
        return 0;
 
index 52e0091b4fb261f983ab0e043d1cb0d48638dc03..1ba359f17ec6e2103d8b1b126ec84a55d30f006e 100644 (file)
@@ -552,7 +552,7 @@ static void tx_restart(struct net_device *dev)
        cbd_t __iomem *prev_bd;
        cbd_t __iomem *last_tx_bd;
 
-       last_tx_bd = fep->tx_bd_base + ((fpi->tx_ring - 1) * sizeof(cbd_t));
+       last_tx_bd = fep->tx_bd_base + (fpi->tx_ring - 1);
 
        /* get the current bd held in TBPTR  and scan back from this point */
        recheck_bd = curr_tbptr = (cbd_t __iomem *)
index b3645297477e53309918e6762c44a6bfde031880..3bfe36f9405b60b4822185853ca3d3d35ea563a5 100644 (file)
@@ -95,21 +95,17 @@ static struct hnae_buf_ops hnae_bops = {
 static int __ae_match(struct device *dev, const void *data)
 {
        struct hnae_ae_dev *hdev = cls_to_ae_dev(dev);
-       const char *ae_id = data;
 
-       if (!strncmp(ae_id, hdev->name, AE_NAME_SIZE))
-               return 1;
-
-       return 0;
+       return hdev->dev->of_node == data;
 }
 
-static struct hnae_ae_dev *find_ae(const char *ae_id)
+static struct hnae_ae_dev *find_ae(const struct device_node *ae_node)
 {
        struct device *dev;
 
-       WARN_ON(!ae_id);
+       WARN_ON(!ae_node);
 
-       dev = class_find_device(hnae_class, NULL, ae_id, __ae_match);
+       dev = class_find_device(hnae_class, NULL, ae_node, __ae_match);
 
        return dev ? cls_to_ae_dev(dev) : NULL;
 }
@@ -316,7 +312,8 @@ EXPORT_SYMBOL(hnae_reinit_handle);
  * return handle ptr or ERR_PTR
  */
 struct hnae_handle *hnae_get_handle(struct device *owner_dev,
-                                   const char *ae_id, u32 port_id,
+                                   const struct device_node *ae_node,
+                                   u32 port_id,
                                    struct hnae_buf_ops *bops)
 {
        struct hnae_ae_dev *dev;
@@ -324,7 +321,7 @@ struct hnae_handle *hnae_get_handle(struct device *owner_dev,
        int i, j;
        int ret;
 
-       dev = find_ae(ae_id);
+       dev = find_ae(ae_node);
        if (!dev)
                return ERR_PTR(-ENODEV);
 
index 6ca94dc3dda3c2447d59e369fee4eadeba6e0f52..1cbcb9fa3fb56b5ea4fee252f3f35294e6e7b6d3 100644 (file)
@@ -524,8 +524,11 @@ struct hnae_handle {
 
 #define ring_to_dev(ring) ((ring)->q->dev->dev)
 
-struct hnae_handle *hnae_get_handle(struct device *owner_dev, const char *ae_id,
-                                   u32 port_id, struct hnae_buf_ops *bops);
+struct hnae_handle *hnae_get_handle(struct device *owner_dev,
+                                   const struct device_node *ae_node,
+                                   u32 port_id,
+                                   struct hnae_buf_ops *bops);
+
 void hnae_put_handle(struct hnae_handle *handle);
 int hnae_ae_register(struct hnae_ae_dev *dev, struct module *owner);
 void hnae_ae_unregister(struct hnae_ae_dev *dev);
index 522b264866b42e68b65e40cd93e28f991a3e678c..a0070d0e740dae5b3de8f2b2a92a772854b08321 100644 (file)
@@ -847,6 +847,7 @@ static struct hnae_ae_ops hns_dsaf_ops = {
 int hns_dsaf_ae_init(struct dsaf_device *dsaf_dev)
 {
        struct hnae_ae_dev *ae_dev = &dsaf_dev->ae_dev;
+       static atomic_t id = ATOMIC_INIT(-1);
 
        switch (dsaf_dev->dsaf_ver) {
        case AE_VERSION_1:
@@ -858,6 +859,9 @@ int hns_dsaf_ae_init(struct dsaf_device *dsaf_dev)
        default:
                break;
        }
+
+       snprintf(ae_dev->name, AE_NAME_SIZE, "%s%d", DSAF_DEVICE_NAME,
+                (int)atomic_inc_return(&id));
        ae_dev->ops = &hns_dsaf_ops;
        ae_dev->dev = dsaf_dev->dev;
 
index 1c33bd06bd5cc484ba320e60ceb5bee113493014..9439f04962e1d96bf480d8aedf6472c912e28813 100644 (file)
@@ -35,7 +35,7 @@ int hns_dsaf_get_cfg(struct dsaf_device *dsaf_dev)
        int ret, i;
        u32 desc_num;
        u32 buf_size;
-       const char *name, *mode_str;
+       const char *mode_str;
        struct device_node *np = dsaf_dev->dev->of_node;
 
        if (of_device_is_compatible(np, "hisilicon,hns-dsaf-v1"))
@@ -43,14 +43,6 @@ int hns_dsaf_get_cfg(struct dsaf_device *dsaf_dev)
        else
                dsaf_dev->dsaf_ver = AE_VERSION_2;
 
-       ret = of_property_read_string(np, "dsa_name", &name);
-       if (ret) {
-               dev_err(dsaf_dev->dev, "get dsaf name fail, ret=%d!\n", ret);
-               return ret;
-       }
-       strncpy(dsaf_dev->ae_dev.name, name, AE_NAME_SIZE);
-       dsaf_dev->ae_dev.name[AE_NAME_SIZE - 1] = '\0';
-
        ret = of_property_read_string(np, "mode", &mode_str);
        if (ret) {
                dev_err(dsaf_dev->dev, "get dsaf mode fail, ret=%d!\n", ret);
index 31c312f9826e9d8bc644b0aa1c6d6852ea25e5bd..40205b910f80e66a439c14b53e7f2c907b0fbeb1 100644 (file)
@@ -18,6 +18,7 @@ struct hns_mac_cb;
 
 #define DSAF_DRV_NAME "hns_dsaf"
 #define DSAF_MOD_VERSION "v1.0"
+#define DSAF_DEVICE_NAME "dsaf"
 
 #define HNS_DSAF_DEBUG_NW_REG_OFFSET 0x100000
 
index 0e30846a24f89b632028211f60ee82018432a7e0..3f77ff77abbc4594f8aeb0ccc40c6691fc122fac 100644 (file)
@@ -1802,7 +1802,7 @@ static int hns_nic_try_get_ae(struct net_device *ndev)
        int ret;
 
        h = hnae_get_handle(&priv->netdev->dev,
-                           priv->ae_name, priv->port_id, NULL);
+                           priv->ae_node, priv->port_id, NULL);
        if (IS_ERR_OR_NULL(h)) {
                ret = PTR_ERR(h);
                dev_dbg(priv->dev, "has not handle, register notifier!\n");
@@ -1880,13 +1880,16 @@ static int hns_nic_dev_probe(struct platform_device *pdev)
        else
                priv->enet_ver = AE_VERSION_2;
 
-       ret = of_property_read_string(node, "ae-name", &priv->ae_name);
-       if (ret)
-               goto out_read_string_fail;
+       priv->ae_node = (void *)of_parse_phandle(node, "ae-handle", 0);
+       if (IS_ERR_OR_NULL(priv->ae_node)) {
+               ret = PTR_ERR(priv->ae_node);
+               dev_err(dev, "not find ae-handle\n");
+               goto out_read_prop_fail;
+       }
 
        ret = of_property_read_u32(node, "port-id", &priv->port_id);
        if (ret)
-               goto out_read_string_fail;
+               goto out_read_prop_fail;
 
        hns_init_mac_addr(ndev);
 
@@ -1945,7 +1948,7 @@ static int hns_nic_dev_probe(struct platform_device *pdev)
 
 out_notify_fail:
        (void)cancel_work_sync(&priv->service_task);
-out_read_string_fail:
+out_read_prop_fail:
        free_netdev(ndev);
        return ret;
 }
index 4b75270f014e765ba1cc02010c4175cafddda84c..c68ab3d34fc224ef00f43532f929fbd533b2ee80 100644 (file)
@@ -51,7 +51,7 @@ struct hns_nic_ops {
 };
 
 struct hns_nic_priv {
-       const char *ae_name;
+       const struct device_node *ae_node;
        u32 enet_ver;
        u32 port_id;
        int phy_mode;
index 1d5c3e16d8f4f4ad1f3ba580ee3a4a9c48663551..3daf2d4a7ca057a66e9ad797baa581c0d92caeba 100644 (file)
@@ -194,7 +194,6 @@ static const char *hp100_isa_tbl[] = {
 };
 #endif
 
-#ifdef CONFIG_EISA
 static struct eisa_device_id hp100_eisa_tbl[] = {
        { "HWPF180" }, /* HP J2577 rev A */
        { "HWP1920" }, /* HP 27248B */
@@ -205,9 +204,7 @@ static struct eisa_device_id hp100_eisa_tbl[] = {
        { "" }         /* Mandatory final entry ! */
 };
 MODULE_DEVICE_TABLE(eisa, hp100_eisa_tbl);
-#endif
 
-#ifdef CONFIG_PCI
 static const struct pci_device_id hp100_pci_tbl[] = {
        {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_J2585A, PCI_ANY_ID, PCI_ANY_ID,},
        {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_J2585B, PCI_ANY_ID, PCI_ANY_ID,},
@@ -219,7 +216,6 @@ static const struct pci_device_id hp100_pci_tbl[] = {
        {}                      /* Terminating entry */
 };
 MODULE_DEVICE_TABLE(pci, hp100_pci_tbl);
-#endif
 
 static int hp100_rx_ratio = HP100_DEFAULT_RX_RATIO;
 static int hp100_priority_tx = HP100_DEFAULT_PRIORITY_TX;
@@ -2842,7 +2838,6 @@ static void cleanup_dev(struct net_device *d)
        free_netdev(d);
 }
 
-#ifdef CONFIG_EISA
 static int hp100_eisa_probe(struct device *gendev)
 {
        struct net_device *dev = alloc_etherdev(sizeof(struct hp100_private));
@@ -2884,9 +2879,7 @@ static struct eisa_driver hp100_eisa_driver = {
                .remove  = hp100_eisa_remove,
         }
 };
-#endif
 
-#ifdef CONFIG_PCI
 static int hp100_pci_probe(struct pci_dev *pdev,
                           const struct pci_device_id *ent)
 {
@@ -2955,7 +2948,6 @@ static struct pci_driver hp100_pci_driver = {
        .probe          = hp100_pci_probe,
        .remove         = hp100_pci_remove,
 };
-#endif
 
 /*
  *  module section
@@ -3032,23 +3024,17 @@ static int __init hp100_module_init(void)
        err = hp100_isa_init();
        if (err && err != -ENODEV)
                goto out;
-#ifdef CONFIG_EISA
        err = eisa_driver_register(&hp100_eisa_driver);
        if (err && err != -ENODEV)
                goto out2;
-#endif
-#ifdef CONFIG_PCI
        err = pci_register_driver(&hp100_pci_driver);
        if (err && err != -ENODEV)
                goto out3;
-#endif
  out:
        return err;
  out3:
-#ifdef CONFIG_EISA
        eisa_driver_unregister (&hp100_eisa_driver);
  out2:
-#endif
        hp100_isa_cleanup();
        goto out;
 }
@@ -3057,12 +3043,8 @@ static int __init hp100_module_init(void)
 static void __exit hp100_module_exit(void)
 {
        hp100_isa_cleanup();
-#ifdef CONFIG_EISA
        eisa_driver_unregister (&hp100_eisa_driver);
-#endif
-#ifdef CONFIG_PCI
        pci_unregister_driver (&hp100_pci_driver);
-#endif
 }
 
 module_init(hp100_module_init)
index bb4612c159fd197db5b2a750027400b48cbdec77..8f3b53e0dc46c28965d00ea994940949404361be 100644 (file)
@@ -7117,9 +7117,7 @@ static void i40e_service_task(struct work_struct *work)
        i40e_watchdog_subtask(pf);
        i40e_fdir_reinit_subtask(pf);
        i40e_sync_filters_subtask(pf);
-#if IS_ENABLED(CONFIG_VXLAN) || IS_ENABLED(CONFIG_GENEVE)
        i40e_sync_udp_filters_subtask(pf);
-#endif
        i40e_clean_adminq_subtask(pf);
 
        i40e_service_event_complete(pf);
@@ -8515,6 +8513,8 @@ static u8 i40e_get_udp_port_idx(struct i40e_pf *pf, __be16 port)
 }
 
 #endif
+
+#if IS_ENABLED(CONFIG_VXLAN)
 /**
  * i40e_add_vxlan_port - Get notifications about VXLAN ports that come up
  * @netdev: This physical port's netdev
@@ -8524,7 +8524,6 @@ static u8 i40e_get_udp_port_idx(struct i40e_pf *pf, __be16 port)
 static void i40e_add_vxlan_port(struct net_device *netdev,
                                sa_family_t sa_family, __be16 port)
 {
-#if IS_ENABLED(CONFIG_VXLAN)
        struct i40e_netdev_priv *np = netdev_priv(netdev);
        struct i40e_vsi *vsi = np->vsi;
        struct i40e_pf *pf = vsi->back;
@@ -8557,7 +8556,6 @@ static void i40e_add_vxlan_port(struct net_device *netdev,
        pf->udp_ports[next_idx].type = I40E_AQC_TUNNEL_TYPE_VXLAN;
        pf->pending_udp_bitmap |= BIT_ULL(next_idx);
        pf->flags |= I40E_FLAG_UDP_FILTER_SYNC;
-#endif
 }
 
 /**
@@ -8569,7 +8567,6 @@ static void i40e_add_vxlan_port(struct net_device *netdev,
 static void i40e_del_vxlan_port(struct net_device *netdev,
                                sa_family_t sa_family, __be16 port)
 {
-#if IS_ENABLED(CONFIG_VXLAN)
        struct i40e_netdev_priv *np = netdev_priv(netdev);
        struct i40e_vsi *vsi = np->vsi;
        struct i40e_pf *pf = vsi->back;
@@ -8592,9 +8589,10 @@ static void i40e_del_vxlan_port(struct net_device *netdev,
                netdev_warn(netdev, "vxlan port %d was not found, not deleting\n",
                            ntohs(port));
        }
-#endif
 }
+#endif
 
+#if IS_ENABLED(CONFIG_GENEVE)
 /**
  * i40e_add_geneve_port - Get notifications about GENEVE ports that come up
  * @netdev: This physical port's netdev
@@ -8604,7 +8602,6 @@ static void i40e_del_vxlan_port(struct net_device *netdev,
 static void i40e_add_geneve_port(struct net_device *netdev,
                                 sa_family_t sa_family, __be16 port)
 {
-#if IS_ENABLED(CONFIG_GENEVE)
        struct i40e_netdev_priv *np = netdev_priv(netdev);
        struct i40e_vsi *vsi = np->vsi;
        struct i40e_pf *pf = vsi->back;
@@ -8639,7 +8636,6 @@ static void i40e_add_geneve_port(struct net_device *netdev,
        pf->flags |= I40E_FLAG_UDP_FILTER_SYNC;
 
        dev_info(&pf->pdev->dev, "adding geneve port %d\n", ntohs(port));
-#endif
 }
 
 /**
@@ -8651,7 +8647,6 @@ static void i40e_add_geneve_port(struct net_device *netdev,
 static void i40e_del_geneve_port(struct net_device *netdev,
                                 sa_family_t sa_family, __be16 port)
 {
-#if IS_ENABLED(CONFIG_GENEVE)
        struct i40e_netdev_priv *np = netdev_priv(netdev);
        struct i40e_vsi *vsi = np->vsi;
        struct i40e_pf *pf = vsi->back;
@@ -8677,8 +8672,8 @@ static void i40e_del_geneve_port(struct net_device *netdev,
                netdev_warn(netdev, "geneve port %d was not found, not deleting\n",
                            ntohs(port));
        }
-#endif
 }
+#endif
 
 static int i40e_get_phys_port_id(struct net_device *netdev,
                                 struct netdev_phys_item_id *ppid)
index 720516b0e8eec59debf3b41195f9a2617eef169b..47bd8b3145a79a9bf2e299fc303100be4bf8289c 100644 (file)
@@ -2313,8 +2313,8 @@ static void i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags,
        struct iphdr *this_ip_hdr;
        u32 network_hdr_len;
        u8 l4_hdr = 0;
-       struct udphdr *oudph;
-       struct iphdr *oiph;
+       struct udphdr *oudph = NULL;
+       struct iphdr *oiph = NULL;
        u32 l4_tunnel = 0;
 
        if (skb->encapsulation) {
index a0c03834a2f7b3d7494b864db6e96ee590b36c81..55831188bc32431e935550a7da5671fa1d24d89a 100644 (file)
@@ -762,10 +762,10 @@ txq_put_data_tso(struct net_device *dev, struct tx_queue *txq,
 
        if (length <= 8 && (uintptr_t)data & 0x7) {
                /* Copy unaligned small data fragment to TSO header data area */
-               memcpy(txq->tso_hdrs + txq->tx_curr_desc * TSO_HEADER_SIZE,
+               memcpy(txq->tso_hdrs + tx_index * TSO_HEADER_SIZE,
                       data, length);
                desc->buf_ptr = txq->tso_hdrs_dma
-                       + txq->tx_curr_desc * TSO_HEADER_SIZE;
+                       + tx_index * TSO_HEADER_SIZE;
        } else {
                /* Alignment is okay, map buffer and hand off to hardware */
                txq->tx_desc_mapping[tx_index] = DESC_DMA_MAP_SINGLE;
index fabc8df40392572d607f3e3417e60b54a67b4bf3..662c2ee268c7c7512c7d5f1290e1cb02fa3883dc 100644 (file)
  * warranty of any kind, whether express or implied.
  */
 
-#include <linux/kernel.h>
-#include <linux/netdevice.h>
+#include <linux/clk.h>
+#include <linux/cpu.h>
 #include <linux/etherdevice.h>
-#include <linux/platform_device.h>
-#include <linux/skbuff.h>
+#include <linux/if_vlan.h>
 #include <linux/inetdevice.h>
-#include <linux/mbus.h>
-#include <linux/module.h>
 #include <linux/interrupt.h>
-#include <linux/if_vlan.h>
-#include <net/ip.h>
-#include <net/ipv6.h>
 #include <linux/io.h>
-#include <net/tso.h>
+#include <linux/kernel.h>
+#include <linux/mbus.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
 #include <linux/of.h>
+#include <linux/of_address.h>
 #include <linux/of_irq.h>
 #include <linux/of_mdio.h>
 #include <linux/of_net.h>
-#include <linux/of_address.h>
 #include <linux/phy.h>
-#include <linux/clk.h>
-#include <linux/cpu.h>
+#include <linux/platform_device.h>
+#include <linux/skbuff.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/tso.h>
 
 /* Registers */
 #define MVNETA_RXQ_CONFIG_REG(q)                (0x1400 + ((q) << 2))
@@ -373,6 +373,8 @@ struct mvneta_port {
 
        /* Core clock */
        struct clk *clk;
+       /* AXI clock */
+       struct clk *clk_bus;
        u8 mcast_count[256];
        u16 tx_ring_size;
        u16 rx_ring_size;
@@ -3242,26 +3244,25 @@ static void mvneta_ethtool_update_stats(struct mvneta_port *pp)
        const struct mvneta_statistic *s;
        void __iomem *base = pp->base;
        u32 high, low, val;
+       u64 val64;
        int i;
 
        for (i = 0, s = mvneta_statistics;
             s < mvneta_statistics + ARRAY_SIZE(mvneta_statistics);
             s++, i++) {
-               val = 0;
-
                switch (s->type) {
                case T_REG_32:
                        val = readl_relaxed(base + s->offset);
+                       pp->ethtool_stats[i] += val;
                        break;
                case T_REG_64:
                        /* Docs say to read low 32-bit then high */
                        low = readl_relaxed(base + s->offset);
                        high = readl_relaxed(base + s->offset + 4);
-                       val = (u64)high << 32 | low;
+                       val64 = (u64)high << 32 | low;
+                       pp->ethtool_stats[i] += val64;
                        break;
                }
-
-               pp->ethtool_stats[i] += val;
        }
 }
 
@@ -3605,7 +3606,9 @@ static int mvneta_probe(struct platform_device *pdev)
 
        pp->indir[0] = rxq_def;
 
-       pp->clk = devm_clk_get(&pdev->dev, NULL);
+       pp->clk = devm_clk_get(&pdev->dev, "core");
+       if (IS_ERR(pp->clk))
+               pp->clk = devm_clk_get(&pdev->dev, NULL);
        if (IS_ERR(pp->clk)) {
                err = PTR_ERR(pp->clk);
                goto err_put_phy_node;
@@ -3613,6 +3616,10 @@ static int mvneta_probe(struct platform_device *pdev)
 
        clk_prepare_enable(pp->clk);
 
+       pp->clk_bus = devm_clk_get(&pdev->dev, "bus");
+       if (!IS_ERR(pp->clk_bus))
+               clk_prepare_enable(pp->clk_bus);
+
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        pp->base = devm_ioremap_resource(&pdev->dev, res);
        if (IS_ERR(pp->base)) {
@@ -3724,6 +3731,7 @@ err_free_stats:
 err_free_ports:
        free_percpu(pp->ports);
 err_clk:
+       clk_disable_unprepare(pp->clk_bus);
        clk_disable_unprepare(pp->clk);
 err_put_phy_node:
        of_node_put(phy_node);
@@ -3741,6 +3749,7 @@ static int mvneta_remove(struct platform_device *pdev)
        struct mvneta_port *pp = netdev_priv(dev);
 
        unregister_netdev(dev);
+       clk_disable_unprepare(pp->clk_bus);
        clk_disable_unprepare(pp->clk);
        free_percpu(pp->ports);
        free_percpu(pp->stats);
index 2c2baab9d88044d77d910653f882e9815fa57561..d66c690a859773d24db2cc835f8c365135023bf4 100644 (file)
@@ -157,6 +157,7 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags)
                [29] = "802.1ad offload support",
                [31] = "Modifying loopback source checks using UPDATE_QP support",
                [32] = "Loopback source checks support",
+               [33] = "RoCEv2 support"
        };
        int i;
 
@@ -626,6 +627,8 @@ out:
        return err;
 }
 
+static void disable_unsupported_roce_caps(void *buf);
+
 int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 {
        struct mlx4_cmd_mailbox *mailbox;
@@ -738,6 +741,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
        if (err)
                goto out;
 
+       if (mlx4_is_mfunc(dev))
+               disable_unsupported_roce_caps(outbox);
        MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_QP_OFFSET);
        dev_cap->reserved_qps = 1 << (field & 0xf);
        MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_QP_OFFSET);
@@ -905,6 +910,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
                dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
        MLX4_GET(dev_cap->bmme_flags, outbox,
                 QUERY_DEV_CAP_BMME_FLAGS_OFFSET);
+       if (dev_cap->bmme_flags & MLX4_FLAG_ROCE_V1_V2)
+               dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_ROCE_V1_V2;
        if (dev_cap->bmme_flags & MLX4_FLAG_PORT_REMAP)
                dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_PORT_REMAP;
        MLX4_GET(field, outbox, QUERY_DEV_CAP_CONFIG_DEV_OFFSET);
@@ -1161,6 +1168,7 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave,
        if (err)
                return err;
 
+       disable_unsupported_roce_caps(outbox->buf);
        /* add port mng change event capability and disable mw type 1
         * unconditionally to slaves
         */
@@ -1258,6 +1266,21 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave,
        return 0;
 }
 
+static void disable_unsupported_roce_caps(void *buf)
+{
+       u32 flags;
+
+       MLX4_GET(flags, buf, QUERY_DEV_CAP_EXT_FLAGS_OFFSET);
+       flags &= ~(1UL << 31);
+       MLX4_PUT(buf, flags, QUERY_DEV_CAP_EXT_FLAGS_OFFSET);
+       MLX4_GET(flags, buf, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET);
+       flags &= ~(1UL << 24);
+       MLX4_PUT(buf, flags, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET);
+       MLX4_GET(flags, buf, QUERY_DEV_CAP_BMME_FLAGS_OFFSET);
+       flags &= ~(MLX4_FLAG_ROCE_V1_V2);
+       MLX4_PUT(buf, flags, QUERY_DEV_CAP_BMME_FLAGS_OFFSET);
+}
+
 int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave,
                            struct mlx4_vhcr *vhcr,
                            struct mlx4_cmd_mailbox *inbox,
@@ -2239,7 +2262,8 @@ struct mlx4_config_dev {
        __be32  rsvd1[3];
        __be16  vxlan_udp_dport;
        __be16  rsvd2;
-       __be32  rsvd3;
+       __be16  roce_v2_entropy;
+       __be16  roce_v2_udp_dport;
        __be32  roce_flags;
        __be32  rsvd4[25];
        __be16  rsvd5;
@@ -2248,6 +2272,7 @@ struct mlx4_config_dev {
 };
 
 #define MLX4_VXLAN_UDP_DPORT (1 << 0)
+#define MLX4_ROCE_V2_UDP_DPORT BIT(3)
 #define MLX4_DISABLE_RX_PORT BIT(18)
 
 static int mlx4_CONFIG_DEV_set(struct mlx4_dev *dev, struct mlx4_config_dev *config_dev)
@@ -2365,6 +2390,18 @@ int mlx4_disable_rx_port_check(struct mlx4_dev *dev, bool dis)
        return mlx4_CONFIG_DEV_set(dev, &config_dev);
 }
 
+int mlx4_config_roce_v2_port(struct mlx4_dev *dev, u16 udp_port)
+{
+       struct mlx4_config_dev config_dev;
+
+       memset(&config_dev, 0, sizeof(config_dev));
+       config_dev.update_flags    = cpu_to_be32(MLX4_ROCE_V2_UDP_DPORT);
+       config_dev.roce_v2_udp_dport = cpu_to_be16(udp_port);
+
+       return mlx4_CONFIG_DEV_set(dev, &config_dev);
+}
+EXPORT_SYMBOL_GPL(mlx4_config_roce_v2_port);
+
 int mlx4_virt2phy_port_map(struct mlx4_dev *dev, u32 port1, u32 port2)
 {
        struct mlx4_cmd_mailbox *mailbox;
index 2404c22ad2b2ac5d890798dc5bfe7ae608232e2c..7baef52db6b7586cf41df128c8ddecc581bc1568 100644 (file)
@@ -780,7 +780,10 @@ struct mlx4_set_port_general_context {
        u16 reserved1;
        u8 v_ignore_fcs;
        u8 flags;
-       u8 ignore_fcs;
+       union {
+               u8 ignore_fcs;
+               u8 roce_mode;
+       };
        u8 reserved2;
        __be16 mtu;
        u8 pptx;
index f2550425c25147e0881fdabfb20d8514cb37a088..787b7bb54d52acd094570283bf6793f1bfb0dab0 100644 (file)
@@ -1520,6 +1520,8 @@ int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port, int pkey_tbl_sz)
        return err;
 }
 
+#define SET_PORT_ROCE_2_FLAGS          0x10
+#define MLX4_SET_PORT_ROCE_V1_V2       0x2
 int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu,
                          u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx)
 {
@@ -1539,6 +1541,11 @@ int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu,
        context->pprx = (pprx * (!pfcrx)) << 7;
        context->pfcrx = pfcrx;
 
+       if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
+               context->flags |= SET_PORT_ROCE_2_FLAGS;
+               context->roce_mode |=
+                       MLX4_SET_PORT_ROCE_V1_V2 << 4;
+       }
        in_mod = MLX4_SET_PORT_GENERAL << 8 | port;
        err = mlx4_cmd(dev, mailbox->dma, in_mod, MLX4_SET_PORT_ETH_OPCODE,
                       MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
index 168823dde79f3dd48596bdad5a4ea72a95ed0404..d1cd9c32a9ae83912e9b3b552c5680768b8449c3 100644 (file)
@@ -167,6 +167,12 @@ static int __mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
                context->log_page_size   = mtt->page_shift - MLX4_ICM_PAGE_SHIFT;
        }
 
+       if ((cur_state == MLX4_QP_STATE_RTR) &&
+           (new_state == MLX4_QP_STATE_RTS) &&
+           dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
+               context->roce_entropy =
+                       cpu_to_be16(mlx4_qp_roce_entropy(dev, qp->qpn));
+
        *(__be32 *) mailbox->buf = cpu_to_be32(optpar);
        memcpy(mailbox->buf + 8, context, sizeof *context);
 
@@ -921,3 +927,23 @@ int mlx4_qp_to_ready(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
        return 0;
 }
 EXPORT_SYMBOL_GPL(mlx4_qp_to_ready);
+
+u16 mlx4_qp_roce_entropy(struct mlx4_dev *dev, u32 qpn)
+{
+       struct mlx4_qp_context context;
+       struct mlx4_qp qp;
+       int err;
+
+       qp.qpn = qpn;
+       err = mlx4_qp_query(dev, &qp, &context);
+       if (!err) {
+               u32 dest_qpn = be32_to_cpu(context.remote_qpn) & 0xffffff;
+               u16 folded_dst = folded_qp(dest_qpn);
+               u16 folded_src = folded_qp(qpn);
+
+               return (dest_qpn != qpn) ?
+                       ((folded_dst ^ folded_src) | 0xC000) :
+                       folded_src | 0xC000;
+       }
+       return 0xdead;
+}
index 9ea49a8933231808fff014f5a4570b7e380f5b44..aac071a7e830b5fd777da7a5e4d014d802ad70d0 100644 (file)
@@ -39,8 +39,8 @@
 #include <linux/mlx5/qp.h>
 #include <linux/mlx5/cq.h>
 #include <linux/mlx5/vport.h>
+#include <linux/mlx5/transobj.h>
 #include "wq.h"
-#include "transobj.h"
 #include "mlx5_core.h"
 
 #define MLX5E_MAX_NUM_TC       8
index c56d91a2812b04bf0857be048a7fb11fc8b2c1d0..6a3e430f10624e637fb8b3dcfaade968d2a83c53 100644 (file)
@@ -2241,7 +2241,7 @@ static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev)
                goto err_unmap_free_uar;
        }
 
-       err = mlx5_alloc_transport_domain(mdev, &priv->tdn);
+       err = mlx5_core_alloc_transport_domain(mdev, &priv->tdn);
        if (err) {
                mlx5_core_err(mdev, "alloc td failed, %d\n", err);
                goto err_dealloc_pd;
@@ -2324,7 +2324,7 @@ err_destroy_mkey:
        mlx5_core_destroy_mkey(mdev, &priv->mr);
 
 err_dealloc_transport_domain:
-       mlx5_dealloc_transport_domain(mdev, priv->tdn);
+       mlx5_core_dealloc_transport_domain(mdev, priv->tdn);
 
 err_dealloc_pd:
        mlx5_core_dealloc_pd(mdev, priv->pdn);
@@ -2356,7 +2356,7 @@ static void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, void *vpriv)
        mlx5e_close_drop_rq(priv);
        mlx5e_destroy_tises(priv);
        mlx5_core_destroy_mkey(priv->mdev, &priv->mr);
-       mlx5_dealloc_transport_domain(priv->mdev, priv->tdn);
+       mlx5_core_dealloc_transport_domain(priv->mdev, priv->tdn);
        mlx5_core_dealloc_pd(priv->mdev, priv->pdn);
        mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar);
        free_netdev(netdev);
index 23c244a7e5d73d5fe0042f793d7ca955d945bf7b..647a3ca2c2a925c1d6157febb59fa51ea39872da 100644 (file)
@@ -230,6 +230,7 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
                case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
                case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
                        rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
+                       rsn |= (eqe->data.qp_srq.type << MLX5_USER_INDEX_LEN);
                        mlx5_core_dbg(dev, "event %s(%d) arrived on resource 0x%x\n",
                                      eqe_type_str(eqe->type), eqe->type, rsn);
                        mlx5_rsc_event(dev, rsn, eqe->type);
index b37749a3730e576ef798d02ae380caad91d44fbf..1545a944c309bf3205ad49fc1de90bd21c3eaacd 100644 (file)
@@ -78,6 +78,11 @@ struct mlx5_device_context {
        void                   *context;
 };
 
+enum {
+       MLX5_ATOMIC_REQ_MODE_BE = 0x0,
+       MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS = 0x1,
+};
+
 static struct mlx5_profile profile[] = {
        [0] = {
                .mask           = 0,
@@ -387,7 +392,7 @@ query_ex:
        return err;
 }
 
-static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz)
+static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz, int opmod)
 {
        u32 out[MLX5_ST_SZ_DW(set_hca_cap_out)];
        int err;
@@ -395,6 +400,7 @@ static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz)
        memset(out, 0, sizeof(out));
 
        MLX5_SET(set_hca_cap_in, in, opcode, MLX5_CMD_OP_SET_HCA_CAP);
+       MLX5_SET(set_hca_cap_in, in, op_mod, opmod << 1);
        err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
        if (err)
                return err;
@@ -404,6 +410,46 @@ static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz)
        return err;
 }
 
+static int handle_hca_cap_atomic(struct mlx5_core_dev *dev)
+{
+       void *set_ctx;
+       void *set_hca_cap;
+       int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
+       int req_endianness;
+       int err;
+
+       if (MLX5_CAP_GEN(dev, atomic)) {
+               err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC,
+                                        HCA_CAP_OPMOD_GET_CUR);
+               if (err)
+                       return err;
+       } else {
+               return 0;
+       }
+
+       req_endianness =
+               MLX5_CAP_ATOMIC(dev,
+                               supported_atomic_req_8B_endianess_mode_1);
+
+       if (req_endianness != MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS)
+               return 0;
+
+       set_ctx = kzalloc(set_sz, GFP_KERNEL);
+       if (!set_ctx)
+               return -ENOMEM;
+
+       set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability);
+
+       /* Set requestor to host endianness */
+       MLX5_SET(atomic_caps, set_hca_cap, atomic_req_8B_endianess_mode,
+                MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS);
+
+       err = set_caps(dev, set_ctx, set_sz, MLX5_SET_HCA_CAP_OP_MOD_ATOMIC);
+
+       kfree(set_ctx);
+       return err;
+}
+
 static int handle_hca_cap(struct mlx5_core_dev *dev)
 {
        void *set_ctx = NULL;
@@ -445,7 +491,8 @@ static int handle_hca_cap(struct mlx5_core_dev *dev)
 
        MLX5_SET(cmd_hca_cap, set_hca_cap, log_uar_page_sz, PAGE_SHIFT - 12);
 
-       err = set_caps(dev, set_ctx, set_sz);
+       err = set_caps(dev, set_ctx, set_sz,
+                      MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE);
 
 query_ex:
        kfree(set_ctx);
@@ -667,7 +714,6 @@ clean:
        return err;
 }
 
-#ifdef CONFIG_MLX5_CORE_EN
 static int mlx5_core_set_issi(struct mlx5_core_dev *dev)
 {
        u32 query_in[MLX5_ST_SZ_DW(query_issi_in)];
@@ -720,7 +766,6 @@ static int mlx5_core_set_issi(struct mlx5_core_dev *dev)
 
        return -ENOTSUPP;
 }
-#endif
 
 static int map_bf_area(struct mlx5_core_dev *dev)
 {
@@ -966,13 +1011,11 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
                goto err_pagealloc_cleanup;
        }
 
-#ifdef CONFIG_MLX5_CORE_EN
        err = mlx5_core_set_issi(dev);
        if (err) {
                dev_err(&pdev->dev, "failed to set issi\n");
                goto err_disable_hca;
        }
-#endif
 
        err = mlx5_satisfy_startup_pages(dev, 1);
        if (err) {
@@ -992,6 +1035,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
                goto reclaim_boot_pages;
        }
 
+       err = handle_hca_cap_atomic(dev);
+       if (err) {
+               dev_err(&pdev->dev, "handle_hca_cap_atomic failed\n");
+               goto reclaim_boot_pages;
+       }
+
        err = mlx5_satisfy_startup_pages(dev, 0);
        if (err) {
                dev_err(&pdev->dev, "failed to allocate init pages\n");
index 30e2ba3f5f16cb871fd20b0d05bb99433ea22ab1..def289375ecbf1a1543dcbbd4cf04e0a9b03ce1b 100644 (file)
@@ -36,6 +36,7 @@
 #include <linux/mlx5/cmd.h>
 #include <linux/mlx5/qp.h>
 #include <linux/mlx5/driver.h>
+#include <linux/mlx5/transobj.h>
 
 #include "mlx5_core.h"
 
@@ -67,6 +68,52 @@ void mlx5_core_put_rsc(struct mlx5_core_rsc_common *common)
                complete(&common->free);
 }
 
+static u64 qp_allowed_event_types(void)
+{
+       u64 mask;
+
+       mask = BIT(MLX5_EVENT_TYPE_PATH_MIG) |
+              BIT(MLX5_EVENT_TYPE_COMM_EST) |
+              BIT(MLX5_EVENT_TYPE_SQ_DRAINED) |
+              BIT(MLX5_EVENT_TYPE_SRQ_LAST_WQE) |
+              BIT(MLX5_EVENT_TYPE_WQ_CATAS_ERROR) |
+              BIT(MLX5_EVENT_TYPE_PATH_MIG_FAILED) |
+              BIT(MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR) |
+              BIT(MLX5_EVENT_TYPE_WQ_ACCESS_ERROR);
+
+       return mask;
+}
+
+static u64 rq_allowed_event_types(void)
+{
+       u64 mask;
+
+       mask = BIT(MLX5_EVENT_TYPE_SRQ_LAST_WQE) |
+              BIT(MLX5_EVENT_TYPE_WQ_CATAS_ERROR);
+
+       return mask;
+}
+
+static u64 sq_allowed_event_types(void)
+{
+       return BIT(MLX5_EVENT_TYPE_WQ_CATAS_ERROR);
+}
+
+static bool is_event_type_allowed(int rsc_type, int event_type)
+{
+       switch (rsc_type) {
+       case MLX5_EVENT_QUEUE_TYPE_QP:
+               return BIT(event_type) & qp_allowed_event_types();
+       case MLX5_EVENT_QUEUE_TYPE_RQ:
+               return BIT(event_type) & rq_allowed_event_types();
+       case MLX5_EVENT_QUEUE_TYPE_SQ:
+               return BIT(event_type) & sq_allowed_event_types();
+       default:
+               WARN(1, "Event arrived for unknown resource type");
+               return false;
+       }
+}
+
 void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type)
 {
        struct mlx5_core_rsc_common *common = mlx5_get_rsc(dev, rsn);
@@ -75,8 +122,16 @@ void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type)
        if (!common)
                return;
 
+       if (!is_event_type_allowed((rsn >> MLX5_USER_INDEX_LEN), event_type)) {
+               mlx5_core_warn(dev, "event 0x%.2x is not allowed on resource 0x%.8x\n",
+                              event_type, rsn);
+               return;
+       }
+
        switch (common->res) {
        case MLX5_RES_QP:
+       case MLX5_RES_RQ:
+       case MLX5_RES_SQ:
                qp = (struct mlx5_core_qp *)common;
                qp->event(qp, event_type);
                break;
@@ -177,27 +232,56 @@ void mlx5_eq_pagefault(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe)
 }
 #endif
 
+static int create_qprqsq_common(struct mlx5_core_dev *dev,
+                               struct mlx5_core_qp *qp,
+                               int rsc_type)
+{
+       struct mlx5_qp_table *table = &dev->priv.qp_table;
+       int err;
+
+       qp->common.res = rsc_type;
+       spin_lock_irq(&table->lock);
+       err = radix_tree_insert(&table->tree,
+                               qp->qpn | (rsc_type << MLX5_USER_INDEX_LEN),
+                               qp);
+       spin_unlock_irq(&table->lock);
+       if (err)
+               return err;
+
+       atomic_set(&qp->common.refcount, 1);
+       init_completion(&qp->common.free);
+       qp->pid = current->pid;
+
+       return 0;
+}
+
+static void destroy_qprqsq_common(struct mlx5_core_dev *dev,
+                                 struct mlx5_core_qp *qp)
+{
+       struct mlx5_qp_table *table = &dev->priv.qp_table;
+       unsigned long flags;
+
+       spin_lock_irqsave(&table->lock, flags);
+       radix_tree_delete(&table->tree,
+                         qp->qpn | (qp->common.res << MLX5_USER_INDEX_LEN));
+       spin_unlock_irqrestore(&table->lock, flags);
+       mlx5_core_put_rsc((struct mlx5_core_rsc_common *)qp);
+       wait_for_completion(&qp->common.free);
+}
+
 int mlx5_core_create_qp(struct mlx5_core_dev *dev,
                        struct mlx5_core_qp *qp,
                        struct mlx5_create_qp_mbox_in *in,
                        int inlen)
 {
-       struct mlx5_qp_table *table = &dev->priv.qp_table;
        struct mlx5_create_qp_mbox_out out;
        struct mlx5_destroy_qp_mbox_in din;
        struct mlx5_destroy_qp_mbox_out dout;
        int err;
-       void *qpc;
 
        memset(&out, 0, sizeof(out));
        in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_QP);
 
-       if (dev->issi) {
-               qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
-               /* 0xffffff means we ask to work with cqe version 0 */
-               MLX5_SET(qpc, qpc, user_index, 0xffffff);
-       }
-
        err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
        if (err) {
                mlx5_core_warn(dev, "ret %d\n", err);
@@ -213,24 +297,16 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev,
        qp->qpn = be32_to_cpu(out.qpn) & 0xffffff;
        mlx5_core_dbg(dev, "qpn = 0x%x\n", qp->qpn);
 
-       qp->common.res = MLX5_RES_QP;
-       spin_lock_irq(&table->lock);
-       err = radix_tree_insert(&table->tree, qp->qpn, qp);
-       spin_unlock_irq(&table->lock);
-       if (err) {
-               mlx5_core_warn(dev, "err %d\n", err);
+       err = create_qprqsq_common(dev, qp, MLX5_RES_QP);
+       if (err)
                goto err_cmd;
-       }
 
        err = mlx5_debug_qp_add(dev, qp);
        if (err)
                mlx5_core_dbg(dev, "failed adding QP 0x%x to debug file system\n",
                              qp->qpn);
 
-       qp->pid = current->pid;
-       atomic_set(&qp->common.refcount, 1);
        atomic_inc(&dev->num_qps);
-       init_completion(&qp->common.free);
 
        return 0;
 
@@ -250,18 +326,11 @@ int mlx5_core_destroy_qp(struct mlx5_core_dev *dev,
 {
        struct mlx5_destroy_qp_mbox_in in;
        struct mlx5_destroy_qp_mbox_out out;
-       struct mlx5_qp_table *table = &dev->priv.qp_table;
-       unsigned long flags;
        int err;
 
        mlx5_debug_qp_remove(dev, qp);
 
-       spin_lock_irqsave(&table->lock, flags);
-       radix_tree_delete(&table->tree, qp->qpn);
-       spin_unlock_irqrestore(&table->lock, flags);
-
-       mlx5_core_put_rsc((struct mlx5_core_rsc_common *)qp);
-       wait_for_completion(&qp->common.free);
+       destroy_qprqsq_common(dev, qp);
 
        memset(&in, 0, sizeof(in));
        memset(&out, 0, sizeof(out));
@@ -279,59 +348,15 @@ int mlx5_core_destroy_qp(struct mlx5_core_dev *dev,
 }
 EXPORT_SYMBOL_GPL(mlx5_core_destroy_qp);
 
-int mlx5_core_qp_modify(struct mlx5_core_dev *dev, enum mlx5_qp_state cur_state,
-                       enum mlx5_qp_state new_state,
+int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 operation,
                        struct mlx5_modify_qp_mbox_in *in, int sqd_event,
                        struct mlx5_core_qp *qp)
 {
-       static const u16 optab[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE] = {
-               [MLX5_QP_STATE_RST] = {
-                       [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
-                       [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
-                       [MLX5_QP_STATE_INIT]    = MLX5_CMD_OP_RST2INIT_QP,
-               },
-               [MLX5_QP_STATE_INIT]  = {
-                       [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
-                       [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
-                       [MLX5_QP_STATE_INIT]    = MLX5_CMD_OP_INIT2INIT_QP,
-                       [MLX5_QP_STATE_RTR]     = MLX5_CMD_OP_INIT2RTR_QP,
-               },
-               [MLX5_QP_STATE_RTR]   = {
-                       [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
-                       [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
-                       [MLX5_QP_STATE_RTS]     = MLX5_CMD_OP_RTR2RTS_QP,
-               },
-               [MLX5_QP_STATE_RTS]   = {
-                       [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
-                       [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
-                       [MLX5_QP_STATE_RTS]     = MLX5_CMD_OP_RTS2RTS_QP,
-               },
-               [MLX5_QP_STATE_SQD] = {
-                       [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
-                       [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
-               },
-               [MLX5_QP_STATE_SQER] = {
-                       [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
-                       [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
-                       [MLX5_QP_STATE_RTS]     = MLX5_CMD_OP_SQERR2RTS_QP,
-               },
-               [MLX5_QP_STATE_ERR] = {
-                       [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
-                       [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
-               }
-       };
-
        struct mlx5_modify_qp_mbox_out out;
        int err = 0;
-       u16 op;
-
-       if (cur_state >= MLX5_QP_NUM_STATE || new_state >= MLX5_QP_NUM_STATE ||
-           !optab[cur_state][new_state])
-               return -EINVAL;
 
        memset(&out, 0, sizeof(out));
-       op = optab[cur_state][new_state];
-       in->hdr.opcode = cpu_to_be16(op);
+       in->hdr.opcode = cpu_to_be16(operation);
        in->qpn = cpu_to_be32(qp->qpn);
        err = mlx5_cmd_exec(dev, in, sizeof(*in), &out, sizeof(out));
        if (err)
@@ -449,3 +474,67 @@ int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 qpn,
 }
 EXPORT_SYMBOL_GPL(mlx5_core_page_fault_resume);
 #endif
+
+int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
+                               struct mlx5_core_qp *rq)
+{
+       int err;
+       u32 rqn;
+
+       err = mlx5_core_create_rq(dev, in, inlen, &rqn);
+       if (err)
+               return err;
+
+       rq->qpn = rqn;
+       err = create_qprqsq_common(dev, rq, MLX5_RES_RQ);
+       if (err)
+               goto err_destroy_rq;
+
+       return 0;
+
+err_destroy_rq:
+       mlx5_core_destroy_rq(dev, rq->qpn);
+
+       return err;
+}
+EXPORT_SYMBOL(mlx5_core_create_rq_tracked);
+
+void mlx5_core_destroy_rq_tracked(struct mlx5_core_dev *dev,
+                                 struct mlx5_core_qp *rq)
+{
+       destroy_qprqsq_common(dev, rq);
+       mlx5_core_destroy_rq(dev, rq->qpn);
+}
+EXPORT_SYMBOL(mlx5_core_destroy_rq_tracked);
+
+int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
+                               struct mlx5_core_qp *sq)
+{
+       int err;
+       u32 sqn;
+
+       err = mlx5_core_create_sq(dev, in, inlen, &sqn);
+       if (err)
+               return err;
+
+       sq->qpn = sqn;
+       err = create_qprqsq_common(dev, sq, MLX5_RES_SQ);
+       if (err)
+               goto err_destroy_sq;
+
+       return 0;
+
+err_destroy_sq:
+       mlx5_core_destroy_sq(dev, sq->qpn);
+
+       return err;
+}
+EXPORT_SYMBOL(mlx5_core_create_sq_tracked);
+
+void mlx5_core_destroy_sq_tracked(struct mlx5_core_dev *dev,
+                                 struct mlx5_core_qp *sq)
+{
+       destroy_qprqsq_common(dev, sq);
+       mlx5_core_destroy_sq(dev, sq->qpn);
+}
+EXPORT_SYMBOL(mlx5_core_destroy_sq_tracked);
index ffada801976bff52ef05320d5605f08798af3129..04bc522605a03bb9cb7e6602f9c003ccfc28433b 100644 (file)
@@ -37,7 +37,7 @@
 #include <linux/mlx5/srq.h>
 #include <rdma/ib_verbs.h>
 #include "mlx5_core.h"
-#include "transobj.h"
+#include <linux/mlx5/transobj.h>
 
 void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type)
 {
@@ -241,8 +241,6 @@ static int create_xrc_srq_cmd(struct mlx5_core_dev *dev,
 
        memcpy(xrc_srqc, srqc, MLX5_ST_SZ_BYTES(srqc));
        memcpy(pas, in->pas, pas_size);
-       /* 0xffffff means we ask to work with cqe version 0 */
-       MLX5_SET(xrc_srqc,          xrc_srqc,  user_index, 0xffffff);
        MLX5_SET(create_xrc_srq_in, create_in, opcode,
                 MLX5_CMD_OP_CREATE_XRC_SRQ);
 
index d7068f54e80066acdcaf6bf081082d50feeebc57..03a5093ffeb72ab6f5c490d1eff42967ceb7f0f4 100644 (file)
@@ -32,9 +32,9 @@
 
 #include <linux/mlx5/driver.h>
 #include "mlx5_core.h"
-#include "transobj.h"
+#include <linux/mlx5/transobj.h>
 
-int mlx5_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn)
+int mlx5_core_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn)
 {
        u32 in[MLX5_ST_SZ_DW(alloc_transport_domain_in)];
        u32 out[MLX5_ST_SZ_DW(alloc_transport_domain_out)];
@@ -53,8 +53,9 @@ int mlx5_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn)
 
        return err;
 }
+EXPORT_SYMBOL(mlx5_core_alloc_transport_domain);
 
-void mlx5_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn)
+void mlx5_core_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn)
 {
        u32 in[MLX5_ST_SZ_DW(dealloc_transport_domain_in)];
        u32 out[MLX5_ST_SZ_DW(dealloc_transport_domain_out)];
@@ -68,6 +69,7 @@ void mlx5_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn)
 
        mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
 }
+EXPORT_SYMBOL(mlx5_core_dealloc_transport_domain);
 
 int mlx5_core_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rqn)
 {
@@ -94,6 +96,7 @@ int mlx5_core_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in, int inlen)
        memset(out, 0, sizeof(out));
        return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out));
 }
+EXPORT_SYMBOL(mlx5_core_modify_rq);
 
 void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn)
 {
@@ -108,6 +111,18 @@ void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn)
        mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
 }
 
+int mlx5_core_query_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *out)
+{
+       u32 in[MLX5_ST_SZ_DW(query_rq_in)] = {0};
+       int outlen = MLX5_ST_SZ_BYTES(query_rq_out);
+
+       MLX5_SET(query_rq_in, in, opcode, MLX5_CMD_OP_QUERY_RQ);
+       MLX5_SET(query_rq_in, in, rqn, rqn);
+
+       return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, outlen);
+}
+EXPORT_SYMBOL(mlx5_core_query_rq);
+
 int mlx5_core_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *sqn)
 {
        u32 out[MLX5_ST_SZ_DW(create_sq_out)];
@@ -133,6 +148,7 @@ int mlx5_core_modify_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *in, int inlen)
        memset(out, 0, sizeof(out));
        return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out));
 }
+EXPORT_SYMBOL(mlx5_core_modify_sq);
 
 void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn)
 {
@@ -147,6 +163,18 @@ void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn)
        mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
 }
 
+int mlx5_core_query_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *out)
+{
+       u32 in[MLX5_ST_SZ_DW(query_sq_in)] = {0};
+       int outlen = MLX5_ST_SZ_BYTES(query_sq_out);
+
+       MLX5_SET(query_sq_in, in, opcode, MLX5_CMD_OP_QUERY_SQ);
+       MLX5_SET(query_sq_in, in, sqn, sqn);
+
+       return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, outlen);
+}
+EXPORT_SYMBOL(mlx5_core_query_sq);
+
 int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen,
                         u32 *tirn)
 {
@@ -162,6 +190,7 @@ int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen,
 
        return err;
 }
+EXPORT_SYMBOL(mlx5_core_create_tir);
 
 int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in,
                         int inlen)
@@ -187,6 +216,7 @@ void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn)
 
        mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
 }
+EXPORT_SYMBOL(mlx5_core_destroy_tir);
 
 int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen,
                         u32 *tisn)
@@ -203,6 +233,19 @@ int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen,
 
        return err;
 }
+EXPORT_SYMBOL(mlx5_core_create_tis);
+
+int mlx5_core_modify_tis(struct mlx5_core_dev *dev, u32 tisn, u32 *in,
+                        int inlen)
+{
+       u32 out[MLX5_ST_SZ_DW(modify_tis_out)] = {0};
+
+       MLX5_SET(modify_tis_in, in, tisn, tisn);
+       MLX5_SET(modify_tis_in, in, opcode, MLX5_CMD_OP_MODIFY_TIS);
+
+       return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out));
+}
+EXPORT_SYMBOL(mlx5_core_modify_tis);
 
 void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn)
 {
@@ -216,6 +259,7 @@ void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn)
 
        mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
 }
+EXPORT_SYMBOL(mlx5_core_destroy_tis);
 
 int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen,
                         u32 *rmpn)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.h b/drivers/net/ethernet/mellanox/mlx5/core/transobj.h
deleted file mode 100644 (file)
index 74cae51..0000000
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (c) 2013-2015, Mellanox Technologies, Ltd.  All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __TRANSOBJ_H__
-#define __TRANSOBJ_H__
-
-int mlx5_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn);
-void mlx5_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn);
-int mlx5_core_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen,
-                       u32 *rqn);
-int mlx5_core_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in, int inlen);
-void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn);
-int mlx5_core_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen,
-                       u32 *sqn);
-int mlx5_core_modify_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *in, int inlen);
-void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn);
-int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen,
-                        u32 *tirn);
-int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in,
-                        int inlen);
-void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn);
-int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen,
-                        u32 *tisn);
-void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn);
-int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen,
-                        u32 *rmpn);
-int mlx5_core_modify_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen);
-int mlx5_core_destroy_rmp(struct mlx5_core_dev *dev, u32 rmpn);
-int mlx5_core_query_rmp(struct mlx5_core_dev *dev, u32 rmpn, u32 *out);
-int mlx5_core_arm_rmp(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm);
-int mlx5_core_create_xsrq(struct mlx5_core_dev *dev, u32 *in, int inlen,
-                         u32 *rmpn);
-int mlx5_core_destroy_xsrq(struct mlx5_core_dev *dev, u32 rmpn);
-int mlx5_core_query_xsrq(struct mlx5_core_dev *dev, u32 rmpn, u32 *out);
-int mlx5_core_arm_xsrq(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm);
-
-int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen,
-                        u32 *rqtn);
-int mlx5_core_modify_rqt(struct mlx5_core_dev *dev, u32 rqtn, u32 *in,
-                        int inlen);
-void mlx5_core_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn);
-
-#endif /* __TRANSOBJ_H__ */
index 076197efea9b07916480a97a8258e84703a3db66..c7398b95aecdc0286480f85564e5a42660275bd6 100644 (file)
@@ -76,7 +76,7 @@ u8 mlx5_query_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
 
        return MLX5_GET(query_vport_state_out, out, admin_state);
 }
-EXPORT_SYMBOL(mlx5_query_vport_admin_state);
+EXPORT_SYMBOL_GPL(mlx5_query_vport_admin_state);
 
 int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
                                  u16 vport, u8 state)
@@ -104,7 +104,7 @@ int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
 
        return err;
 }
-EXPORT_SYMBOL(mlx5_modify_vport_admin_state);
+EXPORT_SYMBOL_GPL(mlx5_modify_vport_admin_state);
 
 static int mlx5_query_nic_vport_context(struct mlx5_core_dev *mdev, u16 vport,
                                        u32 *out, int outlen)
@@ -151,12 +151,9 @@ int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev,
                                nic_vport_context.permanent_address);
 
        err = mlx5_query_nic_vport_context(mdev, vport, out, outlen);
-       if (err)
-               goto out;
-
-       ether_addr_copy(addr, &out_addr[2]);
+       if (!err)
+               ether_addr_copy(addr, &out_addr[2]);
 
-out:
        kvfree(out);
        return err;
 }
@@ -197,7 +194,7 @@ int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *mdev,
 
        return err;
 }
-EXPORT_SYMBOL(mlx5_modify_nic_vport_mac_address);
+EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mac_address);
 
 int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev,
                                  u32 vport,
@@ -430,6 +427,68 @@ int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev,
 }
 EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_vlans);
 
+int mlx5_query_nic_vport_system_image_guid(struct mlx5_core_dev *mdev,
+                                          u64 *system_image_guid)
+{
+       u32 *out;
+       int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
+
+       out = mlx5_vzalloc(outlen);
+       if (!out)
+               return -ENOMEM;
+
+       mlx5_query_nic_vport_context(mdev, 0, out, outlen);
+
+       *system_image_guid = MLX5_GET64(query_nic_vport_context_out, out,
+                                       nic_vport_context.system_image_guid);
+
+       kfree(out);
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_system_image_guid);
+
+int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid)
+{
+       u32 *out;
+       int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
+
+       out = mlx5_vzalloc(outlen);
+       if (!out)
+               return -ENOMEM;
+
+       mlx5_query_nic_vport_context(mdev, 0, out, outlen);
+
+       *node_guid = MLX5_GET64(query_nic_vport_context_out, out,
+                               nic_vport_context.node_guid);
+
+       kfree(out);
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_node_guid);
+
+int mlx5_query_nic_vport_qkey_viol_cntr(struct mlx5_core_dev *mdev,
+                                       u16 *qkey_viol_cntr)
+{
+       u32 *out;
+       int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
+
+       out = mlx5_vzalloc(outlen);
+       if (!out)
+               return -ENOMEM;
+
+       mlx5_query_nic_vport_context(mdev, 0, out, outlen);
+
+       *qkey_viol_cntr = MLX5_GET(query_nic_vport_context_out, out,
+                                  nic_vport_context.qkey_violation_counter);
+
+       kfree(out);
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_qkey_viol_cntr);
+
 int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport,
                             u8 port_num, u16  vf_num, u16 gid_index,
                             union ib_gid *gid)
@@ -750,3 +809,44 @@ int mlx5_modify_nic_vport_promisc(struct mlx5_core_dev *mdev,
        return err;
 }
 EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_promisc);
+
+enum mlx5_vport_roce_state {
+       MLX5_VPORT_ROCE_DISABLED = 0,
+       MLX5_VPORT_ROCE_ENABLED  = 1,
+};
+
+static int mlx5_nic_vport_update_roce_state(struct mlx5_core_dev *mdev,
+                                           enum mlx5_vport_roce_state state)
+{
+       void *in;
+       int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
+       int err;
+
+       in = mlx5_vzalloc(inlen);
+       if (!in) {
+               mlx5_core_warn(mdev, "failed to allocate inbox\n");
+               return -ENOMEM;
+       }
+
+       MLX5_SET(modify_nic_vport_context_in, in, field_select.roce_en, 1);
+       MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.roce_en,
+                state);
+
+       err = mlx5_modify_nic_vport_context(mdev, in, inlen);
+
+       kvfree(in);
+
+       return err;
+}
+
+int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev)
+{
+       return mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_ENABLED);
+}
+EXPORT_SYMBOL_GPL(mlx5_nic_vport_enable_roce);
+
+int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev)
+{
+       return mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_DISABLED);
+}
+EXPORT_SYMBOL_GPL(mlx5_nic_vport_disable_roce);
index 0c5237264e3e29519933180f307a531c8b34c054..bb77e2207804d9c3431b091f8d219297c8f92300 100644 (file)
@@ -1044,6 +1044,92 @@ static inline void mlxsw_reg_sftr_pack(char *payload,
        mlxsw_reg_sftr_port_mask_set(payload, port, 1);
 }
 
+/* SFDF - Switch Filtering DB Flush
+ * --------------------------------
+ * The switch filtering DB flush register is used to flush the FDB.
+ * Note that FDB notifications are flushed as well.
+ */
+#define MLXSW_REG_SFDF_ID 0x2013
+#define MLXSW_REG_SFDF_LEN 0x14
+
+static const struct mlxsw_reg_info mlxsw_reg_sfdf = {
+       .id = MLXSW_REG_SFDF_ID,
+       .len = MLXSW_REG_SFDF_LEN,
+};
+
+/* reg_sfdf_swid
+ * Switch partition ID.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, sfdf, swid, 0x00, 24, 8);
+
+enum mlxsw_reg_sfdf_flush_type {
+       MLXSW_REG_SFDF_FLUSH_PER_SWID,
+       MLXSW_REG_SFDF_FLUSH_PER_FID,
+       MLXSW_REG_SFDF_FLUSH_PER_PORT,
+       MLXSW_REG_SFDF_FLUSH_PER_PORT_AND_FID,
+       MLXSW_REG_SFDF_FLUSH_PER_LAG,
+       MLXSW_REG_SFDF_FLUSH_PER_LAG_AND_FID,
+};
+
+/* reg_sfdf_flush_type
+ * Flush type.
+ * 0 - All SWID dynamic entries are flushed.
+ * 1 - All FID dynamic entries are flushed.
+ * 2 - All dynamic entries pointing to port are flushed.
+ * 3 - All FID dynamic entries pointing to port are flushed.
+ * 4 - All dynamic entries pointing to LAG are flushed.
+ * 5 - All FID dynamic entries pointing to LAG are flushed.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sfdf, flush_type, 0x04, 28, 4);
+
+/* reg_sfdf_flush_static
+ * Static.
+ * 0 - Flush only dynamic entries.
+ * 1 - Flush both dynamic and static entries.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sfdf, flush_static, 0x04, 24, 1);
+
+static inline void mlxsw_reg_sfdf_pack(char *payload,
+                                      enum mlxsw_reg_sfdf_flush_type type)
+{
+       MLXSW_REG_ZERO(sfdf, payload);
+       mlxsw_reg_sfdf_flush_type_set(payload, type);
+       mlxsw_reg_sfdf_flush_static_set(payload, true);
+}
+
+/* reg_sfdf_fid
+ * FID to flush.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sfdf, fid, 0x0C, 0, 16);
+
+/* reg_sfdf_system_port
+ * Port to flush.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sfdf, system_port, 0x0C, 0, 16);
+
+/* reg_sfdf_port_fid_system_port
+ * Port to flush, pointed to by FID.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sfdf, port_fid_system_port, 0x08, 0, 16);
+
+/* reg_sfdf_lag_id
+ * LAG ID to flush.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sfdf, lag_id, 0x0C, 0, 10);
+
+/* reg_sfdf_lag_fid_lag_id
+ * LAG ID to flush, pointed to by FID.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sfdf, lag_fid_lag_id, 0x08, 0, 10);
+
 /* SLDR - Switch LAG Descriptor Register
  * -----------------------------------------
  * The switch LAG descriptor register is populated by LAG descriptors.
@@ -1701,20 +1787,20 @@ MLXSW_ITEM32(reg, pmlp, width, 0x00, 0, 8);
  * Module number.
  * Access: RW
  */
-MLXSW_ITEM32_INDEXED(reg, pmlp, module, 0x04, 0, 8, 0x04, 0, false);
+MLXSW_ITEM32_INDEXED(reg, pmlp, module, 0x04, 0, 8, 0x04, 0x00, false);
 
 /* reg_pmlp_tx_lane
  * Tx Lane. When rxtx field is cleared, this field is used for Rx as well.
  * Access: RW
  */
-MLXSW_ITEM32_INDEXED(reg, pmlp, tx_lane, 0x04, 16, 2, 0x04, 16, false);
+MLXSW_ITEM32_INDEXED(reg, pmlp, tx_lane, 0x04, 16, 2, 0x04, 0x00, false);
 
 /* reg_pmlp_rx_lane
  * Rx Lane. When rxtx field is cleared, this field is ignored and Rx lane is
  * equal to Tx lane.
  * Access: RW
  */
-MLXSW_ITEM32_INDEXED(reg, pmlp, rx_lane, 0x04, 24, 2, 0x04, 24, false);
+MLXSW_ITEM32_INDEXED(reg, pmlp, rx_lane, 0x04, 24, 2, 0x04, 0x00, false);
 
 static inline void mlxsw_reg_pmlp_pack(char *payload, u8 local_port)
 {
@@ -3121,6 +3207,8 @@ static inline const char *mlxsw_reg_id_str(u16 reg_id)
                return "SFGC";
        case MLXSW_REG_SFTR_ID:
                return "SFTR";
+       case MLXSW_REG_SFDF_ID:
+               return "SFDF";
        case MLXSW_REG_SLDR_ID:
                return "SLDR";
        case MLXSW_REG_SLCR_ID:
index ce6845d534a83f7acea04c0d0c4401103192f0ab..217856bdd400474d400d1e4e3dd275a096f50db5 100644 (file)
@@ -1979,6 +1979,115 @@ static struct mlxsw_driver mlxsw_sp_driver = {
        .profile                = &mlxsw_sp_config_profile,
 };
 
+static int
+mlxsw_sp_port_fdb_flush_by_port(const struct mlxsw_sp_port *mlxsw_sp_port)
+{
+       struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+       char sfdf_pl[MLXSW_REG_SFDF_LEN];
+
+       mlxsw_reg_sfdf_pack(sfdf_pl, MLXSW_REG_SFDF_FLUSH_PER_PORT);
+       mlxsw_reg_sfdf_system_port_set(sfdf_pl, mlxsw_sp_port->local_port);
+
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfdf), sfdf_pl);
+}
+
+static int
+mlxsw_sp_port_fdb_flush_by_port_fid(const struct mlxsw_sp_port *mlxsw_sp_port,
+                                   u16 fid)
+{
+       struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+       char sfdf_pl[MLXSW_REG_SFDF_LEN];
+
+       mlxsw_reg_sfdf_pack(sfdf_pl, MLXSW_REG_SFDF_FLUSH_PER_PORT_AND_FID);
+       mlxsw_reg_sfdf_fid_set(sfdf_pl, fid);
+       mlxsw_reg_sfdf_port_fid_system_port_set(sfdf_pl,
+                                               mlxsw_sp_port->local_port);
+
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfdf), sfdf_pl);
+}
+
+static int
+mlxsw_sp_port_fdb_flush_by_lag_id(const struct mlxsw_sp_port *mlxsw_sp_port)
+{
+       struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+       char sfdf_pl[MLXSW_REG_SFDF_LEN];
+
+       mlxsw_reg_sfdf_pack(sfdf_pl, MLXSW_REG_SFDF_FLUSH_PER_LAG);
+       mlxsw_reg_sfdf_lag_id_set(sfdf_pl, mlxsw_sp_port->lag_id);
+
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfdf), sfdf_pl);
+}
+
+static int
+mlxsw_sp_port_fdb_flush_by_lag_id_fid(const struct mlxsw_sp_port *mlxsw_sp_port,
+                                     u16 fid)
+{
+       struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+       char sfdf_pl[MLXSW_REG_SFDF_LEN];
+
+       mlxsw_reg_sfdf_pack(sfdf_pl, MLXSW_REG_SFDF_FLUSH_PER_LAG_AND_FID);
+       mlxsw_reg_sfdf_fid_set(sfdf_pl, fid);
+       mlxsw_reg_sfdf_lag_fid_lag_id_set(sfdf_pl, mlxsw_sp_port->lag_id);
+
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfdf), sfdf_pl);
+}
+
+static int
+__mlxsw_sp_port_fdb_flush(const struct mlxsw_sp_port *mlxsw_sp_port)
+{
+       int err, last_err = 0;
+       u16 vid;
+
+       for (vid = 1; vid < VLAN_N_VID - 1; vid++) {
+               err = mlxsw_sp_port_fdb_flush_by_port_fid(mlxsw_sp_port, vid);
+               if (err)
+                       last_err = err;
+       }
+
+       return last_err;
+}
+
+static int
+__mlxsw_sp_port_fdb_flush_lagged(const struct mlxsw_sp_port *mlxsw_sp_port)
+{
+       int err, last_err = 0;
+       u16 vid;
+
+       for (vid = 1; vid < VLAN_N_VID - 1; vid++) {
+               err = mlxsw_sp_port_fdb_flush_by_lag_id_fid(mlxsw_sp_port, vid);
+               if (err)
+                       last_err = err;
+       }
+
+       return last_err;
+}
+
+static int mlxsw_sp_port_fdb_flush(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+       if (!list_empty(&mlxsw_sp_port->vports_list))
+               if (mlxsw_sp_port->lagged)
+                       return __mlxsw_sp_port_fdb_flush_lagged(mlxsw_sp_port);
+               else
+                       return __mlxsw_sp_port_fdb_flush(mlxsw_sp_port);
+       else
+               if (mlxsw_sp_port->lagged)
+                       return mlxsw_sp_port_fdb_flush_by_lag_id(mlxsw_sp_port);
+               else
+                       return mlxsw_sp_port_fdb_flush_by_port(mlxsw_sp_port);
+}
+
+static int mlxsw_sp_vport_fdb_flush(struct mlxsw_sp_port *mlxsw_sp_vport)
+{
+       u16 vfid = mlxsw_sp_vport_vfid_get(mlxsw_sp_vport);
+       u16 fid = mlxsw_sp_vfid_to_fid(vfid);
+
+       if (mlxsw_sp_vport->lagged)
+               return mlxsw_sp_port_fdb_flush_by_lag_id_fid(mlxsw_sp_vport,
+                                                            fid);
+       else
+               return mlxsw_sp_port_fdb_flush_by_port_fid(mlxsw_sp_vport, fid);
+}
+
 static bool mlxsw_sp_port_dev_check(const struct net_device *dev)
 {
        return dev->netdev_ops == &mlxsw_sp_port_netdev_ops;
@@ -2006,10 +2115,14 @@ static int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port)
        return 0;
 }
 
-static int mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port)
+static int mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port,
+                                     bool flush_fdb)
 {
        struct net_device *dev = mlxsw_sp_port->dev;
 
+       if (flush_fdb && mlxsw_sp_port_fdb_flush(mlxsw_sp_port))
+               netdev_err(mlxsw_sp_port->dev, "Failed to flush FDB\n");
+
        mlxsw_sp_port->learning = 0;
        mlxsw_sp_port->learning_sync = 0;
        mlxsw_sp_port->uc_flood = 0;
@@ -2200,10 +2313,15 @@ err_col_port_enable:
        return err;
 }
 
+static int mlxsw_sp_vport_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_vport,
+                                      struct net_device *br_dev,
+                                      bool flush_fdb);
+
 static int mlxsw_sp_port_lag_leave(struct mlxsw_sp_port *mlxsw_sp_port,
                                   struct net_device *lag_dev)
 {
        struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+       struct mlxsw_sp_port *mlxsw_sp_vport;
        struct mlxsw_sp_upper *lag;
        u16 lag_id = mlxsw_sp_port->lag_id;
        int err;
@@ -2220,7 +2338,32 @@ static int mlxsw_sp_port_lag_leave(struct mlxsw_sp_port *mlxsw_sp_port,
        if (err)
                return err;
 
+       /* In case we leave a LAG device that has bridges built on top,
+        * then their teardown sequence is never issued and we need to
+        * invoke the necessary cleanup routines ourselves.
+        */
+       list_for_each_entry(mlxsw_sp_vport, &mlxsw_sp_port->vports_list,
+                           vport.list) {
+               struct net_device *br_dev;
+
+               if (!mlxsw_sp_vport->bridged)
+                       continue;
+
+               br_dev = mlxsw_sp_vport_br_get(mlxsw_sp_vport);
+               mlxsw_sp_vport_bridge_leave(mlxsw_sp_vport, br_dev, false);
+       }
+
+       if (mlxsw_sp_port->bridged) {
+               mlxsw_sp_port_active_vlans_del(mlxsw_sp_port);
+               mlxsw_sp_port_bridge_leave(mlxsw_sp_port, false);
+
+               if (lag->ref_count == 1)
+                       mlxsw_sp_master_bridge_dec(mlxsw_sp, NULL);
+       }
+
        if (lag->ref_count == 1) {
+               if (mlxsw_sp_port_fdb_flush_by_lag_id(mlxsw_sp_port))
+                       netdev_err(mlxsw_sp_port->dev, "Failed to flush FDB\n");
                err = mlxsw_sp_lag_destroy(mlxsw_sp, lag_id);
                if (err)
                        return err;
@@ -2272,9 +2415,6 @@ static int mlxsw_sp_port_lag_changed(struct mlxsw_sp_port *mlxsw_sp_port,
        return mlxsw_sp_port_lag_tx_en_set(mlxsw_sp_port, info->tx_enabled);
 }
 
-static int mlxsw_sp_vport_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_vport,
-                                      struct net_device *br_dev);
-
 static int mlxsw_sp_port_vlan_link(struct mlxsw_sp_port *mlxsw_sp_port,
                                   struct net_device *vlan_dev)
 {
@@ -2312,7 +2452,7 @@ static int mlxsw_sp_port_vlan_unlink(struct mlxsw_sp_port *mlxsw_sp_port,
                struct net_device *br_dev;
 
                br_dev = mlxsw_sp_vport_br_get(mlxsw_sp_vport);
-               mlxsw_sp_vport_bridge_leave(mlxsw_sp_vport, br_dev);
+               mlxsw_sp_vport_bridge_leave(mlxsw_sp_vport, br_dev, true);
        }
 
        mlxsw_sp_vport->dev = mlxsw_sp_port->dev;
@@ -2374,7 +2514,8 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *dev,
                                }
                                mlxsw_sp_master_bridge_inc(mlxsw_sp, upper_dev);
                        } else {
-                               err = mlxsw_sp_port_bridge_leave(mlxsw_sp_port);
+                               err = mlxsw_sp_port_bridge_leave(mlxsw_sp_port,
+                                                                true);
                                mlxsw_sp_master_bridge_dec(mlxsw_sp, upper_dev);
                                if (err) {
                                        netdev_err(dev, "Failed to leave bridge\n");
@@ -2541,7 +2682,8 @@ static void mlxsw_sp_br_vfid_destroy(struct mlxsw_sp *mlxsw_sp,
 }
 
 static int mlxsw_sp_vport_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_vport,
-                                      struct net_device *br_dev)
+                                      struct net_device *br_dev,
+                                      bool flush_fdb)
 {
        struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp;
        u16 vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport);
@@ -2604,6 +2746,9 @@ static int mlxsw_sp_vport_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_vport,
                goto err_vport_flood_set;
        }
 
+       if (flush_fdb && mlxsw_sp_vport_fdb_flush(mlxsw_sp_vport))
+               netdev_err(dev, "Failed to flush FDB\n");
+
        /* Switch between the vFIDs and destroy the old one if needed. */
        new_vfid->nr_vports++;
        mlxsw_sp_vport->vport.vfid = new_vfid;
@@ -2777,7 +2922,7 @@ static int mlxsw_sp_netdevice_vport_event(struct net_device *dev,
                        if (!mlxsw_sp_vport)
                                return NOTIFY_DONE;
                        err = mlxsw_sp_vport_bridge_leave(mlxsw_sp_vport,
-                                                         upper_dev);
+                                                         upper_dev, true);
                        if (err) {
                                netdev_err(dev, "Failed to leave bridge\n");
                                return NOTIFY_BAD;
index a23dc610d2593aedce974eddf94789d3aaf181a6..7f42eb1c320e1c02ff12892c94261800e29eed64 100644 (file)
@@ -120,7 +120,6 @@ struct mlxsw_sp {
        } fdb_notify;
 #define MLXSW_SP_DEFAULT_AGEING_TIME 300
        u32 ageing_time;
-       struct mutex fdb_lock;  /* Make sure FDB sessions are atomic. */
        struct mlxsw_sp_upper master_bridge;
        struct mlxsw_sp_upper lags[MLXSW_SP_LAG_MAX];
 };
@@ -254,5 +253,6 @@ int mlxsw_sp_port_kill_vid(struct net_device *dev,
                           __be16 __always_unused proto, u16 vid);
 int mlxsw_sp_vport_flood_set(struct mlxsw_sp_port *mlxsw_sp_vport, u16 vfid,
                             bool set, bool only_uc);
+void mlxsw_sp_port_active_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port);
 
 #endif
index 45479ef5bcf4f44848f9989f83f46ce66fe4c11b..e492ca2cdecd9953d5f0ec4124f7077e839803dc 100644 (file)
@@ -45,6 +45,7 @@
 #include <linux/if_bridge.h>
 #include <linux/workqueue.h>
 #include <linux/jiffies.h>
+#include <linux/rtnetlink.h>
 #include <net/switchdev.h>
 
 #include "spectrum.h"
@@ -124,14 +125,14 @@ static int mlxsw_sp_port_stp_state_set(struct mlxsw_sp_port *mlxsw_sp_port,
        int err;
 
        switch (state) {
-       case BR_STATE_DISABLED: /* fall-through */
        case BR_STATE_FORWARDING:
                spms_state = MLXSW_REG_SPMS_STATE_FORWARDING;
                break;
-       case BR_STATE_LISTENING: /* fall-through */
        case BR_STATE_LEARNING:
                spms_state = MLXSW_REG_SPMS_STATE_LEARNING;
                break;
+       case BR_STATE_LISTENING: /* fall-through */
+       case BR_STATE_DISABLED: /* fall-through */
        case BR_STATE_BLOCKING:
                spms_state = MLXSW_REG_SPMS_STATE_DISCARDING;
                break;
@@ -936,6 +937,14 @@ static int mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port,
                                         vlan->vid_begin, vlan->vid_end, false);
 }
 
+void mlxsw_sp_port_active_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+       u16 vid;
+
+       for_each_set_bit(vid, mlxsw_sp_port->active_vlans, VLAN_N_VID)
+               __mlxsw_sp_port_vlans_del(mlxsw_sp_port, vid, vid, false);
+}
+
 static int
 mlxsw_sp_port_fdb_static_del(struct mlxsw_sp_port *mlxsw_sp_port,
                             const struct switchdev_obj_port_fdb *fdb)
@@ -1040,10 +1049,12 @@ static struct mlxsw_sp_port *mlxsw_sp_lag_rep_port(struct mlxsw_sp *mlxsw_sp,
 
 static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port,
                                  struct switchdev_obj_port_fdb *fdb,
-                                 switchdev_obj_dump_cb_t *cb)
+                                 switchdev_obj_dump_cb_t *cb,
+                                 struct net_device *orig_dev)
 {
        struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
-       u16 vport_vid = 0, vport_fid = 0;
+       struct mlxsw_sp_port *tmp;
+       u16 vport_fid = 0;
        char *sfd_pl;
        char mac[ETH_ALEN];
        u16 fid;
@@ -1058,13 +1069,11 @@ static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port,
        if (!sfd_pl)
                return -ENOMEM;
 
-       mutex_lock(&mlxsw_sp_port->mlxsw_sp->fdb_lock);
        if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) {
                u16 tmp;
 
                tmp = mlxsw_sp_vport_vfid_get(mlxsw_sp_port);
                vport_fid = mlxsw_sp_vfid_to_fid(tmp);
-               vport_vid = mlxsw_sp_vport_vid_get(mlxsw_sp_port);
        }
 
        mlxsw_reg_sfd_pack(sfd_pl, MLXSW_REG_SFD_OP_QUERY_DUMP, 0);
@@ -1088,12 +1097,13 @@ static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port,
                                mlxsw_reg_sfd_uc_unpack(sfd_pl, i, mac, &fid,
                                                        &local_port);
                                if (local_port == mlxsw_sp_port->local_port) {
-                                       if (vport_fid && vport_fid != fid)
-                                               continue;
-                                       else if (vport_fid)
-                                               fdb->vid = vport_vid;
-                                       else
+                                       if (vport_fid && vport_fid == fid)
+                                               fdb->vid = 0;
+                                       else if (!vport_fid &&
+                                                !mlxsw_sp_fid_is_vfid(fid))
                                                fdb->vid = fid;
+                                       else
+                                               continue;
                                        ether_addr_copy(fdb->addr, mac);
                                        fdb->ndm_state = NUD_REACHABLE;
                                        err = cb(&fdb->obj);
@@ -1104,14 +1114,22 @@ static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port,
                        case MLXSW_REG_SFD_REC_TYPE_UNICAST_LAG:
                                mlxsw_reg_sfd_uc_lag_unpack(sfd_pl, i,
                                                            mac, &fid, &lag_id);
-                               if (mlxsw_sp_port ==
-                                   mlxsw_sp_lag_rep_port(mlxsw_sp, lag_id)) {
-                                       if (vport_fid && vport_fid != fid)
+                               tmp = mlxsw_sp_lag_rep_port(mlxsw_sp, lag_id);
+                               if (tmp && tmp->local_port ==
+                                   mlxsw_sp_port->local_port) {
+                                       /* LAG records can only point to LAG
+                                        * devices or VLAN devices on top.
+                                        */
+                                       if (!netif_is_lag_master(orig_dev) &&
+                                           !is_vlan_dev(orig_dev))
                                                continue;
-                                       else if (vport_fid)
-                                               fdb->vid = vport_vid;
-                                       else
+                                       if (vport_fid && vport_fid == fid)
+                                               fdb->vid = 0;
+                                       else if (!vport_fid &&
+                                                !mlxsw_sp_fid_is_vfid(fid))
                                                fdb->vid = fid;
+                                       else
+                                               continue;
                                        ether_addr_copy(fdb->addr, mac);
                                        fdb->ndm_state = NUD_REACHABLE;
                                        err = cb(&fdb->obj);
@@ -1124,7 +1142,6 @@ static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port,
        } while (num_rec == MLXSW_REG_SFD_REC_MAX_COUNT);
 
 out:
-       mutex_unlock(&mlxsw_sp_port->mlxsw_sp->fdb_lock);
        kfree(sfd_pl);
        return stored_err ? stored_err : err;
 }
@@ -1176,7 +1193,8 @@ static int mlxsw_sp_port_obj_dump(struct net_device *dev,
                break;
        case SWITCHDEV_OBJ_ID_PORT_FDB:
                err = mlxsw_sp_port_fdb_dump(mlxsw_sp_port,
-                                            SWITCHDEV_OBJ_PORT_FDB(obj), cb);
+                                            SWITCHDEV_OBJ_PORT_FDB(obj), cb,
+                                            obj->orig_dev);
                break;
        default:
                err = -EOPNOTSUPP;
@@ -1194,14 +1212,14 @@ static const struct switchdev_ops mlxsw_sp_port_switchdev_ops = {
        .switchdev_port_obj_dump        = mlxsw_sp_port_obj_dump,
 };
 
-static void mlxsw_sp_fdb_call_notifiers(bool learning, bool learning_sync,
-                                       bool adding, char *mac, u16 vid,
+static void mlxsw_sp_fdb_call_notifiers(bool learning_sync, bool adding,
+                                       char *mac, u16 vid,
                                        struct net_device *dev)
 {
        struct switchdev_notifier_fdb_info info;
        unsigned long notifier_type;
 
-       if (learning && learning_sync) {
+       if (learning_sync) {
                info.addr = mac;
                info.vid = vid;
                notifier_type = adding ? SWITCHDEV_FDB_ADD : SWITCHDEV_FDB_DEL;
@@ -1237,7 +1255,7 @@ static void mlxsw_sp_fdb_notify_mac_process(struct mlxsw_sp *mlxsw_sp,
                        netdev_err(mlxsw_sp_port->dev, "Failed to find a matching vPort following FDB notification\n");
                        goto just_remove;
                }
-               vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport);
+               vid = 0;
                /* Override the physical port with the vPort. */
                mlxsw_sp_port = mlxsw_sp_vport;
        } else {
@@ -1257,8 +1275,7 @@ do_fdb_op:
 
        if (!do_notification)
                return;
-       mlxsw_sp_fdb_call_notifiers(mlxsw_sp_port->learning,
-                                   mlxsw_sp_port->learning_sync,
+       mlxsw_sp_fdb_call_notifiers(mlxsw_sp_port->learning_sync,
                                    adding, mac, vid, mlxsw_sp_port->dev);
        return;
 
@@ -1273,6 +1290,7 @@ static void mlxsw_sp_fdb_notify_mac_lag_process(struct mlxsw_sp *mlxsw_sp,
                                                bool adding)
 {
        struct mlxsw_sp_port *mlxsw_sp_port;
+       struct net_device *dev;
        char mac[ETH_ALEN];
        u16 lag_vid = 0;
        u16 lag_id;
@@ -1298,11 +1316,13 @@ static void mlxsw_sp_fdb_notify_mac_lag_process(struct mlxsw_sp *mlxsw_sp,
                        goto just_remove;
                }
 
-               vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport);
-               lag_vid = vid;
+               lag_vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport);
+               dev = mlxsw_sp_vport->dev;
+               vid = 0;
                /* Override the physical port with the vPort. */
                mlxsw_sp_port = mlxsw_sp_vport;
        } else {
+               dev = mlxsw_sp_lag_get(mlxsw_sp, lag_id)->dev;
                vid = fid;
        }
 
@@ -1319,10 +1339,8 @@ do_fdb_op:
 
        if (!do_notification)
                return;
-       mlxsw_sp_fdb_call_notifiers(mlxsw_sp_port->learning,
-                                   mlxsw_sp_port->learning_sync,
-                                   adding, mac, vid,
-                                   mlxsw_sp_lag_get(mlxsw_sp, lag_id)->dev);
+       mlxsw_sp_fdb_call_notifiers(mlxsw_sp_port->learning_sync, adding, mac,
+                                   vid, dev);
        return;
 
 just_remove:
@@ -1374,7 +1392,7 @@ static void mlxsw_sp_fdb_notify_work(struct work_struct *work)
 
        mlxsw_sp = container_of(work, struct mlxsw_sp, fdb_notify.dw.work);
 
-       mutex_lock(&mlxsw_sp->fdb_lock);
+       rtnl_lock();
        do {
                mlxsw_reg_sfn_pack(sfn_pl);
                err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(sfn), sfn_pl);
@@ -1387,7 +1405,7 @@ static void mlxsw_sp_fdb_notify_work(struct work_struct *work)
                        mlxsw_sp_fdb_notify_rec_process(mlxsw_sp, sfn_pl, i);
 
        } while (num_rec);
-       mutex_unlock(&mlxsw_sp->fdb_lock);
+       rtnl_unlock();
 
        kfree(sfn_pl);
        mlxsw_sp_fdb_notify_work_schedule(mlxsw_sp);
@@ -1402,7 +1420,6 @@ static int mlxsw_sp_fdb_init(struct mlxsw_sp *mlxsw_sp)
                dev_err(mlxsw_sp->bus_info->dev, "Failed to set default ageing time\n");
                return err;
        }
-       mutex_init(&mlxsw_sp->fdb_lock);
        INIT_DELAYED_WORK(&mlxsw_sp->fdb_notify.dw, mlxsw_sp_fdb_notify_work);
        mlxsw_sp->fdb_notify.interval = MLXSW_SP_DEFAULT_LEARNING_INTERVAL;
        mlxsw_sp_fdb_notify_work_schedule(mlxsw_sp);
index a10c928bbd6b9bf5d192dfc6453395312062fe3f..00cfd95ca59d53fe040ef5b002e7d30604d35e96 100644 (file)
 
 #include "moxart_ether.h"
 
+static inline void moxart_desc_write(u32 data, u32 *desc)
+{
+       *desc = cpu_to_le32(data);
+}
+
+static inline u32 moxart_desc_read(u32 *desc)
+{
+       return le32_to_cpu(*desc);
+}
+
 static inline void moxart_emac_write(struct net_device *ndev,
                                     unsigned int reg, unsigned long value)
 {
@@ -112,7 +122,7 @@ static void moxart_mac_enable(struct net_device *ndev)
 static void moxart_mac_setup_desc_ring(struct net_device *ndev)
 {
        struct moxart_mac_priv_t *priv = netdev_priv(ndev);
-       void __iomem *desc;
+       void *desc;
        int i;
 
        for (i = 0; i < TX_DESC_NUM; i++) {
@@ -121,7 +131,7 @@ static void moxart_mac_setup_desc_ring(struct net_device *ndev)
 
                priv->tx_buf[i] = priv->tx_buf_base + priv->tx_buf_size * i;
        }
-       writel(TX_DESC1_END, desc + TX_REG_OFFSET_DESC1);
+       moxart_desc_write(TX_DESC1_END, desc + TX_REG_OFFSET_DESC1);
 
        priv->tx_head = 0;
        priv->tx_tail = 0;
@@ -129,8 +139,8 @@ static void moxart_mac_setup_desc_ring(struct net_device *ndev)
        for (i = 0; i < RX_DESC_NUM; i++) {
                desc = priv->rx_desc_base + i * RX_REG_DESC_SIZE;
                memset(desc, 0, RX_REG_DESC_SIZE);
-               writel(RX_DESC0_DMA_OWN, desc + RX_REG_OFFSET_DESC0);
-               writel(RX_BUF_SIZE & RX_DESC1_BUF_SIZE_MASK,
+               moxart_desc_write(RX_DESC0_DMA_OWN, desc + RX_REG_OFFSET_DESC0);
+               moxart_desc_write(RX_BUF_SIZE & RX_DESC1_BUF_SIZE_MASK,
                       desc + RX_REG_OFFSET_DESC1);
 
                priv->rx_buf[i] = priv->rx_buf_base + priv->rx_buf_size * i;
@@ -141,12 +151,12 @@ static void moxart_mac_setup_desc_ring(struct net_device *ndev)
                if (dma_mapping_error(&ndev->dev, priv->rx_mapping[i]))
                        netdev_err(ndev, "DMA mapping error\n");
 
-               writel(priv->rx_mapping[i],
+               moxart_desc_write(priv->rx_mapping[i],
                       desc + RX_REG_OFFSET_DESC2 + RX_DESC2_ADDRESS_PHYS);
-               writel(priv->rx_buf[i],
+               moxart_desc_write((uintptr_t)priv->rx_buf[i],
                       desc + RX_REG_OFFSET_DESC2 + RX_DESC2_ADDRESS_VIRT);
        }
-       writel(RX_DESC1_END, desc + RX_REG_OFFSET_DESC1);
+       moxart_desc_write(RX_DESC1_END, desc + RX_REG_OFFSET_DESC1);
 
        priv->rx_head = 0;
 
@@ -201,14 +211,15 @@ static int moxart_rx_poll(struct napi_struct *napi, int budget)
                                                      napi);
        struct net_device *ndev = priv->ndev;
        struct sk_buff *skb;
-       void __iomem *desc;
+       void *desc;
        unsigned int desc0, len;
        int rx_head = priv->rx_head;
        int rx = 0;
 
        while (rx < budget) {
                desc = priv->rx_desc_base + (RX_REG_DESC_SIZE * rx_head);
-               desc0 = readl(desc + RX_REG_OFFSET_DESC0);
+               desc0 = moxart_desc_read(desc + RX_REG_OFFSET_DESC0);
+               rmb(); /* ensure desc0 is up to date */
 
                if (desc0 & RX_DESC0_DMA_OWN)
                        break;
@@ -250,7 +261,8 @@ static int moxart_rx_poll(struct napi_struct *napi, int budget)
                        priv->stats.multicast++;
 
 rx_next:
-               writel(RX_DESC0_DMA_OWN, desc + RX_REG_OFFSET_DESC0);
+               wmb(); /* prevent setting ownership back too early */
+               moxart_desc_write(RX_DESC0_DMA_OWN, desc + RX_REG_OFFSET_DESC0);
 
                rx_head = RX_NEXT(rx_head);
                priv->rx_head = rx_head;
@@ -310,7 +322,7 @@ static irqreturn_t moxart_mac_interrupt(int irq, void *dev_id)
 static int moxart_mac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 {
        struct moxart_mac_priv_t *priv = netdev_priv(ndev);
-       void __iomem *desc;
+       void *desc;
        unsigned int len;
        unsigned int tx_head = priv->tx_head;
        u32 txdes1;
@@ -319,11 +331,12 @@ static int moxart_mac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
        desc = priv->tx_desc_base + (TX_REG_DESC_SIZE * tx_head);
 
        spin_lock_irq(&priv->txlock);
-       if (readl(desc + TX_REG_OFFSET_DESC0) & TX_DESC0_DMA_OWN) {
+       if (moxart_desc_read(desc + TX_REG_OFFSET_DESC0) & TX_DESC0_DMA_OWN) {
                net_dbg_ratelimited("no TX space for packet\n");
                priv->stats.tx_dropped++;
                goto out_unlock;
        }
+       rmb(); /* ensure data is only read that had TX_DESC0_DMA_OWN cleared */
 
        len = skb->len > TX_BUF_SIZE ? TX_BUF_SIZE : skb->len;
 
@@ -337,9 +350,9 @@ static int moxart_mac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
        priv->tx_len[tx_head] = len;
        priv->tx_skb[tx_head] = skb;
 
-       writel(priv->tx_mapping[tx_head],
+       moxart_desc_write(priv->tx_mapping[tx_head],
               desc + TX_REG_OFFSET_DESC2 + TX_DESC2_ADDRESS_PHYS);
-       writel(skb->data,
+       moxart_desc_write((uintptr_t)skb->data,
               desc + TX_REG_OFFSET_DESC2 + TX_DESC2_ADDRESS_VIRT);
 
        if (skb->len < ETH_ZLEN) {
@@ -354,8 +367,9 @@ static int moxart_mac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
        txdes1 = TX_DESC1_LTS | TX_DESC1_FTS | (len & TX_DESC1_BUF_SIZE_MASK);
        if (tx_head == TX_DESC_NUM_MASK)
                txdes1 |= TX_DESC1_END;
-       writel(txdes1, desc + TX_REG_OFFSET_DESC1);
-       writel(TX_DESC0_DMA_OWN, desc + TX_REG_OFFSET_DESC0);
+       moxart_desc_write(txdes1, desc + TX_REG_OFFSET_DESC1);
+       wmb(); /* flush descriptor before transferring ownership */
+       moxart_desc_write(TX_DESC0_DMA_OWN, desc + TX_REG_OFFSET_DESC0);
 
        /* start to send packet */
        writel(0xffffffff, priv->base + REG_TX_POLL_DEMAND);
index 2be9280d608ce9efe320ea7b20b01b1c350a023e..93a9563ac7c6730eec8240ac187a86b9831c9741 100644 (file)
@@ -300,7 +300,7 @@ struct moxart_mac_priv_t {
 
        dma_addr_t rx_base;
        dma_addr_t rx_mapping[RX_DESC_NUM];
-       void __iomem *rx_desc_base;
+       void *rx_desc_base;
        unsigned char *rx_buf_base;
        unsigned char *rx_buf[RX_DESC_NUM];
        unsigned int rx_head;
@@ -308,7 +308,7 @@ struct moxart_mac_priv_t {
 
        dma_addr_t tx_base;
        dma_addr_t tx_mapping[TX_DESC_NUM];
-       void __iomem *tx_desc_base;
+       void *tx_desc_base;
        unsigned char *tx_buf_base;
        unsigned char *tx_buf[RX_DESC_NUM];
        unsigned int tx_head;
index 50d5604833edc8e0ffde1f7f8974002852f8bb97..e0993eba5df3f8081bff6329a97b4753e876cbe3 100644 (file)
@@ -2223,8 +2223,6 @@ static irqreturn_t vxge_isr_napi(int irq, void *dev_id)
        return IRQ_NONE;
 }
 
-#ifdef CONFIG_PCI_MSI
-
 static irqreturn_t vxge_tx_msix_handle(int irq, void *dev_id)
 {
        struct vxge_fifo *fifo = (struct vxge_fifo *)dev_id;
@@ -2442,16 +2440,13 @@ static void vxge_rem_msix_isr(struct vxgedev *vdev)
        if (vdev->config.intr_type == MSI_X)
                pci_disable_msix(vdev->pdev);
 }
-#endif
 
 static void vxge_rem_isr(struct vxgedev *vdev)
 {
-#ifdef CONFIG_PCI_MSI
-       if (vdev->config.intr_type == MSI_X) {
+       if (IS_ENABLED(CONFIG_PCI_MSI) &&
+           vdev->config.intr_type == MSI_X) {
                vxge_rem_msix_isr(vdev);
-       } else
-#endif
-       if (vdev->config.intr_type == INTA) {
+       } else if (vdev->config.intr_type == INTA) {
                        synchronize_irq(vdev->pdev->irq);
                        free_irq(vdev->pdev->irq, vdev);
        }
@@ -2460,11 +2455,10 @@ static void vxge_rem_isr(struct vxgedev *vdev)
 static int vxge_add_isr(struct vxgedev *vdev)
 {
        int ret = 0;
-#ifdef CONFIG_PCI_MSI
        int vp_idx = 0, intr_idx = 0, intr_cnt = 0, msix_idx = 0, irq_req = 0;
        int pci_fun = PCI_FUNC(vdev->pdev->devfn);
 
-       if (vdev->config.intr_type == MSI_X)
+       if (IS_ENABLED(CONFIG_PCI_MSI) && vdev->config.intr_type == MSI_X)
                ret = vxge_enable_msix(vdev);
 
        if (ret) {
@@ -2475,7 +2469,7 @@ static int vxge_add_isr(struct vxgedev *vdev)
                vdev->config.intr_type = INTA;
        }
 
-       if (vdev->config.intr_type == MSI_X) {
+       if (IS_ENABLED(CONFIG_PCI_MSI) && vdev->config.intr_type == MSI_X) {
                for (intr_idx = 0;
                     intr_idx < (vdev->no_of_vpath *
                        VXGE_HW_VPATH_MSIX_ACTIVE); intr_idx++) {
@@ -2576,9 +2570,8 @@ static int vxge_add_isr(struct vxgedev *vdev)
                vdev->vxge_entries[intr_cnt].in_use = 1;
                vdev->vxge_entries[intr_cnt].arg = &vdev->vpaths[0];
        }
-INTA_MODE:
-#endif
 
+INTA_MODE:
        if (vdev->config.intr_type == INTA) {
                snprintf(vdev->desc[0], VXGE_INTR_STRLEN,
                        "%s:vxge:INTA", vdev->ndev->name);
@@ -3889,12 +3882,12 @@ static void vxge_device_config_init(struct vxge_hw_device_config *device_config,
        if (max_mac_vpath > VXGE_MAX_MAC_ADDR_COUNT)
                max_mac_vpath = VXGE_MAX_MAC_ADDR_COUNT;
 
-#ifndef CONFIG_PCI_MSI
-       vxge_debug_init(VXGE_ERR,
-               "%s: This Kernel does not support "
-               "MSI-X. Defaulting to INTA", VXGE_DRIVER_NAME);
-       *intr_type = INTA;
-#endif
+       if (!IS_ENABLED(CONFIG_PCI_MSI)) {
+               vxge_debug_init(VXGE_ERR,
+                       "%s: This Kernel does not support "
+                       "MSI-X. Defaulting to INTA", VXGE_DRIVER_NAME);
+               *intr_type = INTA;
+       }
 
        /* Configure whether MSI-X or IRQL. */
        switch (*intr_type) {
index a4ab71d43e4e9d521e2a58f75aca4a4dff3f9c30..166a7fc87e2f41966ab2b54a641ea1c7bfc46f1d 100644 (file)
@@ -3531,12 +3531,14 @@ static void rocker_port_fdb_learn_work(struct work_struct *work)
        info.addr = lw->addr;
        info.vid = lw->vid;
 
+       rtnl_lock();
        if (learned && removing)
                call_switchdev_notifiers(SWITCHDEV_FDB_DEL,
                                         lw->rocker_port->dev, &info.info);
        else if (learned && !removing)
                call_switchdev_notifiers(SWITCHDEV_FDB_ADD,
                                         lw->rocker_port->dev, &info.info);
+       rtnl_unlock();
 
        rocker_port_kfree(lw->trans, work);
 }
index cc106d892e2975c924eafc8e850a4da8188ef227..23fa29877f5be79d39449284ecf8041d095d3712 100644 (file)
@@ -389,17 +389,27 @@ static int vnet_rx_one(struct vnet_port *port, struct vio_net_desc *desc)
        if (vio_version_after_eq(&port->vio, 1, 8)) {
                struct vio_net_dext *dext = vio_net_ext(desc);
 
+               skb_reset_network_header(skb);
+
                if (dext->flags & VNET_PKT_HCK_IPV4_HDRCKSUM) {
                        if (skb->protocol == ETH_P_IP) {
-                               struct iphdr *iph = (struct iphdr *)skb->data;
+                               struct iphdr *iph = ip_hdr(skb);
 
                                iph->check = 0;
                                ip_send_check(iph);
                        }
                }
                if ((dext->flags & VNET_PKT_HCK_FULLCKSUM) &&
-                   skb->ip_summed == CHECKSUM_NONE)
-                       vnet_fullcsum(skb);
+                   skb->ip_summed == CHECKSUM_NONE) {
+                       if (skb->protocol == htons(ETH_P_IP)) {
+                               struct iphdr *iph = ip_hdr(skb);
+                               int ihl = iph->ihl * 4;
+
+                               skb_reset_transport_header(skb);
+                               skb_set_transport_header(skb, ihl);
+                               vnet_fullcsum(skb);
+                       }
+               }
                if (dext->flags & VNET_PKT_HCK_IPV4_HDRCKSUM_OK) {
                        skb->ip_summed = CHECKSUM_PARTIAL;
                        skb->csum_level = 0;
index 657b65bf5cac64254b4db7c845038d2e40549a8c..18bf3a8fdc505ad2ed8a0fa3b81dfeb7d833b94d 100644 (file)
@@ -82,7 +82,7 @@ struct cpdma_desc {
 
 struct cpdma_desc_pool {
        phys_addr_t             phys;
-       u32                     hw_addr;
+       dma_addr_t              hw_addr;
        void __iomem            *iomap;         /* ioremap map */
        void                    *cpumap;        /* dma_alloc map */
        int                     desc_size, mem_size;
@@ -152,7 +152,7 @@ struct cpdma_chan {
  * abstract out these details
  */
 static struct cpdma_desc_pool *
-cpdma_desc_pool_create(struct device *dev, u32 phys, u32 hw_addr,
+cpdma_desc_pool_create(struct device *dev, u32 phys, dma_addr_t hw_addr,
                                int size, int align)
 {
        int bitmap_size;
@@ -176,13 +176,13 @@ cpdma_desc_pool_create(struct device *dev, u32 phys, u32 hw_addr,
 
        if (phys) {
                pool->phys  = phys;
-               pool->iomap = ioremap(phys, size);
+               pool->iomap = ioremap(phys, size); /* should be memremap? */
                pool->hw_addr = hw_addr;
        } else {
-               pool->cpumap = dma_alloc_coherent(dev, size, &pool->phys,
+               pool->cpumap = dma_alloc_coherent(dev, size, &pool->hw_addr,
                                                  GFP_KERNEL);
-               pool->iomap = pool->cpumap;
-               pool->hw_addr = pool->phys;
+               pool->iomap = (void __iomem __force *)pool->cpumap;
+               pool->phys = pool->hw_addr; /* assumes no IOMMU, don't use this value */
        }
 
        if (pool->iomap)
index 7f975a2c8990fcff4491b4171cae626490d45807..b0de8ecd7fe8ed8ab030e22da50aef82e283e4b2 100644 (file)
@@ -533,8 +533,8 @@ static int dfx_register(struct device *bdev)
        const char *print_name = dev_name(bdev);
        struct net_device *dev;
        DFX_board_t       *bp;                  /* board pointer */
-       resource_size_t bar_start[3];           /* pointers to ports */
-       resource_size_t bar_len[3];             /* resource length */
+       resource_size_t bar_start[3] = {0};     /* pointers to ports */
+       resource_size_t bar_len[3] = {0};       /* resource length */
        int alloc_size;                         /* total buffer size used */
        struct resource *region;
        int err = 0;
@@ -3697,8 +3697,8 @@ static void dfx_unregister(struct device *bdev)
        int dfx_bus_pci = dev_is_pci(bdev);
        int dfx_bus_tc = DFX_BUS_TC(bdev);
        int dfx_use_mmio = DFX_MMIO || dfx_bus_tc;
-       resource_size_t bar_start[3];           /* pointers to ports */
-       resource_size_t bar_len[3];             /* resource lengths */
+       resource_size_t bar_start[3] = {0};     /* pointers to ports */
+       resource_size_t bar_len[3] = {0};       /* resource lengths */
        int             alloc_size;             /* total buffer size used */
 
        unregister_netdev(dev);
index 7456569f53c1a312d5590dc88072505413126889..0b14ac3b8d1189081967aad8bcbbfce30016aeea 100644 (file)
@@ -980,9 +980,9 @@ static netdev_tx_t geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
                        opts = ip_tunnel_info_opts(info);
 
                if (key->tun_flags & TUNNEL_CSUM)
-                       flags |= GENEVE_F_UDP_CSUM;
+                       flags &= ~GENEVE_F_UDP_ZERO_CSUM6_TX;
                else
-                       flags &= ~GENEVE_F_UDP_CSUM;
+                       flags |= GENEVE_F_UDP_ZERO_CSUM6_TX;
 
                err = geneve6_build_skb(dst, skb, key->tun_flags, vni,
                                        info->options_len, opts,
index f4130af09244fb1c6b24f230c7d79cbdaeb43a22..fcb92c0d0eb967e0deba99282152a25c804b73ea 100644 (file)
@@ -624,6 +624,7 @@ struct nvsp_message {
 #define RNDIS_PKT_ALIGN_DEFAULT 8
 
 struct multi_send_data {
+       struct sk_buff *skb; /* skb containing the pkt */
        struct hv_netvsc_packet *pkt; /* netvsc pkt pending */
        u32 count; /* counter of batched packets */
 };
index 059fc523160107239d72080cadb1f2e7c0d246df..ec313fc08d82a3b6a3d8d79fd7a6527caa30733a 100644 (file)
@@ -841,6 +841,18 @@ static inline int netvsc_send_pkt(
        return ret;
 }
 
+/* Move packet out of multi send data (msd), and clear msd */
+static inline void move_pkt_msd(struct hv_netvsc_packet **msd_send,
+                               struct sk_buff **msd_skb,
+                               struct multi_send_data *msdp)
+{
+       *msd_skb = msdp->skb;
+       *msd_send = msdp->pkt;
+       msdp->skb = NULL;
+       msdp->pkt = NULL;
+       msdp->count = 0;
+}
+
 int netvsc_send(struct hv_device *device,
                struct hv_netvsc_packet *packet,
                struct rndis_message *rndis_msg,
@@ -855,6 +867,7 @@ int netvsc_send(struct hv_device *device,
        unsigned int section_index = NETVSC_INVALID_INDEX;
        struct multi_send_data *msdp;
        struct hv_netvsc_packet *msd_send = NULL, *cur_send = NULL;
+       struct sk_buff *msd_skb = NULL;
        bool try_batch;
        bool xmit_more = (skb != NULL) ? skb->xmit_more : false;
 
@@ -897,10 +910,8 @@ int netvsc_send(struct hv_device *device,
                   net_device->send_section_size) {
                section_index = netvsc_get_next_send_section(net_device);
                if (section_index != NETVSC_INVALID_INDEX) {
-                               msd_send = msdp->pkt;
-                               msdp->pkt = NULL;
-                               msdp->count = 0;
-                               msd_len = 0;
+                       move_pkt_msd(&msd_send, &msd_skb, msdp);
+                       msd_len = 0;
                }
        }
 
@@ -919,31 +930,31 @@ int netvsc_send(struct hv_device *device,
                        packet->total_data_buflen += msd_len;
                }
 
-               if (msdp->pkt)
-                       dev_kfree_skb_any(skb);
+               if (msdp->skb)
+                       dev_kfree_skb_any(msdp->skb);
 
                if (xmit_more && !packet->cp_partial) {
+                       msdp->skb = skb;
                        msdp->pkt = packet;
                        msdp->count++;
                } else {
                        cur_send = packet;
+                       msdp->skb = NULL;
                        msdp->pkt = NULL;
                        msdp->count = 0;
                }
        } else {
-               msd_send = msdp->pkt;
-               msdp->pkt = NULL;
-               msdp->count = 0;
+               move_pkt_msd(&msd_send, &msd_skb, msdp);
                cur_send = packet;
        }
 
        if (msd_send) {
-               m_ret = netvsc_send_pkt(msd_send, net_device, pb, skb);
+               m_ret = netvsc_send_pkt(msd_send, net_device, NULL, msd_skb);
 
                if (m_ret != 0) {
                        netvsc_free_send_slot(net_device,
                                              msd_send->send_buf_index);
-                       dev_kfree_skb_any(skb);
+                       dev_kfree_skb_any(msd_skb);
                }
        }
 
index 1c8db9afdcda8c0356db46db9fcfa4d971050b7b..1d3a66563bacbb0628e90693f8f3409c62018d68 100644 (file)
@@ -196,65 +196,6 @@ static void *init_ppi_data(struct rndis_message *msg, u32 ppi_size,
        return ppi;
 }
 
-union sub_key {
-       u64 k;
-       struct {
-               u8 pad[3];
-               u8 kb;
-               u32 ka;
-       };
-};
-
-/* Toeplitz hash function
- * data: network byte order
- * return: host byte order
- */
-static u32 comp_hash(u8 *key, int klen, void *data, int dlen)
-{
-       union sub_key subk;
-       int k_next = 4;
-       u8 dt;
-       int i, j;
-       u32 ret = 0;
-
-       subk.k = 0;
-       subk.ka = ntohl(*(u32 *)key);
-
-       for (i = 0; i < dlen; i++) {
-               subk.kb = key[k_next];
-               k_next = (k_next + 1) % klen;
-               dt = ((u8 *)data)[i];
-               for (j = 0; j < 8; j++) {
-                       if (dt & 0x80)
-                               ret ^= subk.ka;
-                       dt <<= 1;
-                       subk.k <<= 1;
-               }
-       }
-
-       return ret;
-}
-
-static bool netvsc_set_hash(u32 *hash, struct sk_buff *skb)
-{
-       struct flow_keys flow;
-       int data_len;
-
-       if (!skb_flow_dissect_flow_keys(skb, &flow, 0) ||
-           !(flow.basic.n_proto == htons(ETH_P_IP) ||
-             flow.basic.n_proto == htons(ETH_P_IPV6)))
-               return false;
-
-       if (flow.basic.ip_proto == IPPROTO_TCP)
-               data_len = 12;
-       else
-               data_len = 8;
-
-       *hash = comp_hash(netvsc_hash_key, HASH_KEYLEN, &flow, data_len);
-
-       return true;
-}
-
 static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
                        void *accel_priv, select_queue_fallback_t fallback)
 {
@@ -267,11 +208,9 @@ static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
        if (nvsc_dev == NULL || ndev->real_num_tx_queues <= 1)
                return 0;
 
-       if (netvsc_set_hash(&hash, skb)) {
-               q_idx = nvsc_dev->send_table[hash % VRSS_SEND_TAB_SIZE] %
-                       ndev->real_num_tx_queues;
-               skb_set_hash(skb, hash, PKT_HASH_TYPE_L3);
-       }
+       hash = skb_get_hash(skb);
+       q_idx = nvsc_dev->send_table[hash % VRSS_SEND_TAB_SIZE] %
+               ndev->real_num_tx_queues;
 
        if (!nvsc_dev->chn_table[q_idx])
                q_idx = 0;
index 29cbde8501ed8e09750f771204dfe9fc38503d34..d47cf14bb4a5b43bf5c94e0bdc533e1033f79b2a 100644 (file)
@@ -82,9 +82,6 @@ struct bfin_sir_self {
 
 #define DRIVER_NAME "bfin_sir"
 
-#define port_membase(port)     (((struct bfin_sir_port *)(port))->membase)
-#define get_lsr_cache(port)    (((struct bfin_sir_port *)(port))->lsr)
-#define put_lsr_cache(port, v) (((struct bfin_sir_port *)(port))->lsr = (v))
 #include <asm/bfin_serial.h>
 
 static const unsigned short per[][4] = {
index 6a57a005e0ca8162d2a0b9334440d1b9cb7d75db..94e688805dd262317327a446d8e2e460a668d75a 100644 (file)
@@ -1323,6 +1323,7 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
 
        list_add_tail_rcu(&vlan->list, &port->vlans);
        netif_stacked_transfer_operstate(lowerdev, dev);
+       linkwatch_fire_event(dev);
 
        return 0;
 
@@ -1522,6 +1523,7 @@ static int macvlan_device_event(struct notifier_block *unused,
        port = macvlan_port_get_rtnl(dev);
 
        switch (event) {
+       case NETDEV_UP:
        case NETDEV_CHANGE:
                list_for_each_entry(vlan, &port->vlans, list)
                        netif_stacked_transfer_operstate(vlan->lowerdev,
index 60994a83a0d68ca2e4c229fe16140b6017a9dd55..f0a77020037af0790f37b4098e4f6937c6a1ebee 100644 (file)
@@ -186,6 +186,7 @@ config MDIO_GPIO
 config MDIO_OCTEON
        tristate "Support for MDIO buses on Octeon and ThunderX SOCs"
        depends on 64BIT
+       depends on HAS_IOMEM
        help
 
          This module provides a driver for the Octeon and ThunderX MDIO
index 180f6995277944923b2aa57bacdf0bc17b9637ad..7a240fce3a7ea09edc9f10c6b93fadd94b2de3f5 100644 (file)
@@ -846,6 +846,11 @@ static void decode_rxts(struct dp83640_private *dp83640,
        struct skb_shared_hwtstamps *shhwtstamps = NULL;
        struct sk_buff *skb;
        unsigned long flags;
+       u8 overflow;
+
+       overflow = (phy_rxts->ns_hi >> 14) & 0x3;
+       if (overflow)
+               pr_debug("rx timestamp queue overflow, count %d\n", overflow);
 
        spin_lock_irqsave(&dp83640->rx_lock, flags);
 
@@ -888,6 +893,7 @@ static void decode_txts(struct dp83640_private *dp83640,
        struct skb_shared_hwtstamps shhwtstamps;
        struct sk_buff *skb;
        u64 ns;
+       u8 overflow;
 
        /* We must already have the skb that triggered this. */
 
@@ -897,6 +903,17 @@ static void decode_txts(struct dp83640_private *dp83640,
                pr_debug("have timestamp but tx_queue empty\n");
                return;
        }
+
+       overflow = (phy_txts->ns_hi >> 14) & 0x3;
+       if (overflow) {
+               pr_debug("tx timestamp queue overflow, count %d\n", overflow);
+               while (skb) {
+                       skb_complete_tx_timestamp(skb, NULL);
+                       skb = skb_dequeue(&dp83640->tx_queue);
+               }
+               return;
+       }
+
        ns = phy2txts(phy_txts);
        memset(&shhwtstamps, 0, sizeof(shhwtstamps));
        shhwtstamps.hwtstamp = ns_to_ktime(ns);
index 8763bb20988a0edf80091347987fb38086e85496..5590b9c182c967d80a186256162f30690c707506 100644 (file)
@@ -692,25 +692,29 @@ void phy_change(struct work_struct *work)
        struct phy_device *phydev =
                container_of(work, struct phy_device, phy_queue);
 
-       if (phydev->drv->did_interrupt &&
-           !phydev->drv->did_interrupt(phydev))
-               goto ignore;
+       if (phy_interrupt_is_valid(phydev)) {
+               if (phydev->drv->did_interrupt &&
+                   !phydev->drv->did_interrupt(phydev))
+                       goto ignore;
 
-       if (phy_disable_interrupts(phydev))
-               goto phy_err;
+               if (phy_disable_interrupts(phydev))
+                       goto phy_err;
+       }
 
        mutex_lock(&phydev->lock);
        if ((PHY_RUNNING == phydev->state) || (PHY_NOLINK == phydev->state))
                phydev->state = PHY_CHANGELINK;
        mutex_unlock(&phydev->lock);
 
-       atomic_dec(&phydev->irq_disable);
-       enable_irq(phydev->irq);
+       if (phy_interrupt_is_valid(phydev)) {
+               atomic_dec(&phydev->irq_disable);
+               enable_irq(phydev->irq);
 
-       /* Reenable interrupts */
-       if (PHY_HALTED != phydev->state &&
-           phy_config_interrupt(phydev, PHY_INTERRUPT_ENABLED))
-               goto irq_enable_err;
+               /* Reenable interrupts */
+               if (PHY_HALTED != phydev->state &&
+                   phy_config_interrupt(phydev, PHY_INTERRUPT_ENABLED))
+                       goto irq_enable_err;
+       }
 
        /* reschedule state queue work to run as soon as possible */
        cancel_delayed_work_sync(&phydev->state_queue);
@@ -905,10 +909,10 @@ void phy_state_machine(struct work_struct *work)
                phydev->adjust_link(phydev->attached_dev);
                break;
        case PHY_RUNNING:
-               /* Only register a CHANGE if we are polling or ignoring
-                * interrupts and link changed since latest checking.
+               /* Only register a CHANGE if we are polling and link changed
+                * since latest checking.
                 */
-               if (!phy_interrupt_is_valid(phydev)) {
+               if (phydev->irq == PHY_POLL) {
                        old_link = phydev->link;
                        err = phy_read_status(phydev);
                        if (err)
@@ -1000,15 +1004,21 @@ void phy_state_machine(struct work_struct *work)
                   phy_state_to_str(old_state),
                   phy_state_to_str(phydev->state));
 
-       queue_delayed_work(system_power_efficient_wq, &phydev->state_queue,
-                          PHY_STATE_TIME * HZ);
+       /* Only re-schedule a PHY state machine change if we are polling the
+        * PHY, if PHY_IGNORE_INTERRUPT is set, then we will be moving
+        * between states from phy_mac_interrupt()
+        */
+       if (phydev->irq == PHY_POLL)
+               queue_delayed_work(system_power_efficient_wq, &phydev->state_queue,
+                                  PHY_STATE_TIME * HZ);
 }
 
 void phy_mac_interrupt(struct phy_device *phydev, int new_link)
 {
-       cancel_work_sync(&phydev->phy_queue);
        phydev->link = new_link;
-       schedule_work(&phydev->phy_queue);
+
+       /* Trigger a state machine change */
+       queue_work(system_power_efficient_wq, &phydev->phy_queue);
 }
 EXPORT_SYMBOL(phy_mac_interrupt);
 
index e485f2653c8208578d7994284dd12faabcc6c096..2e21e9366f76a184aa2c8d770557e6d9ff0db235 100644 (file)
 #include <linux/netdevice.h>
 #include <linux/smscphy.h>
 
+struct smsc_phy_priv {
+       bool energy_enable;
+};
+
 static int smsc_phy_config_intr(struct phy_device *phydev)
 {
        int rc = phy_write (phydev, MII_LAN83C185_IM,
@@ -43,19 +47,14 @@ static int smsc_phy_ack_interrupt(struct phy_device *phydev)
 
 static int smsc_phy_config_init(struct phy_device *phydev)
 {
-       int __maybe_unused len;
-       struct device *dev __maybe_unused = &phydev->mdio.dev;
-       struct device_node *of_node __maybe_unused = dev->of_node;
+       struct smsc_phy_priv *priv = phydev->priv;
+
        int rc = phy_read(phydev, MII_LAN83C185_CTRL_STATUS);
-       int enable_energy = 1;
 
        if (rc < 0)
                return rc;
 
-       if (of_find_property(of_node, "smsc,disable-energy-detect", &len))
-               enable_energy = 0;
-
-       if (enable_energy) {
+       if (priv->energy_enable) {
                /* Enable energy detect mode for this SMSC Transceivers */
                rc = phy_write(phydev, MII_LAN83C185_CTRL_STATUS,
                               rc | MII_LAN83C185_EDPWRDOWN);
@@ -110,10 +109,13 @@ static int lan911x_config_init(struct phy_device *phydev)
  */
 static int lan87xx_read_status(struct phy_device *phydev)
 {
+       struct smsc_phy_priv *priv = phydev->priv;
+
        int err = genphy_read_status(phydev);
-       int i;
 
-       if (!phydev->link) {
+       if (!phydev->link && priv->energy_enable) {
+               int i;
+
                /* Disable EDPD to wake up PHY */
                int rc = phy_read(phydev, MII_LAN83C185_CTRL_STATUS);
                if (rc < 0)
@@ -149,6 +151,26 @@ static int lan87xx_read_status(struct phy_device *phydev)
        return err;
 }
 
+static int smsc_phy_probe(struct phy_device *phydev)
+{
+       struct device *dev = &phydev->mdio.dev;
+       struct device_node *of_node = dev->of_node;
+       struct smsc_phy_priv *priv;
+
+       priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+       if (!priv)
+               return -ENOMEM;
+
+       priv->energy_enable = true;
+
+       if (of_property_read_bool(of_node, "smsc,disable-energy-detect"))
+               priv->energy_enable = false;
+
+       phydev->priv = priv;
+
+       return 0;
+}
+
 static struct phy_driver smsc_phy_driver[] = {
 {
        .phy_id         = 0x0007c0a0, /* OUI=0x00800f, Model#=0x0a */
@@ -159,6 +181,8 @@ static struct phy_driver smsc_phy_driver[] = {
                                | SUPPORTED_Asym_Pause),
        .flags          = PHY_HAS_INTERRUPT | PHY_HAS_MAGICANEG,
 
+       .probe          = smsc_phy_probe,
+
        /* basic functions */
        .config_aneg    = genphy_config_aneg,
        .read_status    = genphy_read_status,
@@ -180,6 +204,8 @@ static struct phy_driver smsc_phy_driver[] = {
                                | SUPPORTED_Asym_Pause),
        .flags          = PHY_HAS_INTERRUPT | PHY_HAS_MAGICANEG,
 
+       .probe          = smsc_phy_probe,
+
        /* basic functions */
        .config_aneg    = genphy_config_aneg,
        .read_status    = genphy_read_status,
@@ -201,6 +227,8 @@ static struct phy_driver smsc_phy_driver[] = {
                                | SUPPORTED_Asym_Pause),
        .flags          = PHY_HAS_INTERRUPT | PHY_HAS_MAGICANEG,
 
+       .probe          = smsc_phy_probe,
+
        /* basic functions */
        .config_aneg    = genphy_config_aneg,
        .read_status    = lan87xx_read_status,
@@ -222,6 +250,8 @@ static struct phy_driver smsc_phy_driver[] = {
                                | SUPPORTED_Asym_Pause),
        .flags          = PHY_HAS_INTERRUPT | PHY_HAS_MAGICANEG,
 
+       .probe          = smsc_phy_probe,
+
        /* basic functions */
        .config_aneg    = genphy_config_aneg,
        .read_status    = genphy_read_status,
@@ -242,6 +272,8 @@ static struct phy_driver smsc_phy_driver[] = {
                                | SUPPORTED_Asym_Pause),
        .flags          = PHY_HAS_INTERRUPT | PHY_HAS_MAGICANEG,
 
+       .probe          = smsc_phy_probe,
+
        /* basic functions */
        .config_aneg    = genphy_config_aneg,
        .read_status    = lan87xx_read_status,
@@ -263,6 +295,8 @@ static struct phy_driver smsc_phy_driver[] = {
                                | SUPPORTED_Asym_Pause),
        .flags          = PHY_HAS_INTERRUPT | PHY_HAS_MAGICANEG,
 
+       .probe          = smsc_phy_probe,
+
        /* basic functions */
        .config_aneg    = genphy_config_aneg,
        .read_status    = lan87xx_read_status,
index 90868ca5e3412ffebd28630f571bdddfe389cc35..ae0905ed4a32b55183568e95512c6727458a9f96 100644 (file)
@@ -129,24 +129,27 @@ static int lookup_chan_dst(u16 call_id, __be32 d_addr)
        return i < MAX_CALLID;
 }
 
-static int add_chan(struct pppox_sock *sock)
+static int add_chan(struct pppox_sock *sock,
+                   struct pptp_addr *sa)
 {
        static int call_id;
 
        spin_lock(&chan_lock);
-       if (!sock->proto.pptp.src_addr.call_id) {
+       if (!sa->call_id)       {
                call_id = find_next_zero_bit(callid_bitmap, MAX_CALLID, call_id + 1);
                if (call_id == MAX_CALLID) {
                        call_id = find_next_zero_bit(callid_bitmap, MAX_CALLID, 1);
                        if (call_id == MAX_CALLID)
                                goto out_err;
                }
-               sock->proto.pptp.src_addr.call_id = call_id;
-       } else if (test_bit(sock->proto.pptp.src_addr.call_id, callid_bitmap))
+               sa->call_id = call_id;
+       } else if (test_bit(sa->call_id, callid_bitmap)) {
                goto out_err;
+       }
 
-       set_bit(sock->proto.pptp.src_addr.call_id, callid_bitmap);
-       rcu_assign_pointer(callid_sock[sock->proto.pptp.src_addr.call_id], sock);
+       sock->proto.pptp.src_addr = *sa;
+       set_bit(sa->call_id, callid_bitmap);
+       rcu_assign_pointer(callid_sock[sa->call_id], sock);
        spin_unlock(&chan_lock);
 
        return 0;
@@ -416,7 +419,6 @@ static int pptp_bind(struct socket *sock, struct sockaddr *uservaddr,
        struct sock *sk = sock->sk;
        struct sockaddr_pppox *sp = (struct sockaddr_pppox *) uservaddr;
        struct pppox_sock *po = pppox_sk(sk);
-       struct pptp_opt *opt = &po->proto.pptp;
        int error = 0;
 
        if (sockaddr_len < sizeof(struct sockaddr_pppox))
@@ -424,10 +426,22 @@ static int pptp_bind(struct socket *sock, struct sockaddr *uservaddr,
 
        lock_sock(sk);
 
-       opt->src_addr = sp->sa_addr.pptp;
-       if (add_chan(po))
+       if (sk->sk_state & PPPOX_DEAD) {
+               error = -EALREADY;
+               goto out;
+       }
+
+       if (sk->sk_state & PPPOX_BOUND) {
                error = -EBUSY;
+               goto out;
+       }
+
+       if (add_chan(po, &sp->sa_addr.pptp))
+               error = -EBUSY;
+       else
+               sk->sk_state |= PPPOX_BOUND;
 
+out:
        release_sock(sk);
        return error;
 }
@@ -498,7 +512,7 @@ static int pptp_connect(struct socket *sock, struct sockaddr *uservaddr,
        }
 
        opt->dst_addr = sp->sa_addr.pptp;
-       sk->sk_state = PPPOX_CONNECTED;
+       sk->sk_state |= PPPOX_CONNECTED;
 
  end:
        release_sock(sk);
index 2ed53331bfb28de22019754cb46a8a27c35c43ad..1c299b8a162d7243c5ae94b96618c3763083e91e 100644 (file)
@@ -36,7 +36,7 @@
 #define DRIVER_AUTHOR  "WOOJUNG HUH <woojung.huh@microchip.com>"
 #define DRIVER_DESC    "LAN78XX USB 3.0 Gigabit Ethernet Devices"
 #define DRIVER_NAME    "lan78xx"
-#define DRIVER_VERSION "1.0.1"
+#define DRIVER_VERSION "1.0.2"
 
 #define TX_TIMEOUT_JIFFIES             (5 * HZ)
 #define THROTTLE_JIFFIES               (HZ / 8)
@@ -462,32 +462,53 @@ static int lan78xx_read_raw_eeprom(struct lan78xx_net *dev, u32 offset,
                                   u32 length, u8 *data)
 {
        u32 val;
+       u32 saved;
        int i, ret;
+       int retval;
 
-       ret = lan78xx_eeprom_confirm_not_busy(dev);
-       if (ret)
-               return ret;
+       /* depends on chip, some EEPROM pins are muxed with LED function.
+        * disable & restore LED function to access EEPROM.
+        */
+       ret = lan78xx_read_reg(dev, HW_CFG, &val);
+       saved = val;
+       if ((dev->devid & ID_REV_CHIP_ID_MASK_) == 0x78000000) {
+               val &= ~(HW_CFG_LED1_EN_ | HW_CFG_LED0_EN_);
+               ret = lan78xx_write_reg(dev, HW_CFG, val);
+       }
+
+       retval = lan78xx_eeprom_confirm_not_busy(dev);
+       if (retval)
+               return retval;
 
        for (i = 0; i < length; i++) {
                val = E2P_CMD_EPC_BUSY_ | E2P_CMD_EPC_CMD_READ_;
                val |= (offset & E2P_CMD_EPC_ADDR_MASK_);
                ret = lan78xx_write_reg(dev, E2P_CMD, val);
-               if (unlikely(ret < 0))
-                       return -EIO;
+               if (unlikely(ret < 0)) {
+                       retval = -EIO;
+                       goto exit;
+               }
 
-               ret = lan78xx_wait_eeprom(dev);
-               if (ret < 0)
-                       return ret;
+               retval = lan78xx_wait_eeprom(dev);
+               if (retval < 0)
+                       goto exit;
 
                ret = lan78xx_read_reg(dev, E2P_DATA, &val);
-               if (unlikely(ret < 0))
-                       return -EIO;
+               if (unlikely(ret < 0)) {
+                       retval = -EIO;
+                       goto exit;
+               }
 
                data[i] = val & 0xFF;
                offset++;
        }
 
-       return 0;
+       retval = 0;
+exit:
+       if ((dev->devid & ID_REV_CHIP_ID_MASK_) == 0x78000000)
+               ret = lan78xx_write_reg(dev, HW_CFG, saved);
+
+       return retval;
 }
 
 static int lan78xx_read_eeprom(struct lan78xx_net *dev, u32 offset,
@@ -509,44 +530,67 @@ static int lan78xx_write_raw_eeprom(struct lan78xx_net *dev, u32 offset,
                                    u32 length, u8 *data)
 {
        u32 val;
+       u32 saved;
        int i, ret;
+       int retval;
 
-       ret = lan78xx_eeprom_confirm_not_busy(dev);
-       if (ret)
-               return ret;
+       /* depends on chip, some EEPROM pins are muxed with LED function.
+        * disable & restore LED function to access EEPROM.
+        */
+       ret = lan78xx_read_reg(dev, HW_CFG, &val);
+       saved = val;
+       if ((dev->devid & ID_REV_CHIP_ID_MASK_) == 0x78000000) {
+               val &= ~(HW_CFG_LED1_EN_ | HW_CFG_LED0_EN_);
+               ret = lan78xx_write_reg(dev, HW_CFG, val);
+       }
+
+       retval = lan78xx_eeprom_confirm_not_busy(dev);
+       if (retval)
+               goto exit;
 
        /* Issue write/erase enable command */
        val = E2P_CMD_EPC_BUSY_ | E2P_CMD_EPC_CMD_EWEN_;
        ret = lan78xx_write_reg(dev, E2P_CMD, val);
-       if (unlikely(ret < 0))
-               return -EIO;
+       if (unlikely(ret < 0)) {
+               retval = -EIO;
+               goto exit;
+       }
 
-       ret = lan78xx_wait_eeprom(dev);
-       if (ret < 0)
-               return ret;
+       retval = lan78xx_wait_eeprom(dev);
+       if (retval < 0)
+               goto exit;
 
        for (i = 0; i < length; i++) {
                /* Fill data register */
                val = data[i];
                ret = lan78xx_write_reg(dev, E2P_DATA, val);
-               if (ret < 0)
-                       return ret;
+               if (ret < 0) {
+                       retval = -EIO;
+                       goto exit;
+               }
 
                /* Send "write" command */
                val = E2P_CMD_EPC_BUSY_ | E2P_CMD_EPC_CMD_WRITE_;
                val |= (offset & E2P_CMD_EPC_ADDR_MASK_);
                ret = lan78xx_write_reg(dev, E2P_CMD, val);
-               if (ret < 0)
-                       return ret;
+               if (ret < 0) {
+                       retval = -EIO;
+                       goto exit;
+               }
 
-               ret = lan78xx_wait_eeprom(dev);
-               if (ret < 0)
-                       return ret;
+               retval = lan78xx_wait_eeprom(dev);
+               if (retval < 0)
+                       goto exit;
 
                offset++;
        }
 
-       return 0;
+       retval = 0;
+exit:
+       if ((dev->devid & ID_REV_CHIP_ID_MASK_) == 0x78000000)
+               ret = lan78xx_write_reg(dev, HW_CFG, saved);
+
+       return retval;
 }
 
 static int lan78xx_read_raw_otp(struct lan78xx_net *dev, u32 offset,
@@ -904,7 +948,6 @@ static int lan78xx_link_reset(struct lan78xx_net *dev)
 
        if (!phydev->link && dev->link_on) {
                dev->link_on = false;
-               netif_carrier_off(dev->net);
 
                /* reset MAC */
                ret = lan78xx_read_reg(dev, MAC_CR, &buf);
@@ -914,6 +957,8 @@ static int lan78xx_link_reset(struct lan78xx_net *dev)
                ret = lan78xx_write_reg(dev, MAC_CR, buf);
                if (unlikely(ret < 0))
                        return -EIO;
+
+               phy_mac_interrupt(phydev, 0);
        } else if (phydev->link && !dev->link_on) {
                dev->link_on = true;
 
@@ -953,7 +998,7 @@ static int lan78xx_link_reset(struct lan78xx_net *dev)
                          ethtool_cmd_speed(&ecmd), ecmd.duplex, ladv, radv);
 
                ret = lan78xx_update_flowcontrol(dev, ecmd.duplex, ladv, radv);
-               netif_carrier_on(dev->net);
+               phy_mac_interrupt(phydev, 1);
        }
 
        return ret;
@@ -1495,7 +1540,6 @@ done:
 static int lan78xx_mdio_init(struct lan78xx_net *dev)
 {
        int ret;
-       int i;
 
        dev->mdiobus = mdiobus_alloc();
        if (!dev->mdiobus) {
@@ -1511,10 +1555,6 @@ static int lan78xx_mdio_init(struct lan78xx_net *dev)
        snprintf(dev->mdiobus->id, MII_BUS_ID_SIZE, "usb-%03d:%03d",
                 dev->udev->bus->busnum, dev->udev->devnum);
 
-       /* handle our own interrupt */
-       for (i = 0; i < PHY_MAX_ADDR; i++)
-               dev->mdiobus->irq[i] = PHY_IGNORE_INTERRUPT;
-
        switch (dev->devid & ID_REV_CHIP_ID_MASK_) {
        case 0x78000000:
        case 0x78500000:
@@ -1558,6 +1598,16 @@ static int lan78xx_phy_init(struct lan78xx_net *dev)
                return -EIO;
        }
 
+       /* Enable PHY interrupts.
+        * We handle our own interrupt
+        */
+       ret = phy_read(phydev, LAN88XX_INT_STS);
+       ret = phy_write(phydev, LAN88XX_INT_MASK,
+                       LAN88XX_INT_MASK_MDINTPIN_EN_ |
+                       LAN88XX_INT_MASK_LINK_CHANGE_);
+
+       phydev->irq = PHY_IGNORE_INTERRUPT;
+
        ret = phy_connect_direct(dev->net, phydev,
                                 lan78xx_link_status_change,
                                 PHY_INTERFACE_MODE_GMII);
@@ -1580,14 +1630,6 @@ static int lan78xx_phy_init(struct lan78xx_net *dev)
                              SUPPORTED_Pause | SUPPORTED_Asym_Pause);
        genphy_config_aneg(phydev);
 
-       /* Workaround to enable PHY interrupt.
-        * phy_start_interrupts() is API for requesting and enabling
-        * PHY interrupt. However, USB-to-Ethernet device can't use
-        * request_irq() called in phy_start_interrupts().
-        * Set PHY to PHY_HALTED and call phy_start()
-        * to make a call to phy_enable_interrupts()
-        */
-       phy_stop(phydev);
        phy_start(phydev);
 
        netif_dbg(dev, ifup, dev->net, "phy initialised successfully");
@@ -2221,7 +2263,9 @@ netdev_tx_t lan78xx_start_xmit(struct sk_buff *skb, struct net_device *net)
        if (skb2) {
                skb_queue_tail(&dev->txq_pend, skb2);
 
-               if (skb_queue_len(&dev->txq_pend) > 10)
+               /* throttle TX patch at slower than SUPER SPEED USB */
+               if ((dev->udev->speed < USB_SPEED_SUPER) &&
+                   (skb_queue_len(&dev->txq_pend) > 10))
                        netif_stop_queue(net);
        } else {
                netif_dbg(dev, tx_err, dev->net,
index 2d88c799d2ac32a44c2bc304e19280f2e471ad7b..65439188c5829e1cce0464e6c34be1c8bb5c18c3 100644 (file)
@@ -73,7 +73,7 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
 static int vxlan_net_id;
 static struct rtnl_link_ops vxlan_link_ops;
 
-static const u8 all_zeros_mac[ETH_ALEN];
+static const u8 all_zeros_mac[ETH_ALEN + 2];
 
 static int vxlan_sock_add(struct vxlan_dev *vxlan);
 
@@ -1985,11 +1985,6 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
                                     vxlan->cfg.port_max, true);
 
        if (info) {
-               if (info->key.tun_flags & TUNNEL_CSUM)
-                       flags |= VXLAN_F_UDP_CSUM;
-               else
-                       flags &= ~VXLAN_F_UDP_CSUM;
-
                ttl = info->key.ttl;
                tos = info->key.tos;
 
@@ -2004,8 +1999,15 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
                        goto drop;
                sk = vxlan->vn4_sock->sock->sk;
 
-               if (info && (info->key.tun_flags & TUNNEL_DONT_FRAGMENT))
-                       df = htons(IP_DF);
+               if (info) {
+                       if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT)
+                               df = htons(IP_DF);
+
+                       if (info->key.tun_flags & TUNNEL_CSUM)
+                               flags |= VXLAN_F_UDP_CSUM;
+                       else
+                               flags &= ~VXLAN_F_UDP_CSUM;
+               }
 
                memset(&fl4, 0, sizeof(fl4));
                fl4.flowi4_oif = rdst ? rdst->remote_ifindex : 0;
@@ -2101,6 +2103,13 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
                        return;
                }
 
+               if (info) {
+                       if (info->key.tun_flags & TUNNEL_CSUM)
+                               flags &= ~VXLAN_F_UDP_ZERO_CSUM6_TX;
+                       else
+                               flags |= VXLAN_F_UDP_ZERO_CSUM6_TX;
+               }
+
                ttl = ttl ? : ip6_dst_hoplimit(ndst);
                err = vxlan6_xmit_skb(ndst, sk, skb, dev, &saddr, &dst->sin6.sin6_addr,
                                      0, ttl, src_port, dst_port, htonl(vni << 8), md,
index a7afdeee698c690b9decf1e5da83c884071d9690..73fb4232f9f28c09e6a3f44090dc401684be8747 100644 (file)
@@ -150,18 +150,18 @@ int ath9k_hw_nvram_swap_data(struct ath_hw *ah, bool *swap_needed, int size)
                return -EIO;
        }
 
-       if (magic == AR5416_EEPROM_MAGIC) {
-               *swap_needed = false;
-       } else if (swab16(magic) == AR5416_EEPROM_MAGIC) {
+       *swap_needed = false;
+       if (swab16(magic) == AR5416_EEPROM_MAGIC) {
                if (ah->ah_flags & AH_NO_EEP_SWAP) {
                        ath_info(common,
                                 "Ignoring endianness difference in EEPROM magic bytes.\n");
-
-                       *swap_needed = false;
                } else {
                        *swap_needed = true;
                }
-       } else {
+       } else if (magic != AR5416_EEPROM_MAGIC) {
+               if (ath9k_hw_use_flash(ah))
+                       return 0;
+
                ath_err(common,
                        "Invalid EEPROM Magic (0x%04x).\n", magic);
                return -EINVAL;
index 53637399bb99d0743663c50cd674e2a1711cfc3b..b98db8a0a069bdf71e3c09dcf873aa6905700823 100644 (file)
@@ -879,11 +879,24 @@ int brcmf_sdiod_abort(struct brcmf_sdio_dev *sdiodev, uint fn)
        return 0;
 }
 
-static void brcmf_sdiod_sgtable_alloc(struct brcmf_sdio_dev *sdiodev)
+void brcmf_sdiod_sgtable_alloc(struct brcmf_sdio_dev *sdiodev)
 {
+       struct sdio_func *func;
+       struct mmc_host *host;
+       uint max_blocks;
        uint nents;
        int err;
 
+       func = sdiodev->func[2];
+       host = func->card->host;
+       sdiodev->sg_support = host->max_segs > 1;
+       max_blocks = min_t(uint, host->max_blk_count, 511u);
+       sdiodev->max_request_size = min_t(uint, host->max_req_size,
+                                         max_blocks * func->cur_blksize);
+       sdiodev->max_segment_count = min_t(uint, host->max_segs,
+                                          SG_MAX_SINGLE_ALLOC);
+       sdiodev->max_segment_size = host->max_seg_size;
+
        if (!sdiodev->sg_support)
                return;
 
@@ -1021,9 +1034,6 @@ static void brcmf_sdiod_host_fixup(struct mmc_host *host)
 
 static int brcmf_sdiod_probe(struct brcmf_sdio_dev *sdiodev)
 {
-       struct sdio_func *func;
-       struct mmc_host *host;
-       uint max_blocks;
        int ret = 0;
 
        sdiodev->num_funcs = 2;
@@ -1054,26 +1064,6 @@ static int brcmf_sdiod_probe(struct brcmf_sdio_dev *sdiodev)
                goto out;
        }
 
-       /*
-        * determine host related variables after brcmf_sdiod_probe()
-        * as func->cur_blksize is properly set and F2 init has been
-        * completed successfully.
-        */
-       func = sdiodev->func[2];
-       host = func->card->host;
-       sdiodev->sg_support = host->max_segs > 1;
-       max_blocks = min_t(uint, host->max_blk_count, 511u);
-       sdiodev->max_request_size = min_t(uint, host->max_req_size,
-                                         max_blocks * func->cur_blksize);
-       sdiodev->max_segment_count = min_t(uint, host->max_segs,
-                                          SG_MAX_SINGLE_ALLOC);
-       sdiodev->max_segment_size = host->max_seg_size;
-
-       /* allocate scatter-gather table. sg support
-        * will be disabled upon allocation failure.
-        */
-       brcmf_sdiod_sgtable_alloc(sdiodev);
-
        ret = brcmf_sdiod_freezer_attach(sdiodev);
        if (ret)
                goto out;
@@ -1084,7 +1074,7 @@ static int brcmf_sdiod_probe(struct brcmf_sdio_dev *sdiodev)
                ret = -ENODEV;
                goto out;
        }
-       brcmf_sdiod_host_fixup(host);
+       brcmf_sdiod_host_fixup(sdiodev->func[2]->card->host);
 out:
        if (ret)
                brcmf_sdiod_remove(sdiodev);
index 4265b50faa98126400434f1c28995ded21e9e7f5..cfee477a6eb1f4b3f4ce5af01d3d19d37d768fb6 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/kernel.h>
 #include <linux/string.h>
 #include <linux/netdevice.h>
+#include <linux/module.h>
 #include <brcmu_wifi.h>
 #include <brcmu_utils.h>
 #include "core.h"
index dd6614332836a86443e505d9ee6394ec26ccfb6b..a14d9d9da094224b0c8c6d326547a5001c29d925 100644 (file)
@@ -4114,6 +4114,11 @@ struct brcmf_sdio *brcmf_sdio_probe(struct brcmf_sdio_dev *sdiodev)
                goto fail;
        }
 
+       /* allocate scatter-gather table. sg support
+        * will be disabled upon allocation failure.
+        */
+       brcmf_sdiod_sgtable_alloc(bus->sdiodev);
+
        /* Query the F2 block size, set roundup accordingly */
        bus->blocksize = bus->sdiodev->func[2]->cur_blksize;
        bus->roundup = min(max_roundup, bus->blocksize);
index 5ec7a6d876723029f3a98a1047e0da9bcbf53d53..23f223150cef2cf4ec768b196d906603b92f36b2 100644 (file)
@@ -342,6 +342,7 @@ int brcmf_sdiod_ramrw(struct brcmf_sdio_dev *sdiodev, bool write, u32 address,
 
 /* Issue an abort to the specified function */
 int brcmf_sdiod_abort(struct brcmf_sdio_dev *sdiodev, uint fn);
+void brcmf_sdiod_sgtable_alloc(struct brcmf_sdio_dev *sdiodev);
 void brcmf_sdiod_change_state(struct brcmf_sdio_dev *sdiodev,
                              enum brcmf_sdiod_state state);
 #ifdef CONFIG_PM_SLEEP
index e60cf141ed796f0bbcaf1c8b68cf7a08676ce86e..fa41a5e1c8902002d3bf8a1d3991a416d6600499 100644 (file)
 #define IWL7260_UCODE_API_MAX  17
 #define IWL7265_UCODE_API_MAX  17
 #define IWL7265D_UCODE_API_MAX 20
+#define IWL3168_UCODE_API_MAX  20
 
 /* Oldest version we won't warn about */
 #define IWL7260_UCODE_API_OK   13
 #define IWL7265_UCODE_API_OK   13
 #define IWL7265D_UCODE_API_OK  13
+#define IWL3168_UCODE_API_OK   20
 
 /* Lowest firmware API version supported */
 #define IWL7260_UCODE_API_MIN  13
 #define IWL7265_UCODE_API_MIN  13
 #define IWL7265D_UCODE_API_MIN 13
+#define IWL3168_UCODE_API_MIN  20
 
 /* NVM versions */
 #define IWL7260_NVM_VERSION            0x0a1d
@@ -92,6 +95,8 @@
 #define IWL3160_TX_POWER_VERSION       0xffff /* meaningless */
 #define IWL3165_NVM_VERSION            0x709
 #define IWL3165_TX_POWER_VERSION       0xffff /* meaningless */
+#define IWL3168_NVM_VERSION            0xd01
+#define IWL3168_TX_POWER_VERSION       0xffff /* meaningless */
 #define IWL7265_NVM_VERSION            0x0a1d
 #define IWL7265_TX_POWER_VERSION       0xffff /* meaningless */
 #define IWL7265D_NVM_VERSION           0x0c11
 #define IWL3160_FW_PRE "iwlwifi-3160-"
 #define IWL3160_MODULE_FIRMWARE(api) IWL3160_FW_PRE __stringify(api) ".ucode"
 
+#define IWL3168_FW_PRE "iwlwifi-3168-"
+#define IWL3168_MODULE_FIRMWARE(api) IWL3168_FW_PRE __stringify(api) ".ucode"
+
 #define IWL7265_FW_PRE "iwlwifi-7265-"
 #define IWL7265_MODULE_FIRMWARE(api) IWL7265_FW_PRE __stringify(api) ".ucode"
 
@@ -180,6 +188,12 @@ static const struct iwl_ht_params iwl7000_ht_params = {
        .ucode_api_ok = IWL7265_UCODE_API_OK,                   \
        .ucode_api_min = IWL7265_UCODE_API_MIN
 
+#define IWL_DEVICE_3008                                                \
+       IWL_DEVICE_7000_COMMON,                                 \
+       .ucode_api_max = IWL3168_UCODE_API_MAX,                 \
+       .ucode_api_ok = IWL3168_UCODE_API_OK,                   \
+       .ucode_api_min = IWL3168_UCODE_API_MIN
+
 #define IWL_DEVICE_7005D                                       \
        IWL_DEVICE_7000_COMMON,                                 \
        .ucode_api_max = IWL7265D_UCODE_API_MAX,                \
@@ -299,11 +313,11 @@ const struct iwl_cfg iwl3165_2ac_cfg = {
 
 const struct iwl_cfg iwl3168_2ac_cfg = {
        .name = "Intel(R) Dual Band Wireless AC 3168",
-       .fw_name_pre = IWL7265D_FW_PRE,
-       IWL_DEVICE_7000,
+       .fw_name_pre = IWL3168_FW_PRE,
+       IWL_DEVICE_3008,
        .ht_params = &iwl7000_ht_params,
-       .nvm_ver = IWL3165_NVM_VERSION,
-       .nvm_calib_ver = IWL3165_TX_POWER_VERSION,
+       .nvm_ver = IWL3168_NVM_VERSION,
+       .nvm_calib_ver = IWL3168_TX_POWER_VERSION,
        .pwr_tx_backoffs = iwl7265_pwr_tx_backoffs,
        .dccm_len = IWL7265_DCCM_LEN,
 };
@@ -376,5 +390,6 @@ const struct iwl_cfg iwl7265d_n_cfg = {
 
 MODULE_FIRMWARE(IWL7260_MODULE_FIRMWARE(IWL7260_UCODE_API_OK));
 MODULE_FIRMWARE(IWL3160_MODULE_FIRMWARE(IWL7260_UCODE_API_OK));
+MODULE_FIRMWARE(IWL3168_MODULE_FIRMWARE(IWL3168_UCODE_API_OK));
 MODULE_FIRMWARE(IWL7265_MODULE_FIRMWARE(IWL7265_UCODE_API_OK));
 MODULE_FIRMWARE(IWL7265D_MODULE_FIRMWARE(IWL7265D_UCODE_API_OK));
index 0036d18334af4ab893b0b45214e972a3836d7885..ba3f0bbddde8874db03a74cd9a0ca01df862bc38 100644 (file)
@@ -510,6 +510,9 @@ struct iwl_mvm_tx_resp {
  * @scd_ssn: the index of the last contiguously sent packet
  * @txed: number of Txed frames in this batch
  * @txed_2_done: number of Acked frames in this batch
+ * @reduced_txp: power reduced according to TPC. This is the actual value and
+ *     not a copy from the LQ command. Thus, if not the first rate was used
+ *     for Tx-ing then this value will be set to 0 by FW.
  */
 struct iwl_mvm_ba_notif {
        __le32 sta_addr_lo32;
@@ -524,7 +527,8 @@ struct iwl_mvm_ba_notif {
        __le16 scd_ssn;
        u8 txed;
        u8 txed_2_done;
-       __le16 reserved1;
+       u8 reduced_txp;
+       u8 reserved1;
 } __packed;
 
 /*
index 7bb6fd0e4391a92f0200aad6e62362fc20c48802..94caa88df4422764573dbd6099af5315c962bdc5 100644 (file)
@@ -2,6 +2,7 @@
  *
  * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 Intel Deutschland GmbH
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -724,14 +725,28 @@ static int _rs_collect_tx_data(struct iwl_mvm *mvm,
        return 0;
 }
 
-static int rs_collect_tx_data(struct iwl_mvm *mvm,
-                             struct iwl_lq_sta *lq_sta,
-                             struct iwl_scale_tbl_info *tbl,
-                             int scale_index, int attempts, int successes,
-                             u8 reduced_txp)
+static int rs_collect_tpc_data(struct iwl_mvm *mvm,
+                              struct iwl_lq_sta *lq_sta,
+                              struct iwl_scale_tbl_info *tbl,
+                              int scale_index, int attempts, int successes,
+                              u8 reduced_txp)
+{
+       struct iwl_rate_scale_data *window = NULL;
+
+       if (WARN_ON_ONCE(reduced_txp > TPC_MAX_REDUCTION))
+               return -EINVAL;
+
+       window = &tbl->tpc_win[reduced_txp];
+       return  _rs_collect_tx_data(mvm, tbl, scale_index, attempts, successes,
+                                   window);
+}
+
+static int rs_collect_tlc_data(struct iwl_mvm *mvm,
+                              struct iwl_lq_sta *lq_sta,
+                              struct iwl_scale_tbl_info *tbl,
+                              int scale_index, int attempts, int successes)
 {
        struct iwl_rate_scale_data *window = NULL;
-       int ret;
 
        if (scale_index < 0 || scale_index >= IWL_RATE_COUNT)
                return -EINVAL;
@@ -745,16 +760,6 @@ static int rs_collect_tx_data(struct iwl_mvm *mvm,
 
        /* Select window for current tx bit rate */
        window = &(tbl->win[scale_index]);
-
-       ret = _rs_collect_tx_data(mvm, tbl, scale_index, attempts, successes,
-                                 window);
-       if (ret)
-               return ret;
-
-       if (WARN_ON_ONCE(reduced_txp > TPC_MAX_REDUCTION))
-               return -EINVAL;
-
-       window = &tbl->tpc_win[reduced_txp];
        return _rs_collect_tx_data(mvm, tbl, scale_index, attempts, successes,
                                   window);
 }
@@ -1301,17 +1306,30 @@ void iwl_mvm_rs_tx_status(struct iwl_mvm *mvm, struct ieee80211_sta *sta,
         * first index into rate scale table.
         */
        if (info->flags & IEEE80211_TX_STAT_AMPDU) {
-               /* ampdu_ack_len = 0 marks no BA was received. In this case
-                * treat it as a single frame loss as we don't want the success
-                * ratio to dip too quickly because a BA wasn't received
+               rs_collect_tpc_data(mvm, lq_sta, curr_tbl, lq_rate.index,
+                                   info->status.ampdu_len,
+                                   info->status.ampdu_ack_len,
+                                   reduced_txp);
+
+               /* ampdu_ack_len = 0 marks no BA was received. For TLC, treat
+                * it as a single frame loss as we don't want the success ratio
+                * to dip too quickly because a BA wasn't received.
+                * For TPC, there's no need for this optimisation since we want
+                * to recover very quickly from a bad power reduction and,
+                * therefore we'd like the success ratio to get an immediate hit
+                * when failing to get a BA, so we'd switch back to a lower or
+                * zero power reduction. When FW transmits agg with a rate
+                * different from the initial rate, it will not use reduced txp
+                * and will send BA notification twice (one empty with reduced
+                * txp equal to the value from LQ and one with reduced txp 0).
+                * We need to update counters for each txp level accordingly.
                 */
                if (info->status.ampdu_ack_len == 0)
                        info->status.ampdu_len = 1;
 
-               rs_collect_tx_data(mvm, lq_sta, curr_tbl, lq_rate.index,
-                                  info->status.ampdu_len,
-                                  info->status.ampdu_ack_len,
-                                  reduced_txp);
+               rs_collect_tlc_data(mvm, lq_sta, curr_tbl, lq_rate.index,
+                                   info->status.ampdu_len,
+                                   info->status.ampdu_ack_len);
 
                /* Update success/fail counts if not searching for new mode */
                if (lq_sta->rs_state == RS_STATE_STAY_IN_COLUMN) {
@@ -1344,9 +1362,13 @@ void iwl_mvm_rs_tx_status(struct iwl_mvm *mvm, struct ieee80211_sta *sta,
                        else
                                continue;
 
-                       rs_collect_tx_data(mvm, lq_sta, tmp_tbl, lq_rate.index,
-                                          1, i < retries ? 0 : legacy_success,
-                                          reduced_txp);
+                       rs_collect_tpc_data(mvm, lq_sta, tmp_tbl,
+                                           lq_rate.index, 1,
+                                           i < retries ? 0 : legacy_success,
+                                           reduced_txp);
+                       rs_collect_tlc_data(mvm, lq_sta, tmp_tbl,
+                                           lq_rate.index, 1,
+                                           i < retries ? 0 : legacy_success);
                }
 
                /* Update success/fail counts if not searching for new mode */
index 8bf48a7d0f4e99e8297feace5368375b6a16a151..0914ec2fd57467023b0f5336b2e630856c06cf81 100644 (file)
@@ -1029,7 +1029,6 @@ static void iwl_mvm_rx_tx_cmd_agg(struct iwl_mvm *mvm,
                struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta);
                mvmsta->tid_data[tid].rate_n_flags =
                        le32_to_cpu(tx_resp->initial_rate);
-               mvmsta->tid_data[tid].reduced_tpc = tx_resp->reduced_tpc;
                mvmsta->tid_data[tid].tx_time =
                        le16_to_cpu(tx_resp->wireless_media_time);
        }
@@ -1060,7 +1059,7 @@ static void iwl_mvm_tx_info_from_ba_notif(struct ieee80211_tx_info *info,
        /* TODO: not accounted if the whole A-MPDU failed */
        info->status.tx_time = tid_data->tx_time;
        info->status.status_driver_data[0] =
-               (void *)(uintptr_t)tid_data->reduced_tpc;
+               (void *)(uintptr_t)ba_notif->reduced_txp;
        info->status.status_driver_data[1] =
                (void *)(uintptr_t)tid_data->rate_n_flags;
 }
@@ -1133,6 +1132,8 @@ void iwl_mvm_rx_ba_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb)
                           scd_flow, ba_resp_scd_ssn, ba_notif->txed,
                           ba_notif->txed_2_done);
 
+       IWL_DEBUG_TX_REPLY(mvm, "reduced txp from ba notif %d\n",
+                          ba_notif->reduced_txp);
        tid_data->next_reclaimed = ba_resp_scd_ssn;
 
        iwl_mvm_check_ratid_empty(mvm, sta, tid);
index 6261a68cae907d793cdbf62ceb9dda97e5031fee..00335ea6b3eb5fa6035a2ce06e9fcfb0ea612ac7 100644 (file)
@@ -378,7 +378,10 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
        {IWL_PCI_DEVICE(0x3165, 0x8110, iwl3165_2ac_cfg)},
 
 /* 3168 Series */
+       {IWL_PCI_DEVICE(0x24FB, 0x2010, iwl3168_2ac_cfg)},
        {IWL_PCI_DEVICE(0x24FB, 0x2110, iwl3168_2ac_cfg)},
+       {IWL_PCI_DEVICE(0x24FB, 0x2050, iwl3168_2ac_cfg)},
+       {IWL_PCI_DEVICE(0x24FB, 0x2150, iwl3168_2ac_cfg)},
        {IWL_PCI_DEVICE(0x24FB, 0x0000, iwl3168_2ac_cfg)},
 
 /* 7265 Series */
@@ -475,6 +478,7 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
        {IWL_PCI_DEVICE(0x24F3, 0x0000, iwl8265_2ac_cfg)},
        {IWL_PCI_DEVICE(0x24FD, 0x0010, iwl8265_2ac_cfg)},
        {IWL_PCI_DEVICE(0x24FD, 0x8010, iwl8265_2ac_cfg)},
+       {IWL_PCI_DEVICE(0x24FD, 0x0810, iwl8265_2ac_cfg)},
 
 /* 9000 Series */
        {IWL_PCI_DEVICE(0x9DF0, 0x2A10, iwl5165_2ac_cfg)},
index c32889a1e39cf53d4e2d1ab3a4bfc2b8b492af1e..a28414c50edf11131e41aa65f02957a5a07c796f 100644 (file)
@@ -991,7 +991,8 @@ static void mac80211_hwsim_tx_frame_nl(struct ieee80211_hw *hw,
                goto nla_put_failure;
        }
 
-       if (nla_put(skb, HWSIM_ATTR_ADDR_TRANSMITTER, ETH_ALEN, hdr->addr2))
+       if (nla_put(skb, HWSIM_ATTR_ADDR_TRANSMITTER,
+                   ETH_ALEN, data->addresses[1].addr))
                goto nla_put_failure;
 
        /* We get the skb->data */
@@ -2736,7 +2737,7 @@ static struct mac80211_hwsim_data *get_hwsim_data_ref_from_addr(const u8 *addr)
 
        spin_lock_bh(&hwsim_radio_lock);
        list_for_each_entry(data, &hwsim_radios, list) {
-               if (mac80211_hwsim_addr_match(data, addr)) {
+               if (memcmp(data->addresses[1].addr, addr, ETH_ALEN) == 0) {
                        _found = true;
                        break;
                }
index 9a3966cd6fbedb0e41e1f2c741405b10aee92fdc..155f343981fe29679d278b0621d3cf7ce64b2a77 100644 (file)
@@ -273,8 +273,10 @@ static void rt2400pci_config_filter(struct rt2x00_dev *rt2x00dev,
                           !(filter_flags & FIF_PLCPFAIL));
        rt2x00_set_field32(&reg, RXCSR0_DROP_CONTROL,
                           !(filter_flags & FIF_CONTROL));
-       rt2x00_set_field32(&reg, RXCSR0_DROP_NOT_TO_ME, 1);
+       rt2x00_set_field32(&reg, RXCSR0_DROP_NOT_TO_ME,
+                          !test_bit(CONFIG_MONITORING, &rt2x00dev->flags));
        rt2x00_set_field32(&reg, RXCSR0_DROP_TODS,
+                          !test_bit(CONFIG_MONITORING, &rt2x00dev->flags) &&
                           !rt2x00dev->intf_ap_count);
        rt2x00_set_field32(&reg, RXCSR0_DROP_VERSION_ERROR, 1);
        rt2x00mmio_register_write(rt2x00dev, RXCSR0, reg);
index 1a6740b4d396211cd3c00290cd9abb06128c55bf..2553cdd7406623cde30a99531e4d86a74ae4836e 100644 (file)
@@ -274,8 +274,10 @@ static void rt2500pci_config_filter(struct rt2x00_dev *rt2x00dev,
                           !(filter_flags & FIF_PLCPFAIL));
        rt2x00_set_field32(&reg, RXCSR0_DROP_CONTROL,
                           !(filter_flags & FIF_CONTROL));
-       rt2x00_set_field32(&reg, RXCSR0_DROP_NOT_TO_ME, 1);
+       rt2x00_set_field32(&reg, RXCSR0_DROP_NOT_TO_ME,
+                          !test_bit(CONFIG_MONITORING, &rt2x00dev->flags));
        rt2x00_set_field32(&reg, RXCSR0_DROP_TODS,
+                          !test_bit(CONFIG_MONITORING, &rt2x00dev->flags) &&
                           !rt2x00dev->intf_ap_count);
        rt2x00_set_field32(&reg, RXCSR0_DROP_VERSION_ERROR, 1);
        rt2x00_set_field32(&reg, RXCSR0_DROP_MCAST,
index d26018f30b7dfa9e8381ceb609d5eddd10ff47a8..2d64611de300146d98bcc254171f961e3af26831 100644 (file)
@@ -437,8 +437,10 @@ static void rt2500usb_config_filter(struct rt2x00_dev *rt2x00dev,
                           !(filter_flags & FIF_PLCPFAIL));
        rt2x00_set_field16(&reg, TXRX_CSR2_DROP_CONTROL,
                           !(filter_flags & FIF_CONTROL));
-       rt2x00_set_field16(&reg, TXRX_CSR2_DROP_NOT_TO_ME, 1);
+       rt2x00_set_field16(&reg, TXRX_CSR2_DROP_NOT_TO_ME,
+                          !test_bit(CONFIG_MONITORING, &rt2x00dev->flags));
        rt2x00_set_field16(&reg, TXRX_CSR2_DROP_TODS,
+                          !test_bit(CONFIG_MONITORING, &rt2x00dev->flags) &&
                           !rt2x00dev->intf_ap_count);
        rt2x00_set_field16(&reg, TXRX_CSR2_DROP_VERSION_ERROR, 1);
        rt2x00_set_field16(&reg, TXRX_CSR2_DROP_MULTICAST,
index 9733b31a780d380fd2bfd550efe155943b0fdb34..a26afcab03ed02242fdc7fa2f3b94307fc52024e 100644 (file)
@@ -1490,7 +1490,8 @@ void rt2800_config_filter(struct rt2x00_dev *rt2x00dev,
                           !(filter_flags & FIF_FCSFAIL));
        rt2x00_set_field32(&reg, RX_FILTER_CFG_DROP_PHY_ERROR,
                           !(filter_flags & FIF_PLCPFAIL));
-       rt2x00_set_field32(&reg, RX_FILTER_CFG_DROP_NOT_TO_ME, 1);
+       rt2x00_set_field32(&reg, RX_FILTER_CFG_DROP_NOT_TO_ME,
+                          !test_bit(CONFIG_MONITORING, &rt2x00dev->flags));
        rt2x00_set_field32(&reg, RX_FILTER_CFG_DROP_NOT_MY_BSSD, 0);
        rt2x00_set_field32(&reg, RX_FILTER_CFG_DROP_VER_ERROR, 1);
        rt2x00_set_field32(&reg, RX_FILTER_CFG_DROP_MULTICAST,
index 3282ddb766f4224a8e10e037fa7ea4332dfe0e27..26427140a963b592a52d6c2443e45dfa24f73cf6 100644 (file)
@@ -669,6 +669,7 @@ enum rt2x00_state_flags {
        CONFIG_POWERSAVING,
        CONFIG_HT_DISABLED,
        CONFIG_QOS_DISABLED,
+       CONFIG_MONITORING,
 
        /*
         * Mark we currently are sequentially reading TX_STA_FIFO register
index 7e8bb1198ae9f749dfc3e419f1ae652ffa7a0b8d..6a1f508d472f0e6ac3be534d780d209e90a0278d 100644 (file)
@@ -277,6 +277,11 @@ void rt2x00lib_config(struct rt2x00_dev *rt2x00dev,
        else
                clear_bit(CONFIG_POWERSAVING, &rt2x00dev->flags);
 
+       if (conf->flags & IEEE80211_CONF_MONITOR)
+               set_bit(CONFIG_MONITORING, &rt2x00dev->flags);
+       else
+               clear_bit(CONFIG_MONITORING, &rt2x00dev->flags);
+
        rt2x00dev->curr_band = conf->chandef.chan->band;
        rt2x00dev->curr_freq = conf->chandef.chan->center_freq;
        rt2x00dev->tx_power = conf->power_level;
index 3c26ee65a41513127cbc999416ee90fabc11d890..13da95a24cf77bb366481252226f3c4566352d67 100644 (file)
@@ -385,11 +385,6 @@ void rt2x00mac_configure_filter(struct ieee80211_hw *hw,
                        *total_flags |= FIF_PSPOLL;
        }
 
-       /*
-        * Check if there is any work left for us.
-        */
-       if (rt2x00dev->packet_filter == *total_flags)
-               return;
        rt2x00dev->packet_filter = *total_flags;
 
        rt2x00dev->ops->lib->config_filter(rt2x00dev, *total_flags);
index c0e730ea1b69058a663f953e73165fa23a850c72..24a3436ef952870a118e0adddf5f06fa3c5ca6f5 100644 (file)
@@ -530,8 +530,10 @@ static void rt61pci_config_filter(struct rt2x00_dev *rt2x00dev,
                           !(filter_flags & FIF_PLCPFAIL));
        rt2x00_set_field32(&reg, TXRX_CSR0_DROP_CONTROL,
                           !(filter_flags & (FIF_CONTROL | FIF_PSPOLL)));
-       rt2x00_set_field32(&reg, TXRX_CSR0_DROP_NOT_TO_ME, 1);
+       rt2x00_set_field32(&reg, TXRX_CSR0_DROP_NOT_TO_ME,
+                          !test_bit(CONFIG_MONITORING, &rt2x00dev->flags));
        rt2x00_set_field32(&reg, TXRX_CSR0_DROP_TO_DS,
+                          !test_bit(CONFIG_MONITORING, &rt2x00dev->flags) &&
                           !rt2x00dev->intf_ap_count);
        rt2x00_set_field32(&reg, TXRX_CSR0_DROP_VERSION_ERROR, 1);
        rt2x00_set_field32(&reg, TXRX_CSR0_DROP_MULTICAST,
index 7081e13b4fd67ebbaa7fa8af3f15a1d81fbc219e..7bbc869311681988dbbcd50907fd33d84f4f1a85 100644 (file)
@@ -480,8 +480,10 @@ static void rt73usb_config_filter(struct rt2x00_dev *rt2x00dev,
                           !(filter_flags & FIF_PLCPFAIL));
        rt2x00_set_field32(&reg, TXRX_CSR0_DROP_CONTROL,
                           !(filter_flags & (FIF_CONTROL | FIF_PSPOLL)));
-       rt2x00_set_field32(&reg, TXRX_CSR0_DROP_NOT_TO_ME, 1);
+       rt2x00_set_field32(&reg, TXRX_CSR0_DROP_NOT_TO_ME,
+                          !test_bit(CONFIG_MONITORING, &rt2x00dev->flags));
        rt2x00_set_field32(&reg, TXRX_CSR0_DROP_TO_DS,
+                          !test_bit(CONFIG_MONITORING, &rt2x00dev->flags) &&
                           !rt2x00dev->intf_ap_count);
        rt2x00_set_field32(&reg, TXRX_CSR0_DROP_VERSION_ERROR, 1);
        rt2x00_set_field32(&reg, TXRX_CSR0_DROP_MULTICAST,
index a62bf0a65c321bb73553c403c5233ccd30d1f8c7..5be34118e0af22b69392f5caf07f9eef5672c7fe 100644 (file)
@@ -351,7 +351,6 @@ static const struct ieee80211_regdomain *_rtl_regdomain_select(
        case COUNTRY_CODE_SPAIN:
        case COUNTRY_CODE_FRANCE:
        case COUNTRY_CODE_ISRAEL:
-       case COUNTRY_CODE_WORLD_WIDE_13:
                return &rtl_regdom_12_13;
        case COUNTRY_CODE_MKK:
        case COUNTRY_CODE_MKK1:
@@ -360,6 +359,7 @@ static const struct ieee80211_regdomain *_rtl_regdomain_select(
                return &rtl_regdom_14_60_64;
        case COUNTRY_CODE_GLOBAL_DOMAIN:
                return &rtl_regdom_14;
+       case COUNTRY_CODE_WORLD_WIDE_13:
        case COUNTRY_CODE_WORLD_WIDE_13_5G_ALL:
                return &rtl_regdom_12_13_5g_all;
        default:
index d6abf191122a3260cbd97f5c1a2dbd1c2cb5c5e2..96ccd4e943db49908b0821f1d515aef36072db15 100644 (file)
@@ -364,6 +364,7 @@ static void xennet_tx_buf_gc(struct netfront_queue *queue)
        RING_IDX cons, prod;
        unsigned short id;
        struct sk_buff *skb;
+       bool more_to_do;
 
        BUG_ON(!netif_carrier_ok(queue->info->netdev));
 
@@ -398,18 +399,8 @@ static void xennet_tx_buf_gc(struct netfront_queue *queue)
 
                queue->tx.rsp_cons = prod;
 
-               /*
-                * Set a new event, then check for race with update of tx_cons.
-                * Note that it is essential to schedule a callback, no matter
-                * how few buffers are pending. Even if there is space in the
-                * transmit ring, higher layers may be blocked because too much
-                * data is outstanding: in such cases notification from Xen is
-                * likely to be the only kick that we'll get.
-                */
-               queue->tx.sring->rsp_event =
-                       prod + ((queue->tx.sring->req_prod - prod) >> 1) + 1;
-               mb();           /* update shared area */
-       } while ((cons == prod) && (prod != queue->tx.sring->rsp_prod));
+               RING_FINAL_CHECK_FOR_RESPONSES(&queue->tx, more_to_do);
+       } while (more_to_do);
 
        xennet_maybe_wake_tx(queue);
 }
index 4d5535c4cddf3877109c9a0970f88606ec7f42de..7116472b462577935eb0bad50b986ba510a038b5 100644 (file)
@@ -1 +1,2 @@
+source "drivers/ntb/hw/amd/Kconfig"
 source "drivers/ntb/hw/intel/Kconfig"
index 175d7c92a569a5af90dc026acbd177444d8b9d90..532e0859b4a11bb6c90d357ad8bc00cbea86e006 100644 (file)
@@ -1 +1,2 @@
+obj-$(CONFIG_NTB_AMD)  += amd/
 obj-$(CONFIG_NTB_INTEL)        += intel/
diff --git a/drivers/ntb/hw/amd/Kconfig b/drivers/ntb/hw/amd/Kconfig
new file mode 100644 (file)
index 0000000..cfe903c
--- /dev/null
@@ -0,0 +1,7 @@
+config NTB_AMD
+       tristate "AMD Non-Transparent Bridge support"
+       depends on X86_64
+       help
+        This driver supports AMD NTB on capable Zeppelin hardware.
+
+        If unsure, say N.
diff --git a/drivers/ntb/hw/amd/Makefile b/drivers/ntb/hw/amd/Makefile
new file mode 100644 (file)
index 0000000..ad54da9
--- /dev/null
@@ -0,0 +1 @@
+obj-$(CONFIG_NTB_AMD) += ntb_hw_amd.o
diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.c b/drivers/ntb/hw/amd/ntb_hw_amd.c
new file mode 100644 (file)
index 0000000..588803a
--- /dev/null
@@ -0,0 +1,1143 @@
+/*
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ *   redistributing this file, you may do so under either license.
+ *
+ *   GPL LICENSE SUMMARY
+ *
+ *   Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
+ *
+ *   BSD LICENSE
+ *
+ *   Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copy
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of AMD Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * AMD PCIe NTB Linux driver
+ *
+ * Contact Information:
+ * Xiangliang Yu <Xiangliang.Yu@amd.com>
+ */
+
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/acpi.h>
+#include <linux/pci.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/ntb.h>
+
+#include "ntb_hw_amd.h"
+
+#define NTB_NAME       "ntb_hw_amd"
+#define NTB_DESC       "AMD(R) PCI-E Non-Transparent Bridge Driver"
+#define NTB_VER                "1.0"
+
+MODULE_DESCRIPTION(NTB_DESC);
+MODULE_VERSION(NTB_VER);
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("AMD Inc.");
+
+static const struct file_operations amd_ntb_debugfs_info;
+static struct dentry *debugfs_dir;
+
+static int ndev_mw_to_bar(struct amd_ntb_dev *ndev, int idx)
+{
+       if (idx < 0 || idx > ndev->mw_count)
+               return -EINVAL;
+
+       return 1 << idx;
+}
+
+static int amd_ntb_mw_count(struct ntb_dev *ntb)
+{
+       return ntb_ndev(ntb)->mw_count;
+}
+
+static int amd_ntb_mw_get_range(struct ntb_dev *ntb, int idx,
+                               phys_addr_t *base,
+                               resource_size_t *size,
+                               resource_size_t *align,
+                               resource_size_t *align_size)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+       int bar;
+
+       bar = ndev_mw_to_bar(ndev, idx);
+       if (bar < 0)
+               return bar;
+
+       if (base)
+               *base = pci_resource_start(ndev->ntb.pdev, bar);
+
+       if (size)
+               *size = pci_resource_len(ndev->ntb.pdev, bar);
+
+       if (align)
+               *align = SZ_4K;
+
+       if (align_size)
+               *align_size = 1;
+
+       return 0;
+}
+
+static int amd_ntb_mw_set_trans(struct ntb_dev *ntb, int idx,
+                               dma_addr_t addr, resource_size_t size)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+       unsigned long xlat_reg, limit_reg = 0;
+       resource_size_t mw_size;
+       void __iomem *mmio, *peer_mmio;
+       u64 base_addr, limit, reg_val;
+       int bar;
+
+       bar = ndev_mw_to_bar(ndev, idx);
+       if (bar < 0)
+               return bar;
+
+       mw_size = pci_resource_len(ndev->ntb.pdev, bar);
+
+       /* make sure the range fits in the usable mw size */
+       if (size > mw_size)
+               return -EINVAL;
+
+       mmio = ndev->self_mmio;
+       peer_mmio = ndev->peer_mmio;
+
+       base_addr = pci_resource_start(ndev->ntb.pdev, bar);
+
+       if (bar != 1) {
+               xlat_reg = AMD_BAR23XLAT_OFFSET + ((bar - 2) << 3);
+               limit_reg = AMD_BAR23LMT_OFFSET + ((bar - 2) << 3);
+
+               /* Set the limit if supported */
+               limit = base_addr + size;
+
+               /* set and verify setting the translation address */
+               write64(addr, peer_mmio + xlat_reg);
+               reg_val = read64(peer_mmio + xlat_reg);
+               if (reg_val != addr) {
+                       write64(0, peer_mmio + xlat_reg);
+                       return -EIO;
+               }
+
+               /* set and verify setting the limit */
+               write64(limit, mmio + limit_reg);
+               reg_val = read64(mmio + limit_reg);
+               if (reg_val != limit) {
+                       write64(base_addr, mmio + limit_reg);
+                       write64(0, peer_mmio + xlat_reg);
+                       return -EIO;
+               }
+       } else {
+               xlat_reg = AMD_BAR1XLAT_OFFSET;
+               limit_reg = AMD_BAR1LMT_OFFSET;
+
+               /* split bar addr range must all be 32 bit */
+               if (addr & (~0ull << 32))
+                       return -EINVAL;
+               if ((addr + size) & (~0ull << 32))
+                       return -EINVAL;
+
+               /* Set the limit if supported */
+               limit = base_addr + size;
+
+               /* set and verify setting the translation address */
+               write64(addr, peer_mmio + xlat_reg);
+               reg_val = read64(peer_mmio + xlat_reg);
+               if (reg_val != addr) {
+                       write64(0, peer_mmio + xlat_reg);
+                       return -EIO;
+               }
+
+               /* set and verify setting the limit */
+               writel(limit, mmio + limit_reg);
+               reg_val = readl(mmio + limit_reg);
+               if (reg_val != limit) {
+                       writel(base_addr, mmio + limit_reg);
+                       writel(0, peer_mmio + xlat_reg);
+                       return -EIO;
+               }
+       }
+
+       return 0;
+}
+
+static int amd_link_is_up(struct amd_ntb_dev *ndev)
+{
+       if (!ndev->peer_sta)
+               return NTB_LNK_STA_ACTIVE(ndev->cntl_sta);
+
+       /* If peer_sta is reset or D0 event, the ISR has
+        * started a timer to check link status of hardware.
+        * So here just clear status bit. And if peer_sta is
+        * D3 or PME_TO, D0/reset event will be happened when
+        * system wakeup/poweron, so do nothing here.
+        */
+       if (ndev->peer_sta & AMD_PEER_RESET_EVENT)
+               ndev->peer_sta &= ~AMD_PEER_RESET_EVENT;
+       else if (ndev->peer_sta & AMD_PEER_D0_EVENT)
+               ndev->peer_sta = 0;
+
+       return 0;
+}
+
+static int amd_ntb_link_is_up(struct ntb_dev *ntb,
+                             enum ntb_speed *speed,
+                             enum ntb_width *width)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+       int ret = 0;
+
+       if (amd_link_is_up(ndev)) {
+               if (speed)
+                       *speed = NTB_LNK_STA_SPEED(ndev->lnk_sta);
+               if (width)
+                       *width = NTB_LNK_STA_WIDTH(ndev->lnk_sta);
+
+               dev_dbg(ndev_dev(ndev), "link is up.\n");
+
+               ret = 1;
+       } else {
+               if (speed)
+                       *speed = NTB_SPEED_NONE;
+               if (width)
+                       *width = NTB_WIDTH_NONE;
+
+               dev_dbg(ndev_dev(ndev), "link is down.\n");
+       }
+
+       return ret;
+}
+
+static int amd_ntb_link_enable(struct ntb_dev *ntb,
+                              enum ntb_speed max_speed,
+                              enum ntb_width max_width)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+       void __iomem *mmio = ndev->self_mmio;
+       u32 ntb_ctl;
+
+       /* Enable event interrupt */
+       ndev->int_mask &= ~AMD_EVENT_INTMASK;
+       writel(ndev->int_mask, mmio + AMD_INTMASK_OFFSET);
+
+       if (ndev->ntb.topo == NTB_TOPO_SEC)
+               return -EINVAL;
+       dev_dbg(ndev_dev(ndev), "Enabling Link.\n");
+
+       ntb_ctl = readl(mmio + AMD_CNTL_OFFSET);
+       ntb_ctl |= (PMM_REG_CTL | SMM_REG_CTL);
+       writel(ntb_ctl, mmio + AMD_CNTL_OFFSET);
+
+       return 0;
+}
+
+static int amd_ntb_link_disable(struct ntb_dev *ntb)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+       void __iomem *mmio = ndev->self_mmio;
+       u32 ntb_ctl;
+
+       /* Disable event interrupt */
+       ndev->int_mask |= AMD_EVENT_INTMASK;
+       writel(ndev->int_mask, mmio + AMD_INTMASK_OFFSET);
+
+       if (ndev->ntb.topo == NTB_TOPO_SEC)
+               return -EINVAL;
+       dev_dbg(ndev_dev(ndev), "Enabling Link.\n");
+
+       ntb_ctl = readl(mmio + AMD_CNTL_OFFSET);
+       ntb_ctl &= ~(PMM_REG_CTL | SMM_REG_CTL);
+       writel(ntb_ctl, mmio + AMD_CNTL_OFFSET);
+
+       return 0;
+}
+
+static u64 amd_ntb_db_valid_mask(struct ntb_dev *ntb)
+{
+       return ntb_ndev(ntb)->db_valid_mask;
+}
+
+static int amd_ntb_db_vector_count(struct ntb_dev *ntb)
+{
+       return ntb_ndev(ntb)->db_count;
+}
+
+static u64 amd_ntb_db_vector_mask(struct ntb_dev *ntb, int db_vector)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+
+       if (db_vector < 0 || db_vector > ndev->db_count)
+               return 0;
+
+       return ntb_ndev(ntb)->db_valid_mask & (1 << db_vector);
+}
+
+static u64 amd_ntb_db_read(struct ntb_dev *ntb)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+       void __iomem *mmio = ndev->self_mmio;
+
+       return (u64)readw(mmio + AMD_DBSTAT_OFFSET);
+}
+
+static int amd_ntb_db_clear(struct ntb_dev *ntb, u64 db_bits)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+       void __iomem *mmio = ndev->self_mmio;
+
+       writew((u16)db_bits, mmio + AMD_DBSTAT_OFFSET);
+
+       return 0;
+}
+
+static int amd_ntb_db_set_mask(struct ntb_dev *ntb, u64 db_bits)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+       void __iomem *mmio = ndev->self_mmio;
+       unsigned long flags;
+
+       if (db_bits & ~ndev->db_valid_mask)
+               return -EINVAL;
+
+       spin_lock_irqsave(&ndev->db_mask_lock, flags);
+       ndev->db_mask |= db_bits;
+       writew((u16)ndev->db_mask, mmio + AMD_DBMASK_OFFSET);
+       spin_unlock_irqrestore(&ndev->db_mask_lock, flags);
+
+       return 0;
+}
+
+static int amd_ntb_db_clear_mask(struct ntb_dev *ntb, u64 db_bits)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+       void __iomem *mmio = ndev->self_mmio;
+       unsigned long flags;
+
+       if (db_bits & ~ndev->db_valid_mask)
+               return -EINVAL;
+
+       spin_lock_irqsave(&ndev->db_mask_lock, flags);
+       ndev->db_mask &= ~db_bits;
+       writew((u16)ndev->db_mask, mmio + AMD_DBMASK_OFFSET);
+       spin_unlock_irqrestore(&ndev->db_mask_lock, flags);
+
+       return 0;
+}
+
+static int amd_ntb_peer_db_addr(struct ntb_dev *ntb,
+                               phys_addr_t *db_addr,
+                               resource_size_t *db_size)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+
+       if (db_addr)
+               *db_addr = (phys_addr_t)(ndev->peer_mmio + AMD_DBREQ_OFFSET);
+       if (db_size)
+               *db_size = sizeof(u32);
+
+       return 0;
+}
+
+static int amd_ntb_peer_db_set(struct ntb_dev *ntb, u64 db_bits)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+       void __iomem *mmio = ndev->self_mmio;
+
+       writew((u16)db_bits, mmio + AMD_DBREQ_OFFSET);
+
+       return 0;
+}
+
+static int amd_ntb_spad_count(struct ntb_dev *ntb)
+{
+       return ntb_ndev(ntb)->spad_count;
+}
+
+static u32 amd_ntb_spad_read(struct ntb_dev *ntb, int idx)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+       void __iomem *mmio = ndev->self_mmio;
+       u32 offset;
+
+       if (idx < 0 || idx >= ndev->spad_count)
+               return 0;
+
+       offset = ndev->self_spad + (idx << 2);
+       return readl(mmio + AMD_SPAD_OFFSET + offset);
+}
+
+static int amd_ntb_spad_write(struct ntb_dev *ntb,
+                             int idx, u32 val)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+       void __iomem *mmio = ndev->self_mmio;
+       u32 offset;
+
+       if (idx < 0 || idx >= ndev->spad_count)
+               return -EINVAL;
+
+       offset = ndev->self_spad + (idx << 2);
+       writel(val, mmio + AMD_SPAD_OFFSET + offset);
+
+       return 0;
+}
+
+static int amd_ntb_peer_spad_addr(struct ntb_dev *ntb, int idx,
+                                 phys_addr_t *spad_addr)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+
+       if (idx < 0 || idx >= ndev->spad_count)
+               return -EINVAL;
+
+       if (spad_addr)
+               *spad_addr = (phys_addr_t)(ndev->self_mmio + AMD_SPAD_OFFSET +
+                                          ndev->peer_spad + (idx << 2));
+       return 0;
+}
+
+static u32 amd_ntb_peer_spad_read(struct ntb_dev *ntb, int idx)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+       void __iomem *mmio = ndev->self_mmio;
+       u32 offset;
+
+       if (idx < 0 || idx >= ndev->spad_count)
+               return -EINVAL;
+
+       offset = ndev->peer_spad + (idx << 2);
+       return readl(mmio + AMD_SPAD_OFFSET + offset);
+}
+
+static int amd_ntb_peer_spad_write(struct ntb_dev *ntb,
+                                  int idx, u32 val)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+       void __iomem *mmio = ndev->self_mmio;
+       u32 offset;
+
+       if (idx < 0 || idx >= ndev->spad_count)
+               return -EINVAL;
+
+       offset = ndev->peer_spad + (idx << 2);
+       writel(val, mmio + AMD_SPAD_OFFSET + offset);
+
+       return 0;
+}
+
+static const struct ntb_dev_ops amd_ntb_ops = {
+       .mw_count               = amd_ntb_mw_count,
+       .mw_get_range           = amd_ntb_mw_get_range,
+       .mw_set_trans           = amd_ntb_mw_set_trans,
+       .link_is_up             = amd_ntb_link_is_up,
+       .link_enable            = amd_ntb_link_enable,
+       .link_disable           = amd_ntb_link_disable,
+       .db_valid_mask          = amd_ntb_db_valid_mask,
+       .db_vector_count        = amd_ntb_db_vector_count,
+       .db_vector_mask         = amd_ntb_db_vector_mask,
+       .db_read                = amd_ntb_db_read,
+       .db_clear               = amd_ntb_db_clear,
+       .db_set_mask            = amd_ntb_db_set_mask,
+       .db_clear_mask          = amd_ntb_db_clear_mask,
+       .peer_db_addr           = amd_ntb_peer_db_addr,
+       .peer_db_set            = amd_ntb_peer_db_set,
+       .spad_count             = amd_ntb_spad_count,
+       .spad_read              = amd_ntb_spad_read,
+       .spad_write             = amd_ntb_spad_write,
+       .peer_spad_addr         = amd_ntb_peer_spad_addr,
+       .peer_spad_read         = amd_ntb_peer_spad_read,
+       .peer_spad_write        = amd_ntb_peer_spad_write,
+};
+
+static void amd_ack_smu(struct amd_ntb_dev *ndev, u32 bit)
+{
+       void __iomem *mmio = ndev->self_mmio;
+       int reg;
+
+       reg = readl(mmio + AMD_SMUACK_OFFSET);
+       reg |= bit;
+       writel(reg, mmio + AMD_SMUACK_OFFSET);
+
+       ndev->peer_sta |= bit;
+}
+
+static void amd_handle_event(struct amd_ntb_dev *ndev, int vec)
+{
+       void __iomem *mmio = ndev->self_mmio;
+       u32 status;
+
+       status = readl(mmio + AMD_INTSTAT_OFFSET);
+       if (!(status & AMD_EVENT_INTMASK))
+               return;
+
+       dev_dbg(ndev_dev(ndev), "status = 0x%x and vec = %d\n", status, vec);
+
+       status &= AMD_EVENT_INTMASK;
+       switch (status) {
+       case AMD_PEER_FLUSH_EVENT:
+               dev_info(ndev_dev(ndev), "Flush is done.\n");
+               break;
+       case AMD_PEER_RESET_EVENT:
+               amd_ack_smu(ndev, AMD_PEER_RESET_EVENT);
+
+               /* link down first */
+               ntb_link_event(&ndev->ntb);
+               /* polling peer status */
+               schedule_delayed_work(&ndev->hb_timer, AMD_LINK_HB_TIMEOUT);
+
+               break;
+       case AMD_PEER_D3_EVENT:
+       case AMD_PEER_PMETO_EVENT:
+               amd_ack_smu(ndev, status);
+
+               /* link down */
+               ntb_link_event(&ndev->ntb);
+
+               break;
+       case AMD_PEER_D0_EVENT:
+               mmio = ndev->peer_mmio;
+               status = readl(mmio + AMD_PMESTAT_OFFSET);
+               /* check if this is WAKEUP event */
+               if (status & 0x1)
+                       dev_info(ndev_dev(ndev), "Wakeup is done.\n");
+
+               amd_ack_smu(ndev, AMD_PEER_D0_EVENT);
+
+               /* start a timer to poll link status */
+               schedule_delayed_work(&ndev->hb_timer,
+                                     AMD_LINK_HB_TIMEOUT);
+               break;
+       default:
+               dev_info(ndev_dev(ndev), "event status = 0x%x.\n", status);
+               break;
+       }
+}
+
+static irqreturn_t ndev_interrupt(struct amd_ntb_dev *ndev, int vec)
+{
+       dev_dbg(ndev_dev(ndev), "vec %d\n", vec);
+
+       if (vec > (AMD_DB_CNT - 1) || (ndev->msix_vec_count == 1))
+               amd_handle_event(ndev, vec);
+
+       if (vec < AMD_DB_CNT)
+               ntb_db_event(&ndev->ntb, vec);
+
+       return IRQ_HANDLED;
+}
+
+static irqreturn_t ndev_vec_isr(int irq, void *dev)
+{
+       struct amd_ntb_vec *nvec = dev;
+
+       return ndev_interrupt(nvec->ndev, nvec->num);
+}
+
+static irqreturn_t ndev_irq_isr(int irq, void *dev)
+{
+       struct amd_ntb_dev *ndev = dev;
+
+       return ndev_interrupt(ndev, irq - ndev_pdev(ndev)->irq);
+}
+
+static int ndev_init_isr(struct amd_ntb_dev *ndev,
+                        int msix_min, int msix_max)
+{
+       struct pci_dev *pdev;
+       int rc, i, msix_count, node;
+
+       pdev = ndev_pdev(ndev);
+
+       node = dev_to_node(&pdev->dev);
+
+       ndev->db_mask = ndev->db_valid_mask;
+
+       /* Try to set up msix irq */
+       ndev->vec = kzalloc_node(msix_max * sizeof(*ndev->vec),
+                                GFP_KERNEL, node);
+       if (!ndev->vec)
+               goto err_msix_vec_alloc;
+
+       ndev->msix = kzalloc_node(msix_max * sizeof(*ndev->msix),
+                                 GFP_KERNEL, node);
+       if (!ndev->msix)
+               goto err_msix_alloc;
+
+       for (i = 0; i < msix_max; ++i)
+               ndev->msix[i].entry = i;
+
+       msix_count = pci_enable_msix_range(pdev, ndev->msix,
+                                          msix_min, msix_max);
+       if (msix_count < 0)
+               goto err_msix_enable;
+
+       /* NOTE: Disable MSIX if msix count is less than 16 because of
+        * hardware limitation.
+        */
+       if (msix_count < msix_min) {
+               pci_disable_msix(pdev);
+               goto err_msix_enable;
+       }
+
+       for (i = 0; i < msix_count; ++i) {
+               ndev->vec[i].ndev = ndev;
+               ndev->vec[i].num = i;
+               rc = request_irq(ndev->msix[i].vector, ndev_vec_isr, 0,
+                                "ndev_vec_isr", &ndev->vec[i]);
+               if (rc)
+                       goto err_msix_request;
+       }
+
+       dev_dbg(ndev_dev(ndev), "Using msix interrupts\n");
+       ndev->db_count = msix_min;
+       ndev->msix_vec_count = msix_max;
+       return 0;
+
+err_msix_request:
+       while (i-- > 0)
+               free_irq(ndev->msix[i].vector, ndev);
+       pci_disable_msix(pdev);
+err_msix_enable:
+       kfree(ndev->msix);
+err_msix_alloc:
+       kfree(ndev->vec);
+err_msix_vec_alloc:
+       ndev->msix = NULL;
+       ndev->vec = NULL;
+
+       /* Try to set up msi irq */
+       rc = pci_enable_msi(pdev);
+       if (rc)
+               goto err_msi_enable;
+
+       rc = request_irq(pdev->irq, ndev_irq_isr, 0,
+                        "ndev_irq_isr", ndev);
+       if (rc)
+               goto err_msi_request;
+
+       dev_dbg(ndev_dev(ndev), "Using msi interrupts\n");
+       ndev->db_count = 1;
+       ndev->msix_vec_count = 1;
+       return 0;
+
+err_msi_request:
+       pci_disable_msi(pdev);
+err_msi_enable:
+
+       /* Try to set up intx irq */
+       pci_intx(pdev, 1);
+
+       rc = request_irq(pdev->irq, ndev_irq_isr, IRQF_SHARED,
+                        "ndev_irq_isr", ndev);
+       if (rc)
+               goto err_intx_request;
+
+       dev_dbg(ndev_dev(ndev), "Using intx interrupts\n");
+       ndev->db_count = 1;
+       ndev->msix_vec_count = 1;
+       return 0;
+
+err_intx_request:
+       return rc;
+}
+
+static void ndev_deinit_isr(struct amd_ntb_dev *ndev)
+{
+       struct pci_dev *pdev;
+       void __iomem *mmio = ndev->self_mmio;
+       int i;
+
+       pdev = ndev_pdev(ndev);
+
+       /* Mask all doorbell interrupts */
+       ndev->db_mask = ndev->db_valid_mask;
+       writel(ndev->db_mask, mmio + AMD_DBMASK_OFFSET);
+
+       if (ndev->msix) {
+               i = ndev->msix_vec_count;
+               while (i--)
+                       free_irq(ndev->msix[i].vector, &ndev->vec[i]);
+               pci_disable_msix(pdev);
+               kfree(ndev->msix);
+               kfree(ndev->vec);
+       } else {
+               free_irq(pdev->irq, ndev);
+               if (pci_dev_msi_enabled(pdev))
+                       pci_disable_msi(pdev);
+               else
+                       pci_intx(pdev, 0);
+       }
+}
+
+static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf,
+                                size_t count, loff_t *offp)
+{
+       struct amd_ntb_dev *ndev;
+       void __iomem *mmio;
+       char *buf;
+       size_t buf_size;
+       ssize_t ret, off;
+       union { u64 v64; u32 v32; u16 v16; } u;
+
+       ndev = filp->private_data;
+       mmio = ndev->self_mmio;
+
+       buf_size = min(count, 0x800ul);
+
+       buf = kmalloc(buf_size, GFP_KERNEL);
+       if (!buf)
+               return -ENOMEM;
+
+       off = 0;
+
+       off += scnprintf(buf + off, buf_size - off,
+                        "NTB Device Information:\n");
+
+       off += scnprintf(buf + off, buf_size - off,
+                        "Connection Topology -\t%s\n",
+                        ntb_topo_string(ndev->ntb.topo));
+
+       off += scnprintf(buf + off, buf_size - off,
+                        "LNK STA -\t\t%#06x\n", ndev->lnk_sta);
+
+       if (!amd_link_is_up(ndev)) {
+               off += scnprintf(buf + off, buf_size - off,
+                                "Link Status -\t\tDown\n");
+       } else {
+               off += scnprintf(buf + off, buf_size - off,
+                                "Link Status -\t\tUp\n");
+               off += scnprintf(buf + off, buf_size - off,
+                                "Link Speed -\t\tPCI-E Gen %u\n",
+                                NTB_LNK_STA_SPEED(ndev->lnk_sta));
+               off += scnprintf(buf + off, buf_size - off,
+                                "Link Width -\t\tx%u\n",
+                                NTB_LNK_STA_WIDTH(ndev->lnk_sta));
+       }
+
+       off += scnprintf(buf + off, buf_size - off,
+                        "Memory Window Count -\t%u\n", ndev->mw_count);
+       off += scnprintf(buf + off, buf_size - off,
+                        "Scratchpad Count -\t%u\n", ndev->spad_count);
+       off += scnprintf(buf + off, buf_size - off,
+                        "Doorbell Count -\t%u\n", ndev->db_count);
+       off += scnprintf(buf + off, buf_size - off,
+                        "MSIX Vector Count -\t%u\n", ndev->msix_vec_count);
+
+       off += scnprintf(buf + off, buf_size - off,
+                        "Doorbell Valid Mask -\t%#llx\n", ndev->db_valid_mask);
+
+       u.v32 = readl(ndev->self_mmio + AMD_DBMASK_OFFSET);
+       off += scnprintf(buf + off, buf_size - off,
+                        "Doorbell Mask -\t\t\t%#06x\n", u.v32);
+
+       u.v32 = readl(mmio + AMD_DBSTAT_OFFSET);
+       off += scnprintf(buf + off, buf_size - off,
+                        "Doorbell Bell -\t\t\t%#06x\n", u.v32);
+
+       off += scnprintf(buf + off, buf_size - off,
+                        "\nNTB Incoming XLAT:\n");
+
+       u.v64 = read64(mmio + AMD_BAR1XLAT_OFFSET);
+       off += scnprintf(buf + off, buf_size - off,
+                        "XLAT1 -\t\t%#018llx\n", u.v64);
+
+       u.v64 = read64(ndev->self_mmio + AMD_BAR23XLAT_OFFSET);
+       off += scnprintf(buf + off, buf_size - off,
+                        "XLAT23 -\t\t%#018llx\n", u.v64);
+
+       u.v64 = read64(ndev->self_mmio + AMD_BAR45XLAT_OFFSET);
+       off += scnprintf(buf + off, buf_size - off,
+                        "XLAT45 -\t\t%#018llx\n", u.v64);
+
+       u.v32 = readl(mmio + AMD_BAR1LMT_OFFSET);
+       off += scnprintf(buf + off, buf_size - off,
+                        "LMT1 -\t\t\t%#06x\n", u.v32);
+
+       u.v64 = read64(ndev->self_mmio + AMD_BAR23LMT_OFFSET);
+       off += scnprintf(buf + off, buf_size - off,
+                        "LMT23 -\t\t\t%#018llx\n", u.v64);
+
+       u.v64 = read64(ndev->self_mmio + AMD_BAR45LMT_OFFSET);
+       off += scnprintf(buf + off, buf_size - off,
+                        "LMT45 -\t\t\t%#018llx\n", u.v64);
+
+       ret = simple_read_from_buffer(ubuf, count, offp, buf, off);
+       kfree(buf);
+       return ret;
+}
+
+static void ndev_init_debugfs(struct amd_ntb_dev *ndev)
+{
+       if (!debugfs_dir) {
+               ndev->debugfs_dir = NULL;
+               ndev->debugfs_info = NULL;
+       } else {
+               ndev->debugfs_dir =
+                       debugfs_create_dir(ndev_name(ndev), debugfs_dir);
+               if (!ndev->debugfs_dir)
+                       ndev->debugfs_info = NULL;
+               else
+                       ndev->debugfs_info =
+                               debugfs_create_file("info", S_IRUSR,
+                                                   ndev->debugfs_dir, ndev,
+                                                   &amd_ntb_debugfs_info);
+       }
+}
+
+static void ndev_deinit_debugfs(struct amd_ntb_dev *ndev)
+{
+       debugfs_remove_recursive(ndev->debugfs_dir);
+}
+
+static inline void ndev_init_struct(struct amd_ntb_dev *ndev,
+                                   struct pci_dev *pdev)
+{
+       ndev->ntb.pdev = pdev;
+       ndev->ntb.topo = NTB_TOPO_NONE;
+       ndev->ntb.ops = &amd_ntb_ops;
+       ndev->int_mask = AMD_EVENT_INTMASK;
+       spin_lock_init(&ndev->db_mask_lock);
+}
+
+static int amd_poll_link(struct amd_ntb_dev *ndev)
+{
+       void __iomem *mmio = ndev->peer_mmio;
+       u32 reg, stat;
+       int rc;
+
+       reg = readl(mmio + AMD_SIDEINFO_OFFSET);
+       reg &= NTB_LIN_STA_ACTIVE_BIT;
+
+       dev_dbg(ndev_dev(ndev), "%s: reg_val = 0x%x.\n", __func__, reg);
+
+       if (reg == ndev->cntl_sta)
+               return 0;
+
+       ndev->cntl_sta = reg;
+
+       rc = pci_read_config_dword(ndev->ntb.pdev,
+                                  AMD_LINK_STATUS_OFFSET, &stat);
+       if (rc)
+               return 0;
+       ndev->lnk_sta = stat;
+
+       return 1;
+}
+
+static void amd_link_hb(struct work_struct *work)
+{
+       struct amd_ntb_dev *ndev = hb_ndev(work);
+
+       if (amd_poll_link(ndev))
+               ntb_link_event(&ndev->ntb);
+
+       if (!amd_link_is_up(ndev))
+               schedule_delayed_work(&ndev->hb_timer, AMD_LINK_HB_TIMEOUT);
+}
+
+static int amd_init_isr(struct amd_ntb_dev *ndev)
+{
+       return ndev_init_isr(ndev, AMD_DB_CNT, AMD_MSIX_VECTOR_CNT);
+}
+
+static void amd_init_side_info(struct amd_ntb_dev *ndev)
+{
+       void __iomem *mmio = ndev->self_mmio;
+       unsigned int reg;
+
+       reg = readl(mmio + AMD_SIDEINFO_OFFSET);
+       if (!(reg & AMD_SIDE_READY)) {
+               reg |= AMD_SIDE_READY;
+               writel(reg, mmio + AMD_SIDEINFO_OFFSET);
+       }
+}
+
+static void amd_deinit_side_info(struct amd_ntb_dev *ndev)
+{
+       void __iomem *mmio = ndev->self_mmio;
+       unsigned int reg;
+
+       reg = readl(mmio + AMD_SIDEINFO_OFFSET);
+       if (reg & AMD_SIDE_READY) {
+               reg &= ~AMD_SIDE_READY;
+               writel(reg, mmio + AMD_SIDEINFO_OFFSET);
+               readl(mmio + AMD_SIDEINFO_OFFSET);
+       }
+}
+
+static int amd_init_ntb(struct amd_ntb_dev *ndev)
+{
+       void __iomem *mmio = ndev->self_mmio;
+
+       ndev->mw_count = AMD_MW_CNT;
+       ndev->spad_count = AMD_SPADS_CNT;
+       ndev->db_count = AMD_DB_CNT;
+
+       switch (ndev->ntb.topo) {
+       case NTB_TOPO_PRI:
+       case NTB_TOPO_SEC:
+               ndev->spad_count >>= 1;
+               if (ndev->ntb.topo == NTB_TOPO_PRI) {
+                       ndev->self_spad = 0;
+                       ndev->peer_spad = 0x20;
+               } else {
+                       ndev->self_spad = 0x20;
+                       ndev->peer_spad = 0;
+               }
+
+               INIT_DELAYED_WORK(&ndev->hb_timer, amd_link_hb);
+               schedule_delayed_work(&ndev->hb_timer, AMD_LINK_HB_TIMEOUT);
+
+               break;
+       default:
+               dev_err(ndev_dev(ndev), "AMD NTB does not support B2B mode.\n");
+               return -EINVAL;
+       }
+
+       ndev->db_valid_mask = BIT_ULL(ndev->db_count) - 1;
+
+       /* Mask event interrupts */
+       writel(ndev->int_mask, mmio + AMD_INTMASK_OFFSET);
+
+       return 0;
+}
+
+static enum ntb_topo amd_get_topo(struct amd_ntb_dev *ndev)
+{
+       void __iomem *mmio = ndev->self_mmio;
+       u32 info;
+
+       info = readl(mmio + AMD_SIDEINFO_OFFSET);
+       if (info & AMD_SIDE_MASK)
+               return NTB_TOPO_SEC;
+       else
+               return NTB_TOPO_PRI;
+}
+
+static int amd_init_dev(struct amd_ntb_dev *ndev)
+{
+       struct pci_dev *pdev;
+       int rc = 0;
+
+       pdev = ndev_pdev(ndev);
+
+       ndev->ntb.topo = amd_get_topo(ndev);
+       dev_dbg(ndev_dev(ndev), "AMD NTB topo is %s\n",
+               ntb_topo_string(ndev->ntb.topo));
+
+       rc = amd_init_ntb(ndev);
+       if (rc)
+               return rc;
+
+       rc = amd_init_isr(ndev);
+       if (rc) {
+               dev_err(ndev_dev(ndev), "fail to init isr.\n");
+               return rc;
+       }
+
+       ndev->db_valid_mask = BIT_ULL(ndev->db_count) - 1;
+
+       return 0;
+}
+
+static void amd_deinit_dev(struct amd_ntb_dev *ndev)
+{
+       cancel_delayed_work_sync(&ndev->hb_timer);
+
+       ndev_deinit_isr(ndev);
+}
+
+static int amd_ntb_init_pci(struct amd_ntb_dev *ndev,
+                           struct pci_dev *pdev)
+{
+       int rc;
+
+       pci_set_drvdata(pdev, ndev);
+
+       rc = pci_enable_device(pdev);
+       if (rc)
+               goto err_pci_enable;
+
+       rc = pci_request_regions(pdev, NTB_NAME);
+       if (rc)
+               goto err_pci_regions;
+
+       pci_set_master(pdev);
+
+       rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+       if (rc) {
+               rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+               if (rc)
+                       goto err_dma_mask;
+               dev_warn(ndev_dev(ndev), "Cannot DMA highmem\n");
+       }
+
+       rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+       if (rc) {
+               rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+               if (rc)
+                       goto err_dma_mask;
+               dev_warn(ndev_dev(ndev), "Cannot DMA consistent highmem\n");
+       }
+
+       ndev->self_mmio = pci_iomap(pdev, 0, 0);
+       if (!ndev->self_mmio) {
+               rc = -EIO;
+               goto err_dma_mask;
+       }
+       ndev->peer_mmio = ndev->self_mmio + AMD_PEER_OFFSET;
+
+       return 0;
+
+err_dma_mask:
+       pci_clear_master(pdev);
+err_pci_regions:
+       pci_disable_device(pdev);
+err_pci_enable:
+       pci_set_drvdata(pdev, NULL);
+       return rc;
+}
+
+static void amd_ntb_deinit_pci(struct amd_ntb_dev *ndev)
+{
+       struct pci_dev *pdev = ndev_pdev(ndev);
+
+       pci_iounmap(pdev, ndev->self_mmio);
+
+       pci_clear_master(pdev);
+       pci_release_regions(pdev);
+       pci_disable_device(pdev);
+       pci_set_drvdata(pdev, NULL);
+}
+
+static int amd_ntb_pci_probe(struct pci_dev *pdev,
+                            const struct pci_device_id *id)
+{
+       struct amd_ntb_dev *ndev;
+       int rc, node;
+
+       node = dev_to_node(&pdev->dev);
+
+       ndev = kzalloc_node(sizeof(*ndev), GFP_KERNEL, node);
+       if (!ndev) {
+               rc = -ENOMEM;
+               goto err_ndev;
+       }
+
+       ndev_init_struct(ndev, pdev);
+
+       rc = amd_ntb_init_pci(ndev, pdev);
+       if (rc)
+               goto err_init_pci;
+
+       rc = amd_init_dev(ndev);
+       if (rc)
+               goto err_init_dev;
+
+       /* write side info */
+       amd_init_side_info(ndev);
+
+       amd_poll_link(ndev);
+
+       ndev_init_debugfs(ndev);
+
+       rc = ntb_register_device(&ndev->ntb);
+       if (rc)
+               goto err_register;
+
+       dev_info(&pdev->dev, "NTB device registered.\n");
+
+       return 0;
+
+err_register:
+       ndev_deinit_debugfs(ndev);
+       amd_deinit_dev(ndev);
+err_init_dev:
+       amd_ntb_deinit_pci(ndev);
+err_init_pci:
+       kfree(ndev);
+err_ndev:
+       return rc;
+}
+
+static void amd_ntb_pci_remove(struct pci_dev *pdev)
+{
+       struct amd_ntb_dev *ndev = pci_get_drvdata(pdev);
+
+       ntb_unregister_device(&ndev->ntb);
+       ndev_deinit_debugfs(ndev);
+       amd_deinit_side_info(ndev);
+       amd_deinit_dev(ndev);
+       amd_ntb_deinit_pci(ndev);
+       kfree(ndev);
+}
+
+static const struct file_operations amd_ntb_debugfs_info = {
+       .owner = THIS_MODULE,
+       .open = simple_open,
+       .read = ndev_debugfs_read,
+};
+
+static const struct pci_device_id amd_ntb_pci_tbl[] = {
+       {PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_NTB)},
+       {0}
+};
+MODULE_DEVICE_TABLE(pci, amd_ntb_pci_tbl);
+
+static struct pci_driver amd_ntb_pci_driver = {
+       .name           = KBUILD_MODNAME,
+       .id_table       = amd_ntb_pci_tbl,
+       .probe          = amd_ntb_pci_probe,
+       .remove         = amd_ntb_pci_remove,
+};
+
+static int __init amd_ntb_pci_driver_init(void)
+{
+       pr_info("%s %s\n", NTB_DESC, NTB_VER);
+
+       if (debugfs_initialized())
+               debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL);
+
+       return pci_register_driver(&amd_ntb_pci_driver);
+}
+module_init(amd_ntb_pci_driver_init);
+
+static void __exit amd_ntb_pci_driver_exit(void)
+{
+       pci_unregister_driver(&amd_ntb_pci_driver);
+       debugfs_remove_recursive(debugfs_dir);
+}
+module_exit(amd_ntb_pci_driver_exit);
diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.h b/drivers/ntb/hw/amd/ntb_hw_amd.h
new file mode 100644 (file)
index 0000000..2eac3cd
--- /dev/null
@@ -0,0 +1,217 @@
+/*
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ *   redistributing this file, you may do so under either license.
+ *
+ *   GPL LICENSE SUMMARY
+ *
+ *   Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
+ *
+ *   BSD LICENSE
+ *
+ *   Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copy
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of AMD Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * AMD PCIe NTB Linux driver
+ *
+ * Contact Information:
+ * Xiangliang Yu <Xiangliang.Yu@amd.com>
+ */
+
+#ifndef NTB_HW_AMD_H
+#define NTB_HW_AMD_H
+
+#include <linux/ntb.h>
+#include <linux/pci.h>
+
+#define PCI_DEVICE_ID_AMD_NTB  0x145B
+#define AMD_LINK_HB_TIMEOUT    msecs_to_jiffies(1000)
+#define AMD_LINK_STATUS_OFFSET 0x68
+#define NTB_LIN_STA_ACTIVE_BIT 0x00000002
+#define NTB_LNK_STA_SPEED_MASK 0x000F0000
+#define NTB_LNK_STA_WIDTH_MASK 0x03F00000
+#define NTB_LNK_STA_ACTIVE(x)  (!!((x) & NTB_LIN_STA_ACTIVE_BIT))
+#define NTB_LNK_STA_SPEED(x)   (((x) & NTB_LNK_STA_SPEED_MASK) >> 16)
+#define NTB_LNK_STA_WIDTH(x)   (((x) & NTB_LNK_STA_WIDTH_MASK) >> 20)
+
+#ifndef read64
+#ifdef readq
+#define read64 readq
+#else
+#define read64 _read64
+static inline u64 _read64(void __iomem *mmio)
+{
+       u64 low, high;
+
+       low = readl(mmio);
+       high = readl(mmio + sizeof(u32));
+       return low | (high << 32);
+}
+#endif
+#endif
+
+#ifndef write64
+#ifdef writeq
+#define write64 writeq
+#else
+#define write64 _write64
+static inline void _write64(u64 val, void __iomem *mmio)
+{
+       writel(val, mmio);
+       writel(val >> 32, mmio + sizeof(u32));
+}
+#endif
+#endif
+
+enum {
+       /* AMD NTB Capability */
+       AMD_MW_CNT              = 3,
+       AMD_DB_CNT              = 16,
+       AMD_MSIX_VECTOR_CNT     = 24,
+       AMD_SPADS_CNT           = 16,
+
+       /*  AMD NTB register offset */
+       AMD_CNTL_OFFSET         = 0x200,
+
+       /* NTB control register bits */
+       PMM_REG_CTL             = BIT(21),
+       SMM_REG_CTL             = BIT(20),
+       SMM_REG_ACC_PATH        = BIT(18),
+       PMM_REG_ACC_PATH        = BIT(17),
+       NTB_CLK_EN              = BIT(16),
+
+       AMD_STA_OFFSET          = 0x204,
+       AMD_PGSLV_OFFSET        = 0x208,
+       AMD_SPAD_MUX_OFFSET     = 0x20C,
+       AMD_SPAD_OFFSET         = 0x210,
+       AMD_RSMU_HCID           = 0x250,
+       AMD_RSMU_SIID           = 0x254,
+       AMD_PSION_OFFSET        = 0x300,
+       AMD_SSION_OFFSET        = 0x330,
+       AMD_MMINDEX_OFFSET      = 0x400,
+       AMD_MMDATA_OFFSET       = 0x404,
+       AMD_SIDEINFO_OFFSET     = 0x408,
+
+       AMD_SIDE_MASK           = BIT(0),
+       AMD_SIDE_READY          = BIT(1),
+
+       /* limit register */
+       AMD_ROMBARLMT_OFFSET    = 0x410,
+       AMD_BAR1LMT_OFFSET      = 0x414,
+       AMD_BAR23LMT_OFFSET     = 0x418,
+       AMD_BAR45LMT_OFFSET     = 0x420,
+       /* xlat address */
+       AMD_POMBARXLAT_OFFSET   = 0x428,
+       AMD_BAR1XLAT_OFFSET     = 0x430,
+       AMD_BAR23XLAT_OFFSET    = 0x438,
+       AMD_BAR45XLAT_OFFSET    = 0x440,
+       /* doorbell and interrupt */
+       AMD_DBFM_OFFSET         = 0x450,
+       AMD_DBREQ_OFFSET        = 0x454,
+       AMD_MIRRDBSTAT_OFFSET   = 0x458,
+       AMD_DBMASK_OFFSET       = 0x45C,
+       AMD_DBSTAT_OFFSET       = 0x460,
+       AMD_INTMASK_OFFSET      = 0x470,
+       AMD_INTSTAT_OFFSET      = 0x474,
+
+       /* event type */
+       AMD_PEER_FLUSH_EVENT    = BIT(0),
+       AMD_PEER_RESET_EVENT    = BIT(1),
+       AMD_PEER_D3_EVENT       = BIT(2),
+       AMD_PEER_PMETO_EVENT    = BIT(3),
+       AMD_PEER_D0_EVENT       = BIT(4),
+       AMD_EVENT_INTMASK       = (AMD_PEER_FLUSH_EVENT |
+                               AMD_PEER_RESET_EVENT | AMD_PEER_D3_EVENT |
+                               AMD_PEER_PMETO_EVENT | AMD_PEER_D0_EVENT),
+
+       AMD_PMESTAT_OFFSET      = 0x480,
+       AMD_PMSGTRIG_OFFSET     = 0x490,
+       AMD_LTRLATENCY_OFFSET   = 0x494,
+       AMD_FLUSHTRIG_OFFSET    = 0x498,
+
+       /* SMU register*/
+       AMD_SMUACK_OFFSET       = 0x4A0,
+       AMD_SINRST_OFFSET       = 0x4A4,
+       AMD_RSPNUM_OFFSET       = 0x4A8,
+       AMD_SMU_SPADMUTEX       = 0x4B0,
+       AMD_SMU_SPADOFFSET      = 0x4B4,
+
+       AMD_PEER_OFFSET         = 0x400,
+};
+
+struct amd_ntb_dev;
+
+struct amd_ntb_vec {
+       struct amd_ntb_dev      *ndev;
+       int                     num;
+};
+
+struct amd_ntb_dev {
+       struct ntb_dev ntb;
+
+       u32 ntb_side;
+       u32 lnk_sta;
+       u32 cntl_sta;
+       u32 peer_sta;
+
+       unsigned char mw_count;
+       unsigned char spad_count;
+       unsigned char db_count;
+       unsigned char msix_vec_count;
+
+       u64 db_valid_mask;
+       u64 db_mask;
+       u32 int_mask;
+
+       struct msix_entry *msix;
+       struct amd_ntb_vec *vec;
+
+       /* synchronize rmw access of db_mask and hw reg */
+       spinlock_t db_mask_lock;
+
+       void __iomem *self_mmio;
+       void __iomem *peer_mmio;
+       unsigned int self_spad;
+       unsigned int peer_spad;
+
+       struct delayed_work hb_timer;
+
+       struct dentry *debugfs_dir;
+       struct dentry *debugfs_info;
+};
+
+#define ndev_pdev(ndev) ((ndev)->ntb.pdev)
+#define ndev_name(ndev) pci_name(ndev_pdev(ndev))
+#define ndev_dev(ndev) (&ndev_pdev(ndev)->dev)
+#define ntb_ndev(__ntb) container_of(__ntb, struct amd_ntb_dev, ntb)
+#define hb_ndev(__work) container_of(__work, struct amd_ntb_dev, hb_timer.work)
+
+#endif
index a198f82982582a155b230e3964db17b6e01479d1..40d04ef5da9e865c8ddecfe5acf431fef5976daa 100644 (file)
@@ -875,7 +875,7 @@ static int intel_ntb_mw_set_trans(struct ntb_dev *ntb, int idx,
        limit_reg = bar2_off(ndev->xlat_reg->bar2_limit, bar);
 
        if (bar < 4 || !ndev->bar4_split) {
-               base = ioread64(mmio + base_reg);
+               base = ioread64(mmio + base_reg) & NTB_BAR_MASK_64;
 
                /* Set the limit if supported, if size is not mw_size */
                if (limit_reg && size != mw_size)
@@ -906,7 +906,7 @@ static int intel_ntb_mw_set_trans(struct ntb_dev *ntb, int idx,
                if ((addr + size) & (~0ull << 32))
                        return -EINVAL;
 
-               base = ioread32(mmio + base_reg);
+               base = ioread32(mmio + base_reg) & NTB_BAR_MASK_32;
 
                /* Set the limit if supported, if size is not mw_size */
                if (limit_reg && size != mw_size)
index 2eb4addd10d082c03184469619b6bafece08ddde..3ec149cf656276dd362fad25b37661b69d5fb3d0 100644 (file)
 #define NTB_UNSAFE_DB                  BIT_ULL(0)
 #define NTB_UNSAFE_SPAD                        BIT_ULL(1)
 
+#define NTB_BAR_MASK_64                        ~(0xfull)
+#define NTB_BAR_MASK_32                        ~(0xfu)
+
 struct intel_ntb_dev;
 
 struct intel_ntb_reg {
@@ -334,7 +337,8 @@ struct intel_ntb_dev {
 #define ndev_pdev(ndev) ((ndev)->ntb.pdev)
 #define ndev_name(ndev) pci_name(ndev_pdev(ndev))
 #define ndev_dev(ndev) (&ndev_pdev(ndev)->dev)
-#define ntb_ndev(ntb) container_of(ntb, struct intel_ntb_dev, ntb)
-#define hb_ndev(work) container_of(work, struct intel_ntb_dev, hb_timer.work)
+#define ntb_ndev(__ntb) container_of(__ntb, struct intel_ntb_dev, ntb)
+#define hb_ndev(__work) container_of(__work, struct intel_ntb_dev, \
+                                    hb_timer.work)
 
 #endif
index 60654d524858c4bf52f6c5b5668c90719f523eda..ec4775f0ec163a2ac4e3d89851bedfb4a0d52db1 100644 (file)
@@ -171,12 +171,14 @@ struct ntb_transport_qp {
        u64 rx_err_ver;
        u64 rx_memcpy;
        u64 rx_async;
+       u64 dma_rx_prep_err;
        u64 tx_bytes;
        u64 tx_pkts;
        u64 tx_ring_full;
        u64 tx_err_no_buf;
        u64 tx_memcpy;
        u64 tx_async;
+       u64 dma_tx_prep_err;
 };
 
 struct ntb_transport_mw {
@@ -249,6 +251,8 @@ enum {
 #define QP_TO_MW(nt, qp)       ((qp) % nt->mw_count)
 #define NTB_QP_DEF_NUM_ENTRIES 100
 #define NTB_LINK_DOWN_TIMEOUT  10
+#define DMA_RETRIES            20
+#define DMA_OUT_RESOURCE_TO    50
 
 static void ntb_transport_rxc_db(unsigned long data);
 static const struct ntb_ctx_ops ntb_transport_ops;
@@ -501,6 +505,12 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count,
        out_offset += snprintf(buf + out_offset, out_count - out_offset,
                               "free tx - \t%u\n",
                               ntb_transport_tx_free_entry(qp));
+       out_offset += snprintf(buf + out_offset, out_count - out_offset,
+                              "DMA tx prep err - \t%llu\n",
+                              qp->dma_tx_prep_err);
+       out_offset += snprintf(buf + out_offset, out_count - out_offset,
+                              "DMA rx prep err - \t%llu\n",
+                              qp->dma_rx_prep_err);
 
        out_offset += snprintf(buf + out_offset, out_count - out_offset,
                               "\n");
@@ -726,6 +736,8 @@ static void ntb_qp_link_down_reset(struct ntb_transport_qp *qp)
        qp->tx_err_no_buf = 0;
        qp->tx_memcpy = 0;
        qp->tx_async = 0;
+       qp->dma_tx_prep_err = 0;
+       qp->dma_rx_prep_err = 0;
 }
 
 static void ntb_qp_link_cleanup(struct ntb_transport_qp *qp)
@@ -1228,6 +1240,7 @@ static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset)
        struct dmaengine_unmap_data *unmap;
        dma_cookie_t cookie;
        void *buf = entry->buf;
+       int retries = 0;
 
        len = entry->len;
 
@@ -1263,11 +1276,21 @@ static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset)
 
        unmap->from_cnt = 1;
 
-       txd = device->device_prep_dma_memcpy(chan, unmap->addr[1],
-                                            unmap->addr[0], len,
-                                            DMA_PREP_INTERRUPT);
-       if (!txd)
+       for (retries = 0; retries < DMA_RETRIES; retries++) {
+               txd = device->device_prep_dma_memcpy(chan, unmap->addr[1],
+                                                    unmap->addr[0], len,
+                                                    DMA_PREP_INTERRUPT);
+               if (txd)
+                       break;
+
+               set_current_state(TASK_INTERRUPTIBLE);
+               schedule_timeout(DMA_OUT_RESOURCE_TO);
+       }
+
+       if (!txd) {
+               qp->dma_rx_prep_err++;
                goto err_get_unmap;
+       }
 
        txd->callback = ntb_rx_copy_callback;
        txd->callback_param = entry;
@@ -1460,6 +1483,7 @@ static void ntb_async_tx(struct ntb_transport_qp *qp,
        void __iomem *offset;
        size_t len = entry->len;
        void *buf = entry->buf;
+       int retries = 0;
 
        offset = qp->tx_mw + qp->tx_max_frame * qp->tx_index;
        hdr = offset + qp->tx_max_frame - sizeof(struct ntb_payload_header);
@@ -1494,10 +1518,20 @@ static void ntb_async_tx(struct ntb_transport_qp *qp,
 
        unmap->to_cnt = 1;
 
-       txd = device->device_prep_dma_memcpy(chan, dest, unmap->addr[0], len,
-                                            DMA_PREP_INTERRUPT);
-       if (!txd)
+       for (retries = 0; retries < DMA_RETRIES; retries++) {
+               txd = device->device_prep_dma_memcpy(chan, dest, unmap->addr[0],
+                                                    len, DMA_PREP_INTERRUPT);
+               if (txd)
+                       break;
+
+               set_current_state(TASK_INTERRUPTIBLE);
+               schedule_timeout(DMA_OUT_RESOURCE_TO);
+       }
+
+       if (!txd) {
+               qp->dma_tx_prep_err++;
                goto err_get_unmap;
+       }
 
        txd->callback = ntb_tx_copy_callback;
        txd->callback_param = entry;
@@ -1532,7 +1566,7 @@ static int ntb_process_tx(struct ntb_transport_qp *qp,
 
        if (entry->len > qp->tx_max_frame - sizeof(struct ntb_payload_header)) {
                if (qp->tx_handler)
-                       qp->tx_handler(qp->cb_data, qp, NULL, -EIO);
+                       qp->tx_handler(qp, qp->cb_data, NULL, -EIO);
 
                ntb_list_add(&qp->ntb_tx_free_q_lock, &entry->entry,
                             &qp->tx_free_q);
index 01852f98a843db8b9d04e4a9e5e115bce756a4b0..a5d0eda44438eefec4b88bad9d31afa484da4c6d 100644 (file)
@@ -17,3 +17,11 @@ config NTB_TOOL
         functioning at a basic level.
 
         If unsure, say N.
+
+config NTB_PERF
+       tristate "NTB RAW Perf Measuring Tool"
+       help
+        This is a tool to measure raw NTB performance by transferring data
+        to and from the window without additional software interaction.
+
+        If unsure, say N.
index 0ea32a324b6ca206a6f9beeaa4b86c75f0cab3e1..9e77e0b761c24fd45c6f435305e2b52e1705edee 100644 (file)
@@ -1,2 +1,3 @@
 obj-$(CONFIG_NTB_PINGPONG) += ntb_pingpong.o
 obj-$(CONFIG_NTB_TOOL) += ntb_tool.o
+obj-$(CONFIG_NTB_PERF) += ntb_perf.o
diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c
new file mode 100644 (file)
index 0000000..c8a37ba
--- /dev/null
@@ -0,0 +1,748 @@
+/*
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ *   redistributing this file, you may do so under either license.
+ *
+ *   GPL LICENSE SUMMARY
+ *
+ *   Copyright(c) 2015 Intel Corporation. All rights reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
+ *
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2015 Intel Corporation. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copy
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *   PCIe NTB Perf Linux driver
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/kthread.h>
+#include <linux/time.h>
+#include <linux/timer.h>
+#include <linux/dma-mapping.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/debugfs.h>
+#include <linux/dmaengine.h>
+#include <linux/delay.h>
+#include <linux/sizes.h>
+#include <linux/ntb.h>
+
+#define DRIVER_NAME            "ntb_perf"
+#define DRIVER_DESCRIPTION     "PCIe NTB Performance Measurement Tool"
+
+#define DRIVER_LICENSE         "Dual BSD/GPL"
+#define DRIVER_VERSION         "1.0"
+#define DRIVER_AUTHOR          "Dave Jiang <dave.jiang@intel.com>"
+
+#define PERF_LINK_DOWN_TIMEOUT 10
+#define PERF_VERSION           0xffff0001
+#define MAX_THREADS            32
+#define MAX_TEST_SIZE          SZ_1M
+#define MAX_SRCS               32
+#define DMA_OUT_RESOURCE_TO    50
+#define DMA_RETRIES            20
+#define SZ_4G                  (1ULL << 32)
+#define MAX_SEG_ORDER          20 /* no larger than 1M for kmalloc buffer */
+
+MODULE_LICENSE(DRIVER_LICENSE);
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESCRIPTION);
+
+static struct dentry *perf_debugfs_dir;
+
+static unsigned int seg_order = 19; /* 512K */
+module_param(seg_order, uint, 0644);
+MODULE_PARM_DESC(seg_order, "size order [n^2] of buffer segment for testing");
+
+static unsigned int run_order = 32; /* 4G */
+module_param(run_order, uint, 0644);
+MODULE_PARM_DESC(run_order, "size order [n^2] of total data to transfer");
+
+static bool use_dma; /* default to 0 */
+module_param(use_dma, bool, 0644);
+MODULE_PARM_DESC(use_dma, "Using DMA engine to measure performance");
+
+struct perf_mw {
+       phys_addr_t     phys_addr;
+       resource_size_t phys_size;
+       resource_size_t xlat_align;
+       resource_size_t xlat_align_size;
+       void __iomem    *vbase;
+       size_t          xlat_size;
+       size_t          buf_size;
+       void            *virt_addr;
+       dma_addr_t      dma_addr;
+};
+
+struct perf_ctx;
+
+struct pthr_ctx {
+       struct task_struct      *thread;
+       struct perf_ctx         *perf;
+       atomic_t                dma_sync;
+       struct dma_chan         *dma_chan;
+       int                     dma_prep_err;
+       int                     src_idx;
+       void                    *srcs[MAX_SRCS];
+};
+
+struct perf_ctx {
+       struct ntb_dev          *ntb;
+       spinlock_t              db_lock;
+       struct perf_mw          mw;
+       bool                    link_is_up;
+       struct work_struct      link_cleanup;
+       struct delayed_work     link_work;
+       struct dentry           *debugfs_node_dir;
+       struct dentry           *debugfs_run;
+       struct dentry           *debugfs_threads;
+       u8                      perf_threads;
+       bool                    run;
+       struct pthr_ctx         pthr_ctx[MAX_THREADS];
+       atomic_t                tsync;
+};
+
+enum {
+       VERSION = 0,
+       MW_SZ_HIGH,
+       MW_SZ_LOW,
+       SPAD_MSG,
+       SPAD_ACK,
+       MAX_SPAD
+};
+
+static void perf_link_event(void *ctx)
+{
+       struct perf_ctx *perf = ctx;
+
+       if (ntb_link_is_up(perf->ntb, NULL, NULL) == 1)
+               schedule_delayed_work(&perf->link_work, 2*HZ);
+       else
+               schedule_work(&perf->link_cleanup);
+}
+
+static void perf_db_event(void *ctx, int vec)
+{
+       struct perf_ctx *perf = ctx;
+       u64 db_bits, db_mask;
+
+       db_mask = ntb_db_vector_mask(perf->ntb, vec);
+       db_bits = ntb_db_read(perf->ntb);
+
+       dev_dbg(&perf->ntb->dev, "doorbell vec %d mask %#llx bits %#llx\n",
+               vec, db_mask, db_bits);
+}
+
+static const struct ntb_ctx_ops perf_ops = {
+       .link_event = perf_link_event,
+       .db_event = perf_db_event,
+};
+
+static void perf_copy_callback(void *data)
+{
+       struct pthr_ctx *pctx = data;
+
+       atomic_dec(&pctx->dma_sync);
+}
+
+static ssize_t perf_copy(struct pthr_ctx *pctx, char *dst,
+                        char *src, size_t size)
+{
+       struct perf_ctx *perf = pctx->perf;
+       struct dma_async_tx_descriptor *txd;
+       struct dma_chan *chan = pctx->dma_chan;
+       struct dma_device *device;
+       struct dmaengine_unmap_data *unmap;
+       dma_cookie_t cookie;
+       size_t src_off, dst_off;
+       struct perf_mw *mw = &perf->mw;
+       u64 vbase, dst_vaddr;
+       dma_addr_t dst_phys;
+       int retries = 0;
+
+       if (!use_dma) {
+               memcpy_toio(dst, src, size);
+               return size;
+       }
+
+       if (!chan) {
+               dev_err(&perf->ntb->dev, "DMA engine does not exist\n");
+               return -EINVAL;
+       }
+
+       device = chan->device;
+       src_off = (size_t)src & ~PAGE_MASK;
+       dst_off = (size_t)dst & ~PAGE_MASK;
+
+       if (!is_dma_copy_aligned(device, src_off, dst_off, size))
+               return -ENODEV;
+
+       vbase = (u64)(u64 *)mw->vbase;
+       dst_vaddr = (u64)(u64 *)dst;
+       dst_phys = mw->phys_addr + (dst_vaddr - vbase);
+
+       unmap = dmaengine_get_unmap_data(device->dev, 1, GFP_NOWAIT);
+       if (!unmap)
+               return -ENOMEM;
+
+       unmap->len = size;
+       unmap->addr[0] = dma_map_page(device->dev, virt_to_page(src),
+                                     src_off, size, DMA_TO_DEVICE);
+       if (dma_mapping_error(device->dev, unmap->addr[0]))
+               goto err_get_unmap;
+
+       unmap->to_cnt = 1;
+
+       do {
+               txd = device->device_prep_dma_memcpy(chan, dst_phys,
+                                                    unmap->addr[0],
+                                                    size, DMA_PREP_INTERRUPT);
+               if (!txd) {
+                       set_current_state(TASK_INTERRUPTIBLE);
+                       schedule_timeout(DMA_OUT_RESOURCE_TO);
+               }
+       } while (!txd && (++retries < DMA_RETRIES));
+
+       if (!txd) {
+               pctx->dma_prep_err++;
+               goto err_get_unmap;
+       }
+
+       txd->callback = perf_copy_callback;
+       txd->callback_param = pctx;
+       dma_set_unmap(txd, unmap);
+
+       cookie = dmaengine_submit(txd);
+       if (dma_submit_error(cookie))
+               goto err_set_unmap;
+
+       atomic_inc(&pctx->dma_sync);
+       dma_async_issue_pending(chan);
+
+       return size;
+
+err_set_unmap:
+       dmaengine_unmap_put(unmap);
+err_get_unmap:
+       dmaengine_unmap_put(unmap);
+       return 0;
+}
+
+static int perf_move_data(struct pthr_ctx *pctx, char *dst, char *src,
+                         u64 buf_size, u64 win_size, u64 total)
+{
+       int chunks, total_chunks, i;
+       int copied_chunks = 0;
+       u64 copied = 0, result;
+       char *tmp = dst;
+       u64 perf, diff_us;
+       ktime_t kstart, kstop, kdiff;
+
+       chunks = div64_u64(win_size, buf_size);
+       total_chunks = div64_u64(total, buf_size);
+       kstart = ktime_get();
+
+       for (i = 0; i < total_chunks; i++) {
+               result = perf_copy(pctx, tmp, src, buf_size);
+               copied += result;
+               copied_chunks++;
+               if (copied_chunks == chunks) {
+                       tmp = dst;
+                       copied_chunks = 0;
+               } else
+                       tmp += buf_size;
+
+               /* Probably should schedule every 4GB to prevent soft hang. */
+               if (((copied % SZ_4G) == 0) && !use_dma) {
+                       set_current_state(TASK_INTERRUPTIBLE);
+                       schedule_timeout(1);
+               }
+       }
+
+       if (use_dma) {
+               pr_info("%s: All DMA descriptors submitted\n", current->comm);
+               while (atomic_read(&pctx->dma_sync) != 0)
+                       msleep(20);
+       }
+
+       kstop = ktime_get();
+       kdiff = ktime_sub(kstop, kstart);
+       diff_us = ktime_to_us(kdiff);
+
+       pr_info("%s: copied %llu bytes\n", current->comm, copied);
+
+       pr_info("%s: lasted %llu usecs\n", current->comm, diff_us);
+
+       perf = div64_u64(copied, diff_us);
+
+       pr_info("%s: MBytes/s: %llu\n", current->comm, perf);
+
+       return 0;
+}
+
+static bool perf_dma_filter_fn(struct dma_chan *chan, void *node)
+{
+       return dev_to_node(&chan->dev->device) == (int)(unsigned long)node;
+}
+
+static int ntb_perf_thread(void *data)
+{
+       struct pthr_ctx *pctx = data;
+       struct perf_ctx *perf = pctx->perf;
+       struct pci_dev *pdev = perf->ntb->pdev;
+       struct perf_mw *mw = &perf->mw;
+       char *dst;
+       u64 win_size, buf_size, total;
+       void *src;
+       int rc, node, i;
+       struct dma_chan *dma_chan = NULL;
+
+       pr_info("kthread %s starting...\n", current->comm);
+
+       node = dev_to_node(&pdev->dev);
+
+       if (use_dma && !pctx->dma_chan) {
+               dma_cap_mask_t dma_mask;
+
+               dma_cap_zero(dma_mask);
+               dma_cap_set(DMA_MEMCPY, dma_mask);
+               dma_chan = dma_request_channel(dma_mask, perf_dma_filter_fn,
+                                              (void *)(unsigned long)node);
+               if (!dma_chan) {
+                       pr_warn("%s: cannot acquire DMA channel, quitting\n",
+                               current->comm);
+                       return -ENODEV;
+               }
+               pctx->dma_chan = dma_chan;
+       }
+
+       for (i = 0; i < MAX_SRCS; i++) {
+               pctx->srcs[i] = kmalloc_node(MAX_TEST_SIZE, GFP_KERNEL, node);
+               if (!pctx->srcs[i]) {
+                       rc = -ENOMEM;
+                       goto err;
+               }
+       }
+
+       win_size = mw->phys_size;
+       buf_size = 1ULL << seg_order;
+       total = 1ULL << run_order;
+
+       if (buf_size > MAX_TEST_SIZE)
+               buf_size = MAX_TEST_SIZE;
+
+       dst = (char *)mw->vbase;
+
+       atomic_inc(&perf->tsync);
+       while (atomic_read(&perf->tsync) != perf->perf_threads)
+               schedule();
+
+       src = pctx->srcs[pctx->src_idx];
+       pctx->src_idx = (pctx->src_idx + 1) & (MAX_SRCS - 1);
+
+       rc = perf_move_data(pctx, dst, src, buf_size, win_size, total);
+
+       atomic_dec(&perf->tsync);
+
+       if (rc < 0) {
+               pr_err("%s: failed\n", current->comm);
+               rc = -ENXIO;
+               goto err;
+       }
+
+       for (i = 0; i < MAX_SRCS; i++) {
+               kfree(pctx->srcs[i]);
+               pctx->srcs[i] = NULL;
+       }
+
+       return 0;
+
+err:
+       for (i = 0; i < MAX_SRCS; i++) {
+               kfree(pctx->srcs[i]);
+               pctx->srcs[i] = NULL;
+       }
+
+       if (dma_chan) {
+               dma_release_channel(dma_chan);
+               pctx->dma_chan = NULL;
+       }
+
+       return rc;
+}
+
+static void perf_free_mw(struct perf_ctx *perf)
+{
+       struct perf_mw *mw = &perf->mw;
+       struct pci_dev *pdev = perf->ntb->pdev;
+
+       if (!mw->virt_addr)
+               return;
+
+       ntb_mw_clear_trans(perf->ntb, 0);
+       dma_free_coherent(&pdev->dev, mw->buf_size,
+                         mw->virt_addr, mw->dma_addr);
+       mw->xlat_size = 0;
+       mw->buf_size = 0;
+       mw->virt_addr = NULL;
+}
+
+static int perf_set_mw(struct perf_ctx *perf, resource_size_t size)
+{
+       struct perf_mw *mw = &perf->mw;
+       size_t xlat_size, buf_size;
+
+       if (!size)
+               return -EINVAL;
+
+       xlat_size = round_up(size, mw->xlat_align_size);
+       buf_size = round_up(size, mw->xlat_align);
+
+       if (mw->xlat_size == xlat_size)
+               return 0;
+
+       if (mw->buf_size)
+               perf_free_mw(perf);
+
+       mw->xlat_size = xlat_size;
+       mw->buf_size = buf_size;
+
+       mw->virt_addr = dma_alloc_coherent(&perf->ntb->pdev->dev, buf_size,
+                                          &mw->dma_addr, GFP_KERNEL);
+       if (!mw->virt_addr) {
+               mw->xlat_size = 0;
+               mw->buf_size = 0;
+       }
+
+       return 0;
+}
+
+static void perf_link_work(struct work_struct *work)
+{
+       struct perf_ctx *perf =
+               container_of(work, struct perf_ctx, link_work.work);
+       struct ntb_dev *ndev = perf->ntb;
+       struct pci_dev *pdev = ndev->pdev;
+       u32 val;
+       u64 size;
+       int rc;
+
+       dev_dbg(&perf->ntb->pdev->dev, "%s called\n", __func__);
+
+       size = perf->mw.phys_size;
+       ntb_peer_spad_write(ndev, MW_SZ_HIGH, upper_32_bits(size));
+       ntb_peer_spad_write(ndev, MW_SZ_LOW, lower_32_bits(size));
+       ntb_peer_spad_write(ndev, VERSION, PERF_VERSION);
+
+       /* now read what peer wrote */
+       val = ntb_spad_read(ndev, VERSION);
+       if (val != PERF_VERSION) {
+               dev_dbg(&pdev->dev, "Remote version = %#x\n", val);
+               goto out;
+       }
+
+       val = ntb_spad_read(ndev, MW_SZ_HIGH);
+       size = (u64)val << 32;
+
+       val = ntb_spad_read(ndev, MW_SZ_LOW);
+       size |= val;
+
+       dev_dbg(&pdev->dev, "Remote MW size = %#llx\n", size);
+
+       rc = perf_set_mw(perf, size);
+       if (rc)
+               goto out1;
+
+       perf->link_is_up = true;
+
+       return;
+
+out1:
+       perf_free_mw(perf);
+
+out:
+       if (ntb_link_is_up(ndev, NULL, NULL) == 1)
+               schedule_delayed_work(&perf->link_work,
+                                     msecs_to_jiffies(PERF_LINK_DOWN_TIMEOUT));
+}
+
+static void perf_link_cleanup(struct work_struct *work)
+{
+       struct perf_ctx *perf = container_of(work,
+                                            struct perf_ctx,
+                                            link_cleanup);
+
+       dev_dbg(&perf->ntb->pdev->dev, "%s called\n", __func__);
+
+       if (!perf->link_is_up)
+               cancel_delayed_work_sync(&perf->link_work);
+}
+
+static int perf_setup_mw(struct ntb_dev *ntb, struct perf_ctx *perf)
+{
+       struct perf_mw *mw;
+       int rc;
+
+       mw = &perf->mw;
+
+       rc = ntb_mw_get_range(ntb, 0, &mw->phys_addr, &mw->phys_size,
+                             &mw->xlat_align, &mw->xlat_align_size);
+       if (rc)
+               return rc;
+
+       perf->mw.vbase = ioremap_wc(mw->phys_addr, mw->phys_size);
+       if (!mw->vbase)
+               return -ENOMEM;
+
+       return 0;
+}
+
+static ssize_t debugfs_run_read(struct file *filp, char __user *ubuf,
+                               size_t count, loff_t *offp)
+{
+       struct perf_ctx *perf = filp->private_data;
+       char *buf;
+       ssize_t ret, out_offset;
+
+       if (!perf)
+               return 0;
+
+       buf = kmalloc(64, GFP_KERNEL);
+       out_offset = snprintf(buf, 64, "%d\n", perf->run);
+       ret = simple_read_from_buffer(ubuf, count, offp, buf, out_offset);
+       kfree(buf);
+
+       return ret;
+}
+
+static ssize_t debugfs_run_write(struct file *filp, const char __user *ubuf,
+                                size_t count, loff_t *offp)
+{
+       struct perf_ctx *perf = filp->private_data;
+       int node, i;
+
+       if (!perf->link_is_up)
+               return 0;
+
+       if (perf->perf_threads == 0)
+               return 0;
+
+       if (atomic_read(&perf->tsync) == 0)
+               perf->run = false;
+
+       if (perf->run) {
+               /* lets stop the threads */
+               perf->run = false;
+               for (i = 0; i < MAX_THREADS; i++) {
+                       if (perf->pthr_ctx[i].thread) {
+                               kthread_stop(perf->pthr_ctx[i].thread);
+                               perf->pthr_ctx[i].thread = NULL;
+                       } else
+                               break;
+               }
+       } else {
+               perf->run = true;
+
+               if (perf->perf_threads > MAX_THREADS) {
+                       perf->perf_threads = MAX_THREADS;
+                       pr_info("Reset total threads to: %u\n", MAX_THREADS);
+               }
+
+               /* no greater than 1M */
+               if (seg_order > MAX_SEG_ORDER) {
+                       seg_order = MAX_SEG_ORDER;
+                       pr_info("Fix seg_order to %u\n", seg_order);
+               }
+
+               if (run_order < seg_order) {
+                       run_order = seg_order;
+                       pr_info("Fix run_order to %u\n", run_order);
+               }
+
+               node = dev_to_node(&perf->ntb->pdev->dev);
+               /* launch kernel thread */
+               for (i = 0; i < perf->perf_threads; i++) {
+                       struct pthr_ctx *pctx;
+
+                       pctx = &perf->pthr_ctx[i];
+                       atomic_set(&pctx->dma_sync, 0);
+                       pctx->perf = perf;
+                       pctx->thread =
+                               kthread_create_on_node(ntb_perf_thread,
+                                                      (void *)pctx,
+                                                      node, "ntb_perf %d", i);
+                       if (pctx->thread)
+                               wake_up_process(pctx->thread);
+                       else {
+                               perf->run = false;
+                               for (i = 0; i < MAX_THREADS; i++) {
+                                       if (pctx->thread) {
+                                               kthread_stop(pctx->thread);
+                                               pctx->thread = NULL;
+                                       }
+                               }
+                       }
+
+                       if (perf->run == false)
+                               return -ENXIO;
+               }
+
+       }
+
+       return count;
+}
+
+static const struct file_operations ntb_perf_debugfs_run = {
+       .owner = THIS_MODULE,
+       .open = simple_open,
+       .read = debugfs_run_read,
+       .write = debugfs_run_write,
+};
+
+static int perf_debugfs_setup(struct perf_ctx *perf)
+{
+       struct pci_dev *pdev = perf->ntb->pdev;
+
+       if (!debugfs_initialized())
+               return -ENODEV;
+
+       if (!perf_debugfs_dir) {
+               perf_debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL);
+               if (!perf_debugfs_dir)
+                       return -ENODEV;
+       }
+
+       perf->debugfs_node_dir = debugfs_create_dir(pci_name(pdev),
+                                                   perf_debugfs_dir);
+       if (!perf->debugfs_node_dir)
+               return -ENODEV;
+
+       perf->debugfs_run = debugfs_create_file("run", S_IRUSR | S_IWUSR,
+                                               perf->debugfs_node_dir, perf,
+                                               &ntb_perf_debugfs_run);
+       if (!perf->debugfs_run)
+               return -ENODEV;
+
+       perf->debugfs_threads = debugfs_create_u8("threads", S_IRUSR | S_IWUSR,
+                                                 perf->debugfs_node_dir,
+                                                 &perf->perf_threads);
+       if (!perf->debugfs_threads)
+               return -ENODEV;
+
+       return 0;
+}
+
+static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb)
+{
+       struct pci_dev *pdev = ntb->pdev;
+       struct perf_ctx *perf;
+       int node;
+       int rc = 0;
+
+       node = dev_to_node(&pdev->dev);
+
+       perf = kzalloc_node(sizeof(*perf), GFP_KERNEL, node);
+       if (!perf) {
+               rc = -ENOMEM;
+               goto err_perf;
+       }
+
+       perf->ntb = ntb;
+       perf->perf_threads = 1;
+       atomic_set(&perf->tsync, 0);
+       perf->run = false;
+       spin_lock_init(&perf->db_lock);
+       perf_setup_mw(ntb, perf);
+       INIT_DELAYED_WORK(&perf->link_work, perf_link_work);
+       INIT_WORK(&perf->link_cleanup, perf_link_cleanup);
+
+       rc = ntb_set_ctx(ntb, perf, &perf_ops);
+       if (rc)
+               goto err_ctx;
+
+       perf->link_is_up = false;
+       ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
+       ntb_link_event(ntb);
+
+       rc = perf_debugfs_setup(perf);
+       if (rc)
+               goto err_ctx;
+
+       return 0;
+
+err_ctx:
+       cancel_delayed_work_sync(&perf->link_work);
+       cancel_work_sync(&perf->link_cleanup);
+       kfree(perf);
+err_perf:
+       return rc;
+}
+
+static void perf_remove(struct ntb_client *client, struct ntb_dev *ntb)
+{
+       struct perf_ctx *perf = ntb->ctx;
+       int i;
+
+       dev_dbg(&perf->ntb->dev, "%s called\n", __func__);
+
+       cancel_delayed_work_sync(&perf->link_work);
+       cancel_work_sync(&perf->link_cleanup);
+
+       ntb_clear_ctx(ntb);
+       ntb_link_disable(ntb);
+
+       debugfs_remove_recursive(perf_debugfs_dir);
+       perf_debugfs_dir = NULL;
+
+       if (use_dma) {
+               for (i = 0; i < MAX_THREADS; i++) {
+                       struct pthr_ctx *pctx = &perf->pthr_ctx[i];
+
+                       if (pctx->dma_chan)
+                               dma_release_channel(pctx->dma_chan);
+               }
+       }
+
+       kfree(perf);
+}
+
+static struct ntb_client perf_client = {
+       .ops = {
+               .probe = perf_probe,
+               .remove = perf_remove,
+       },
+};
+module_ntb_client(perf_client);
index 8ebfcaae3f5a06a4aca19611fc7c5abdeb29f16a..9edf7eb7d17cc39713e67e4dd62d4e16c0d93e07 100644 (file)
@@ -1277,10 +1277,12 @@ static ssize_t mode_show(struct device *dev,
 
        device_lock(dev);
        claim = ndns->claim;
-       if (pmem_should_map_pages(dev) || (claim && is_nd_pfn(claim)))
-               mode = "memory";
-       else if (claim && is_nd_btt(claim))
+       if (claim && is_nd_btt(claim))
                mode = "safe";
+       else if (claim && is_nd_pfn(claim))
+               mode = "memory";
+       else if (!claim && pmem_should_map_pages(dev))
+               mode = "memory";
        else
                mode = "raw";
        rc = sprintf(buf, "%s\n", mode);
index 0cc9048b86e23103900d36f75a0b64746521ee18..ae81a2f1da50b1e9f9cf5a14bd180df19b3ccab1 100644 (file)
@@ -301,10 +301,8 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn)
 
        switch (le32_to_cpu(pfn_sb->mode)) {
        case PFN_MODE_RAM:
-               break;
        case PFN_MODE_PMEM:
-               /* TODO: allocate from PMEM support */
-               return -ENOTTY;
+               break;
        default:
                return -ENXIO;
        }
index 706e3ff67f8b02f4403a841c8b92f66f3c0e5678..7ee21ae305ae10d480adce961ad59fe284ca366e 100644 (file)
@@ -679,18 +679,6 @@ u32 of_msi_map_rid(struct device *dev, struct device_node *msi_np, u32 rid_in)
        return __of_msi_map_rid(dev, &msi_np, rid_in);
 }
 
-static struct irq_domain *__of_get_msi_domain(struct device_node *np,
-                                             enum irq_domain_bus_token token)
-{
-       struct irq_domain *d;
-
-       d = irq_find_matching_host(np, token);
-       if (!d)
-               d = irq_find_host(np);
-
-       return d;
-}
-
 /**
  * of_msi_map_get_device_domain - Use msi-map to find the relevant MSI domain
  * @dev: device for which the mapping is to be done.
@@ -706,7 +694,7 @@ struct irq_domain *of_msi_map_get_device_domain(struct device *dev, u32 rid)
        struct device_node *np = NULL;
 
        __of_msi_map_rid(dev, &np, rid);
-       return __of_get_msi_domain(np, DOMAIN_BUS_PCI_MSI);
+       return irq_find_matching_host(np, DOMAIN_BUS_PCI_MSI);
 }
 
 /**
@@ -730,7 +718,7 @@ struct irq_domain *of_msi_get_domain(struct device *dev,
        /* Check for a single msi-parent property */
        msi_np = of_parse_phandle(np, "msi-parent", 0);
        if (msi_np && !of_property_read_bool(msi_np, "#msi-cells")) {
-               d = __of_get_msi_domain(msi_np, token);
+               d = irq_find_matching_host(msi_np, token);
                if (!d)
                        of_node_put(msi_np);
                return d;
@@ -744,7 +732,7 @@ struct irq_domain *of_msi_get_domain(struct device *dev,
                while (!of_parse_phandle_with_args(np, "msi-parent",
                                                   "#msi-cells",
                                                   index, &args)) {
-                       d = __of_get_msi_domain(args.np, token);
+                       d = irq_find_matching_host(args.np, token);
                        if (d)
                                return d;
 
index 86829f8064a61eb324a0a9ea28f63f0bf3ceadbd..5648317d355f1c4cde7cf7b48cdd21f644d882ca 100644 (file)
@@ -143,11 +143,31 @@ int of_mdio_parse_addr(struct device *dev, const struct device_node *np)
 }
 EXPORT_SYMBOL(of_mdio_parse_addr);
 
+/* The following is a list of PHY compatible strings which appear in
+ * some DTBs. The compatible string is never matched against a PHY
+ * driver, so is pointless. We only expect devices which are not PHYs
+ * to have a compatible string, so they can be matched to an MDIO
+ * driver.  Encourage users to upgrade their DT blobs to remove these.
+ */
+static const struct of_device_id whitelist_phys[] = {
+       { .compatible = "brcm,40nm-ephy" },
+       { .compatible = "marvell,88E1111", },
+       { .compatible = "marvell,88e1116", },
+       { .compatible = "marvell,88e1118", },
+       { .compatible = "marvell,88e1149r", },
+       { .compatible = "marvell,88e1310", },
+       { .compatible = "marvell,88E1510", },
+       { .compatible = "marvell,88E1514", },
+       { .compatible = "moxa,moxart-rtl8201cp", },
+       {}
+};
+
 /*
  * Return true if the child node is for a phy. It must either:
  * o Compatible string of "ethernet-phy-idX.X"
  * o Compatible string of "ethernet-phy-ieee802.3-c45"
  * o Compatible string of "ethernet-phy-ieee802.3-c22"
+ * o In the white list above (and issue a warning)
  * o No compatibility string
  *
  * A device which is not a phy is expected to have a compatible string
@@ -166,6 +186,13 @@ static bool of_mdiobus_child_is_phy(struct device_node *child)
        if (of_device_is_compatible(child, "ethernet-phy-ieee802.3-c22"))
                return true;
 
+       if (of_match_node(whitelist_phys, child)) {
+               pr_warn(FW_WARN
+                       "%s: Whitelisted compatible string. Please remove\n",
+                       child->full_name);
+               return true;
+       }
+
        if (!of_find_property(child, "compatible", NULL))
                return true;
 
@@ -256,11 +283,19 @@ static int of_phy_match(struct device *dev, void *phy_np)
 struct phy_device *of_phy_find_device(struct device_node *phy_np)
 {
        struct device *d;
+       struct mdio_device *mdiodev;
+
        if (!phy_np)
                return NULL;
 
        d = bus_find_device(&mdio_bus_type, NULL, phy_np, of_phy_match);
-       return d ? to_phy_device(d) : NULL;
+       if (d) {
+               mdiodev = to_mdio_device(d);
+               if (mdiodev->flags & MDIO_DEVICE_FLAG_PHY)
+                       return to_phy_device(d);
+       }
+
+       return NULL;
 }
 EXPORT_SYMBOL(of_phy_find_device);
 
index dd92c5edf219453aac0bb98d161fc1136395f1c7..b48ac6300c792d4336487f841a2267a5b883c6f5 100644 (file)
@@ -138,22 +138,22 @@ static int __oprofilefs_create_file(struct dentry *root, char const *name,
        struct dentry *dentry;
        struct inode *inode;
 
-       mutex_lock(&d_inode(root)->i_mutex);
+       inode_lock(d_inode(root));
        dentry = d_alloc_name(root, name);
        if (!dentry) {
-               mutex_unlock(&d_inode(root)->i_mutex);
+               inode_unlock(d_inode(root));
                return -ENOMEM;
        }
        inode = oprofilefs_get_inode(root->d_sb, S_IFREG | perm);
        if (!inode) {
                dput(dentry);
-               mutex_unlock(&d_inode(root)->i_mutex);
+               inode_unlock(d_inode(root));
                return -ENOMEM;
        }
        inode->i_fop = fops;
        inode->i_private = priv;
        d_add(dentry, inode);
-       mutex_unlock(&d_inode(root)->i_mutex);
+       inode_unlock(d_inode(root));
        return 0;
 }
 
@@ -215,22 +215,22 @@ struct dentry *oprofilefs_mkdir(struct dentry *parent, char const *name)
        struct dentry *dentry;
        struct inode *inode;
 
-       mutex_lock(&d_inode(parent)->i_mutex);
+       inode_lock(d_inode(parent));
        dentry = d_alloc_name(parent, name);
        if (!dentry) {
-               mutex_unlock(&d_inode(parent)->i_mutex);
+               inode_unlock(d_inode(parent));
                return NULL;
        }
        inode = oprofilefs_get_inode(parent->d_sb, S_IFDIR | 0755);
        if (!inode) {
                dput(dentry);
-               mutex_unlock(&d_inode(parent)->i_mutex);
+               inode_unlock(d_inode(parent));
                return NULL;
        }
        inode->i_op = &simple_dir_inode_operations;
        inode->i_fop = &simple_dir_operations;
        d_add(dentry, inode);
-       mutex_unlock(&d_inode(parent)->i_mutex);
+       inode_unlock(d_inode(parent));
        return dentry;
 }
 
index 5f2fda12e0066d9bdd90bdf08139c706a2f1a2f0..fa49f9143b80631108e10ef78e5e45e285c40cf8 100644 (file)
@@ -953,8 +953,10 @@ int acpiphp_enable_slot(struct acpiphp_slot *slot)
 {
        pci_lock_rescan_remove();
 
-       if (slot->flags & SLOT_IS_GOING_AWAY)
+       if (slot->flags & SLOT_IS_GOING_AWAY) {
+               pci_unlock_rescan_remove();
                return -ENODEV;
+       }
 
        /* configure all functions */
        if (!(slot->flags & SLOT_ENABLED))
index d28db0e793dfb0a3b103bcfc71a9a6a859506ed4..d78ee151c9e4a703bbeb071cda98032cd03dccee 100644 (file)
@@ -899,6 +899,13 @@ static const struct dmi_system_id no_hw_rfkill_list[] = {
                        DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo YOGA 3"),
                },
        },
+       {
+               .ident = "Lenovo Yoga 700",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+                       DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo YOGA 700"),
+               },
+       },
        {
                .ident = "Lenovo Yoga 900",
                .matches = {
index 5b31d1548c07b1221114031eefc966e02538af2d..f5134acd6ff05d8a5eac080eed9f4b26a96847f3 100644 (file)
        } \
 }
 
+#ifdef CONFIG_PM_SLEEP
 static u8 suspend_prep_ok;
 static u32 suspend_shlw_ctr_temp, suspend_deep_ctr_temp;
 static u64 suspend_shlw_res_temp, suspend_deep_res_temp;
+#endif
 
 struct telemetry_susp_stats {
        u32 shlw_swake_ctr;
index f700723ca5d6b1a8a0c342c568908f9c53aa2435..d28e3ab9479c64e41fe014714c08721fc503faf3 100644 (file)
@@ -342,6 +342,7 @@ static void quirk_amd_mmconfig_area(struct pnp_dev *dev)
 /* Device IDs of parts that have 32KB MCH space */
 static const unsigned int mch_quirk_devices[] = {
        0x0154, /* Ivy Bridge */
+       0x0a04, /* Haswell-ULT */
        0x0c00, /* Haswell */
        0x1604, /* Broadwell */
 };
index 934c139916c609e3647acd5c6229a781ca348d56..ee4f183ef9eed337b012c4088bfeba5ae0c949a3 100644 (file)
@@ -178,7 +178,6 @@ static int ptp_ixp_adjtime(struct ptp_clock_info *ptp, s64 delta)
 static int ptp_ixp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
 {
        u64 ns;
-       u32 remainder;
        unsigned long flags;
        struct ixp_clock *ixp_clock = container_of(ptp, struct ixp_clock, caps);
        struct ixp46x_ts_regs *regs = ixp_clock->regs;
@@ -189,8 +188,7 @@ static int ptp_ixp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
 
        spin_unlock_irqrestore(&register_lock, flags);
 
-       ts->tv_sec = div_u64_rem(ns, 1000000000, &remainder);
-       ts->tv_nsec = remainder;
+       *ts = ns_to_timespec64(ns);
        return 0;
 }
 
@@ -202,8 +200,7 @@ static int ptp_ixp_settime(struct ptp_clock_info *ptp,
        struct ixp_clock *ixp_clock = container_of(ptp, struct ixp_clock, caps);
        struct ixp46x_ts_regs *regs = ixp_clock->regs;
 
-       ns = ts->tv_sec * 1000000000ULL;
-       ns += ts->tv_nsec;
+       ns = timespec64_to_ns(ts);
 
        spin_lock_irqsave(&register_lock, flags);
 
index c692dfebd0bab1c808f460349dff194b028f5c5c..50597f9522fe375ed764aab8842a400e56ef808d 100644 (file)
@@ -139,11 +139,11 @@ static ssize_t chp_measurement_chars_read(struct file *filp,
 
        device = container_of(kobj, struct device, kobj);
        chp = to_channelpath(device);
-       if (!chp->cmg_chars)
+       if (chp->cmg == -1)
                return 0;
 
-       return memory_read_from_buffer(buf, count, &off,
-                               chp->cmg_chars, sizeof(struct cmg_chars));
+       return memory_read_from_buffer(buf, count, &off, &chp->cmg_chars,
+                                      sizeof(chp->cmg_chars));
 }
 
 static struct bin_attribute chp_measurement_chars_attr = {
@@ -416,7 +416,8 @@ static void chp_release(struct device *dev)
  * chp_update_desc - update channel-path description
  * @chp - channel-path
  *
- * Update the channel-path description of the specified channel-path.
+ * Update the channel-path description of the specified channel-path
+ * including channel measurement related information.
  * Return zero on success, non-zero otherwise.
  */
 int chp_update_desc(struct channel_path *chp)
@@ -428,8 +429,10 @@ int chp_update_desc(struct channel_path *chp)
                return rc;
 
        rc = chsc_determine_fmt1_channel_path_desc(chp->chpid, &chp->desc_fmt1);
+       if (rc)
+               return rc;
 
-       return rc;
+       return chsc_get_channel_measurement_chars(chp);
 }
 
 /**
@@ -466,14 +469,6 @@ int chp_new(struct chp_id chpid)
                ret = -ENODEV;
                goto out_free;
        }
-       /* Get channel-measurement characteristics. */
-       if (css_chsc_characteristics.scmc && css_chsc_characteristics.secm) {
-               ret = chsc_get_channel_measurement_chars(chp);
-               if (ret)
-                       goto out_free;
-       } else {
-               chp->cmg = -1;
-       }
        dev_set_name(&chp->dev, "chp%x.%02x", chpid.cssid, chpid.id);
 
        /* make it known to the system */
index 4efd5b867cc390780a18c2f983a2215ed05dcd72..af0232290dc4c7c183a65e3fe13d6683bb48cf1e 100644 (file)
@@ -48,7 +48,7 @@ struct channel_path {
        /* Channel-measurement related stuff: */
        int cmg;
        int shared;
-       void *cmg_chars;
+       struct cmg_chars cmg_chars;
 };
 
 /* Return channel_path struct for given chpid. */
index a831d18596a5ef592f88ec5af031f639606c8c0b..c424c0c7367e360acd9ccd1ce899dc80bb575265 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/device.h>
+#include <linux/mutex.h>
 #include <linux/pci.h>
 
 #include <asm/cio.h>
@@ -224,8 +225,9 @@ out_unreg:
 
 void chsc_chp_offline(struct chp_id chpid)
 {
-       char dbf_txt[15];
+       struct channel_path *chp = chpid_to_chp(chpid);
        struct chp_link link;
+       char dbf_txt[15];
 
        sprintf(dbf_txt, "chpr%x.%02x", chpid.cssid, chpid.id);
        CIO_TRACE_EVENT(2, dbf_txt);
@@ -236,6 +238,11 @@ void chsc_chp_offline(struct chp_id chpid)
        link.chpid = chpid;
        /* Wait until previous actions have settled. */
        css_wait_for_slow_path();
+
+       mutex_lock(&chp->lock);
+       chp_update_desc(chp);
+       mutex_unlock(&chp->lock);
+
        for_each_subchannel_staged(s390_subchannel_remove_chpid, NULL, &link);
 }
 
@@ -690,8 +697,9 @@ static void chsc_process_crw(struct crw *crw0, struct crw *crw1, int overflow)
 
 void chsc_chp_online(struct chp_id chpid)
 {
-       char dbf_txt[15];
+       struct channel_path *chp = chpid_to_chp(chpid);
        struct chp_link link;
+       char dbf_txt[15];
 
        sprintf(dbf_txt, "cadd%x.%02x", chpid.cssid, chpid.id);
        CIO_TRACE_EVENT(2, dbf_txt);
@@ -701,6 +709,11 @@ void chsc_chp_online(struct chp_id chpid)
                link.chpid = chpid;
                /* Wait until previous actions have settled. */
                css_wait_for_slow_path();
+
+               mutex_lock(&chp->lock);
+               chp_update_desc(chp);
+               mutex_unlock(&chp->lock);
+
                for_each_subchannel_staged(__s390_process_res_acc, NULL,
                                           &link);
                css_schedule_reprobe();
@@ -967,22 +980,19 @@ static void
 chsc_initialize_cmg_chars(struct channel_path *chp, u8 cmcv,
                          struct cmg_chars *chars)
 {
-       struct cmg_chars *cmg_chars;
        int i, mask;
 
-       cmg_chars = chp->cmg_chars;
        for (i = 0; i < NR_MEASUREMENT_CHARS; i++) {
                mask = 0x80 >> (i + 3);
                if (cmcv & mask)
-                       cmg_chars->values[i] = chars->values[i];
+                       chp->cmg_chars.values[i] = chars->values[i];
                else
-                       cmg_chars->values[i] = 0;
+                       chp->cmg_chars.values[i] = 0;
        }
 }
 
 int chsc_get_channel_measurement_chars(struct channel_path *chp)
 {
-       struct cmg_chars *cmg_chars;
        int ccode, ret;
 
        struct {
@@ -1006,10 +1016,11 @@ int chsc_get_channel_measurement_chars(struct channel_path *chp)
                u32 data[NR_MEASUREMENT_CHARS];
        } __attribute__ ((packed)) *scmc_area;
 
-       chp->cmg_chars = NULL;
-       cmg_chars = kmalloc(sizeof(*cmg_chars), GFP_KERNEL);
-       if (!cmg_chars)
-               return -ENOMEM;
+       chp->shared = -1;
+       chp->cmg = -1;
+
+       if (!css_chsc_characteristics.scmc || !css_chsc_characteristics.secm)
+               return 0;
 
        spin_lock_irq(&chsc_page_lock);
        memset(chsc_page, 0, PAGE_SIZE);
@@ -1031,25 +1042,19 @@ int chsc_get_channel_measurement_chars(struct channel_path *chp)
                              scmc_area->response.code);
                goto out;
        }
-       if (scmc_area->not_valid) {
-               chp->cmg = -1;
-               chp->shared = -1;
+       if (scmc_area->not_valid)
                goto out;
-       }
+
        chp->cmg = scmc_area->cmg;
        chp->shared = scmc_area->shared;
        if (chp->cmg != 2 && chp->cmg != 3) {
                /* No cmg-dependent data. */
                goto out;
        }
-       chp->cmg_chars = cmg_chars;
        chsc_initialize_cmg_chars(chp, scmc_area->cmcv,
                                  (struct cmg_chars *) &scmc_area->data);
 out:
        spin_unlock_irq(&chsc_page_lock);
-       if (!chp->cmg_chars)
-               kfree(cmg_chars);
-
        return ret;
 }
 
index 7b23f43c7b080cc81b4de94ed9016cc0d8a91546..de1b6c1d172c8c0866dc0fd8fb709d032bdef820 100644 (file)
@@ -112,9 +112,10 @@ static inline int convert_error(struct zcrypt_device *zdev,
                atomic_set(&zcrypt_rescan_req, 1);
                zdev->online = 0;
                pr_err("Cryptographic device %x failed and was set offline\n",
-                      zdev->ap_dev->qid);
+                      AP_QID_DEVICE(zdev->ap_dev->qid));
                ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
-                       zdev->ap_dev->qid, zdev->online, ehdr->reply_code);
+                       AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online,
+                       ehdr->reply_code);
                return -EAGAIN;
        case REP82_ERROR_TRANSPORT_FAIL:
        case REP82_ERROR_MACHINE_FAILURE:
@@ -123,16 +124,18 @@ static inline int convert_error(struct zcrypt_device *zdev,
                atomic_set(&zcrypt_rescan_req, 1);
                zdev->online = 0;
                pr_err("Cryptographic device %x failed and was set offline\n",
-                      zdev->ap_dev->qid);
+                      AP_QID_DEVICE(zdev->ap_dev->qid));
                ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
-                       zdev->ap_dev->qid, zdev->online, ehdr->reply_code);
+                       AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online,
+                       ehdr->reply_code);
                return -EAGAIN;
        default:
                zdev->online = 0;
                pr_err("Cryptographic device %x failed and was set offline\n",
-                      zdev->ap_dev->qid);
+                      AP_QID_DEVICE(zdev->ap_dev->qid));
                ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
-                       zdev->ap_dev->qid, zdev->online, ehdr->reply_code);
+                       AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online,
+                       ehdr->reply_code);
                return -EAGAIN; /* repeat the request on a different device. */
        }
 }
index 74edf2934e7c9fd652c33d1d14bb47b9e2bf44b8..eedfaa2cf715f698db447c1a79e857f170727dc5 100644 (file)
@@ -336,9 +336,10 @@ static int convert_type80(struct zcrypt_device *zdev,
                /* The result is too short, the CEX2A card may not do that.. */
                zdev->online = 0;
                pr_err("Cryptographic device %x failed and was set offline\n",
-                      zdev->ap_dev->qid);
+                      AP_QID_DEVICE(zdev->ap_dev->qid));
                ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
-                              zdev->ap_dev->qid, zdev->online, t80h->code);
+                              AP_QID_DEVICE(zdev->ap_dev->qid),
+                              zdev->online, t80h->code);
 
                return -EAGAIN; /* repeat the request on a different device. */
        }
@@ -368,9 +369,9 @@ static int convert_response(struct zcrypt_device *zdev,
        default: /* Unknown response type, this should NEVER EVER happen */
                zdev->online = 0;
                pr_err("Cryptographic device %x failed and was set offline\n",
-                      zdev->ap_dev->qid);
+                      AP_QID_DEVICE(zdev->ap_dev->qid));
                ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
-                              zdev->ap_dev->qid, zdev->online);
+                              AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online);
                return -EAGAIN; /* repeat the request on a different device. */
        }
 }
index 9a2dd472c1cc9870d37e98c5692c1c9c1e0ebac3..21959719daef94a00bd3f40282a2022077ec1344 100644 (file)
@@ -572,9 +572,9 @@ static int convert_type86_ica(struct zcrypt_device *zdev,
                        return -EINVAL;
                zdev->online = 0;
                pr_err("Cryptographic device %x failed and was set offline\n",
-                      zdev->ap_dev->qid);
+                      AP_QID_DEVICE(zdev->ap_dev->qid));
                ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
-                              zdev->ap_dev->qid, zdev->online,
+                              AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online,
                               msg->hdr.reply_code);
                return -EAGAIN; /* repeat the request on a different device. */
        }
@@ -715,9 +715,9 @@ static int convert_response_ica(struct zcrypt_device *zdev,
        default: /* Unknown response type, this should NEVER EVER happen */
                zdev->online = 0;
                pr_err("Cryptographic device %x failed and was set offline\n",
-                      zdev->ap_dev->qid);
+                      AP_QID_DEVICE(zdev->ap_dev->qid));
                ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
-                              zdev->ap_dev->qid, zdev->online);
+                              AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online);
                return -EAGAIN; /* repeat the request on a different device. */
        }
 }
@@ -747,9 +747,9 @@ static int convert_response_xcrb(struct zcrypt_device *zdev,
                xcRB->status = 0x0008044DL; /* HDD_InvalidParm */
                zdev->online = 0;
                pr_err("Cryptographic device %x failed and was set offline\n",
-                      zdev->ap_dev->qid);
+                      AP_QID_DEVICE(zdev->ap_dev->qid));
                ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
-                              zdev->ap_dev->qid, zdev->online);
+                              AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online);
                return -EAGAIN; /* repeat the request on a different device. */
        }
 }
@@ -773,9 +773,9 @@ static int convert_response_ep11_xcrb(struct zcrypt_device *zdev,
        default: /* Unknown response type, this should NEVER EVER happen */
                zdev->online = 0;
                pr_err("Cryptographic device %x failed and was set offline\n",
-                      zdev->ap_dev->qid);
+                      AP_QID_DEVICE(zdev->ap_dev->qid));
                ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
-                              zdev->ap_dev->qid, zdev->online);
+                              AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online);
                return -EAGAIN; /* repeat the request on a different device. */
        }
 }
@@ -800,9 +800,9 @@ static int convert_response_rng(struct zcrypt_device *zdev,
        default: /* Unknown response type, this should NEVER EVER happen */
                zdev->online = 0;
                pr_err("Cryptographic device %x failed and was set offline\n",
-                      zdev->ap_dev->qid);
+                      AP_QID_DEVICE(zdev->ap_dev->qid));
                ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
-                              zdev->ap_dev->qid, zdev->online);
+                              AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online);
                return -EAGAIN; /* repeat the request on a different device. */
        }
 }
index 2940bd769936cd7f75d2d20adc324914b0df4b84..25aba1613e2157f7a2e468007c202a16015b3f40 100644 (file)
@@ -1045,6 +1045,9 @@ static int tw_chrdev_open(struct inode *inode, struct file *file)
 static const struct file_operations tw_fops = {
        .owner          = THIS_MODULE,
        .unlocked_ioctl = tw_chrdev_ioctl,
+#ifdef CONFIG_COMPAT
+       .compat_ioctl   = tw_chrdev_ioctl,
+#endif
        .open           = tw_chrdev_open,
        .release        = NULL,
        .llseek         = noop_llseek,
index c1fe0d2f90ca353722cf7e2b16c2bd2c9b47601b..e2f31c93717db6e6a248d58ffaff6b8a2ba2d23f 100644 (file)
@@ -1106,6 +1106,7 @@ config SCSI_IPR
        tristate "IBM Power Linux RAID adapter support"
        depends on PCI && SCSI && ATA
        select FW_LOADER
+       select IRQ_POLL
        ---help---
          This driver supports the IBM Power Linux family RAID adapters.
          This includes IBM pSeries 5712, 5703, 5709, and 570A, as well
@@ -1620,23 +1621,6 @@ config ATARI_SCSI
          ST-DMA, replacing ACSI).  It does NOT support other schemes, like
          in the Hades (without DMA).
 
-config ATARI_SCSI_TOSHIBA_DELAY
-       bool "Long delays for Toshiba CD-ROMs"
-       depends on ATARI_SCSI
-       help
-         This option increases the delay after a SCSI arbitration to
-         accommodate some flaky Toshiba CD-ROM drives. Say Y if you intend to
-         use a Toshiba CD-ROM drive; otherwise, the option is not needed and
-         would impact performance a bit, so say N.
-
-config ATARI_SCSI_RESET_BOOT
-       bool "Reset SCSI-devices at boottime"
-       depends on ATARI_SCSI
-       help
-         Reset the devices on your Atari whenever it boots.  This makes the
-         boot process fractionally longer but may assist recovery from errors
-         that leave the devices with SCSI operations partway completed.
-
 config MAC_SCSI
        tristate "Macintosh NCR5380 SCSI"
        depends on MAC && SCSI=y
index a777e5c412df262b3f7fc72b55676d681ade9cb8..d72867257346eebfe885bb189eb7e5a61cd6fbbe 100644 (file)
@@ -1,17 +1,17 @@
-/* 
+/*
  * NCR 5380 generic driver routines.  These should make it *trivial*
- *      to implement 5380 SCSI drivers under Linux with a non-trantor
- *      architecture.
+ * to implement 5380 SCSI drivers under Linux with a non-trantor
+ * architecture.
  *
- *      Note that these routines also work with NR53c400 family chips.
+ * Note that these routines also work with NR53c400 family chips.
  *
  * Copyright 1993, Drew Eckhardt
- *      Visionary Computing 
- *      (Unix and Linux consulting and custom programming)
- *      drew@colorado.edu
- *      +1 (303) 666-5836
+ * Visionary Computing
+ * (Unix and Linux consulting and custom programming)
+ * drew@colorado.edu
+ * +1 (303) 666-5836
  *
- * For more information, please consult 
+ * For more information, please consult
  *
  * NCR 5380 Family
  * SCSI Protocol Controller
  */
 
 /*
- * Revision 1.10 1998/9/2      Alan Cox
- *                             (alan@lxorguk.ukuu.org.uk)
- * Fixed up the timer lockups reported so far. Things still suck. Looking 
- * forward to 2.3 and per device request queues. Then it'll be possible to
- * SMP thread this beast and improve life no end.
- * Revision 1.9  1997/7/27     Ronald van Cuijlenborg
- *                             (ronald.van.cuijlenborg@tip.nl or nutty@dds.nl)
- * (hopefully) fixed and enhanced USLEEP
- * added support for DTC3181E card (for Mustek scanner)
- *
-
- * Revision 1.8                        Ingmar Baumgart
- *                             (ingmar@gonzo.schwaben.de)
- * added support for NCR53C400a card
- *
-
- * Revision 1.7  1996/3/2       Ray Van Tassle (rayvt@comm.mot.com)
- * added proc_info
- * added support needed for DTC 3180/3280
- * fixed a couple of bugs
- *
-
- * Revision 1.5  1994/01/19  09:14:57  drew
- * Fixed udelay() hack that was being used on DATAOUT phases
- * instead of a proper wait for the final handshake.
- *
- * Revision 1.4  1994/01/19  06:44:25  drew
- * *** empty log message ***
- *
- * Revision 1.3  1994/01/19  05:24:40  drew
- * Added support for TCR LAST_BYTE_SENT bit.
- *
- * Revision 1.2  1994/01/15  06:14:11  drew
- * REAL DMA support, bug fixes.
- *
- * Revision 1.1  1994/01/15  06:00:54  drew
- * Initial revision
- *
+ * With contributions from Ray Van Tassle, Ingmar Baumgart,
+ * Ronald van Cuijlenborg, Alan Cox and others.
  */
 
 /*
- * Further development / testing that should be done : 
+ * Further development / testing that should be done :
  * 1.  Cleanup the NCR5380_transfer_dma function and DMA operation complete
- *     code so that everything does the same thing that's done at the 
- *     end of a pseudo-DMA read operation.
+ * code so that everything does the same thing that's done at the
+ * end of a pseudo-DMA read operation.
  *
  * 2.  Fix REAL_DMA (interrupt driven, polled works fine) -
- *     basically, transfer size needs to be reduced by one 
- *     and the last byte read as is done with PSEUDO_DMA.
- * 
- * 4.  Test SCSI-II tagged queueing (I have no devices which support 
- *      tagged queueing)
- *
- * 5.  Test linked command handling code after Eric is ready with 
- *      the high level code.
+ * basically, transfer size needs to be reduced by one
+ * and the last byte read as is done with PSEUDO_DMA.
+ *
+ * 4.  Test SCSI-II tagged queueing (I have no devices which support
+ * tagged queueing)
  */
-#include <scsi/scsi_dbg.h>
-#include <scsi/scsi_transport_spi.h>
-
-#if (NDEBUG & NDEBUG_LISTS)
-#define LIST(x,y) {printk("LINE:%d   Adding %p to %p\n", __LINE__, (void*)(x), (void*)(y)); if ((x)==(y)) udelay(5); }
-#define REMOVE(w,x,y,z) {printk("LINE:%d   Removing: %p->%p  %p->%p \n", __LINE__, (void*)(w), (void*)(x), (void*)(y), (void*)(z)); if ((x)==(y)) udelay(5); }
-#else
-#define LIST(x,y)
-#define REMOVE(w,x,y,z)
-#endif
 
 #ifndef notyet
-#undef LINKED
 #undef REAL_DMA
 #endif
 
-#ifdef REAL_DMA_POLL
-#undef READ_OVERRUNS
-#define READ_OVERRUNS
-#endif
-
 #ifdef BOARD_REQUIRES_NO_DELAY
 #define io_recovery_delay(x)
 #else
 /*
  * Design
  *
- * This is a generic 5380 driver.  To use it on a different platform, 
+ * This is a generic 5380 driver.  To use it on a different platform,
  * one simply writes appropriate system specific macros (ie, data
- * transfer - some PC's will use the I/O bus, 68K's must use 
+ * transfer - some PC's will use the I/O bus, 68K's must use
  * memory mapped) and drops this file in their 'C' wrapper.
  *
- * (Note from hch:  unfortunately it was not enough for the different
- * m68k folks and instead of improving this driver they copied it
- * and hacked it up for their needs.  As a consequence they lost
- * most updates to this driver.  Maybe someone will fix all these
- * drivers to use a common core one day..)
- *
- * As far as command queueing, two queues are maintained for 
+ * As far as command queueing, two queues are maintained for
  * each 5380 in the system - commands that haven't been issued yet,
- * and commands that are currently executing.  This means that an 
- * unlimited number of commands may be queued, letting 
- * more commands propagate from the higher driver levels giving higher 
- * throughput.  Note that both I_T_L and I_T_L_Q nexuses are supported, 
- * allowing multiple commands to propagate all the way to a SCSI-II device 
+ * and commands that are currently executing.  This means that an
+ * unlimited number of commands may be queued, letting
+ * more commands propagate from the higher driver levels giving higher
+ * throughput.  Note that both I_T_L and I_T_L_Q nexuses are supported,
+ * allowing multiple commands to propagate all the way to a SCSI-II device
  * while a command is already executing.
  *
  *
- * Issues specific to the NCR5380 : 
- *
- * When used in a PIO or pseudo-dma mode, the NCR5380 is a braindead 
- * piece of hardware that requires you to sit in a loop polling for 
- * the REQ signal as long as you are connected.  Some devices are 
- * brain dead (ie, many TEXEL CD ROM drives) and won't disconnect 
- * while doing long seek operations.
- * 
- * The workaround for this is to keep track of devices that have
- * disconnected.  If the device hasn't disconnected, for commands that
- * should disconnect, we do something like 
+ * Issues specific to the NCR5380 :
  *
- * while (!REQ is asserted) { sleep for N usecs; poll for M usecs }
- * 
- * Some tweaking of N and M needs to be done.  An algorithm based 
- * on "time to data" would give the best results as long as short time
- * to datas (ie, on the same track) were considered, however these 
+ * When used in a PIO or pseudo-dma mode, the NCR5380 is a braindead
+ * piece of hardware that requires you to sit in a loop polling for
+ * the REQ signal as long as you are connected.  Some devices are
+ * brain dead (ie, many TEXEL CD ROM drives) and won't disconnect
+ * while doing long seek operations. [...] These
  * broken devices are the exception rather than the rule and I'd rather
  * spend my time optimizing for the normal case.
  *
  * which is started from a workqueue for each NCR5380 host in the
  * system.  It attempts to establish I_T_L or I_T_L_Q nexuses by
  * removing the commands from the issue queue and calling
- * NCR5380_select() if a nexus is not established. 
+ * NCR5380_select() if a nexus is not established.
  *
  * Once a nexus is established, the NCR5380_information_transfer()
  * phase goes through the various phases as instructed by the target.
  * if the target goes into MSG IN and sends a DISCONNECT message,
  * the command structure is placed into the per instance disconnected
- * queue, and NCR5380_main tries to find more work.  If the target is 
+ * queue, and NCR5380_main tries to find more work.  If the target is
  * idle for too long, the system will try to sleep.
  *
  * If a command has disconnected, eventually an interrupt will trigger,
  * calling NCR5380_intr()  which will in turn call NCR5380_reselect
  * to reestablish a nexus.  This will run main if necessary.
  *
- * On command termination, the done function will be called as 
+ * On command termination, the done function will be called as
  * appropriate.
  *
- * SCSI pointers are maintained in the SCp field of SCSI command 
+ * SCSI pointers are maintained in the SCp field of SCSI command
  * structures, being initialized after the command is connected
  * in NCR5380_select, and set as appropriate in NCR5380_information_transfer.
  * Note that in violation of the standard, an implicit SAVE POINTERS operation
 /*
  * Using this file :
  * This file a skeleton Linux SCSI driver for the NCR 5380 series
- * of chips.  To use it, you write an architecture specific functions 
+ * of chips.  To use it, you write an architecture specific functions
  * and macros and include this file in your driver.
  *
- * These macros control options : 
- * AUTOPROBE_IRQ - if defined, the NCR5380_probe_irq() function will be 
- *      defined.
- * 
+ * These macros control options :
+ * AUTOPROBE_IRQ - if defined, the NCR5380_probe_irq() function will be
+ * defined.
+ *
  * AUTOSENSE - if defined, REQUEST SENSE will be performed automatically
- *      for commands that return with a CHECK CONDITION status. 
+ * for commands that return with a CHECK CONDITION status.
  *
  * DIFFERENTIAL - if defined, NCR53c81 chips will use external differential
- *      transceivers. 
+ * transceivers.
  *
  * DONT_USE_INTR - if defined, never use interrupts, even if we probe or
- *      override-configure an IRQ.
- *
- * LIMIT_TRANSFERSIZE - if defined, limit the pseudo-dma transfers to 512
- *      bytes at a time.  Since interrupts are disabled by default during
- *      these transfers, we might need this to give reasonable interrupt
- *      service time if the transfer size gets too large.
- *
- * LINKED - if defined, linked commands are supported.
+ * override-configure an IRQ.
  *
  * PSEUDO_DMA - if defined, PSEUDO DMA is used during the data transfer phases.
  *
  * REAL_DMA - if defined, REAL DMA is used during the data transfer phases.
  *
  * REAL_DMA_POLL - if defined, REAL DMA is used but the driver doesn't
- *      rely on phase mismatch and EOP interrupts to determine end 
- *      of phase.
- *
- * UNSAFE - leave interrupts enabled during pseudo-DMA transfers.  You
- *          only really want to use this if you're having a problem with
- *          dropped characters during high speed communications, and even
- *          then, you're going to be better off twiddling with transfersize
- *          in the high level code.
- *
- * Defaults for these will be provided although the user may want to adjust 
- * these to allocate CPU resources to the SCSI driver or "real" code.
- * 
- * USLEEP_SLEEP - amount of time, in jiffies, to sleep
- *
- * USLEEP_POLL - amount of time, in jiffies, to poll
+ * rely on phase mismatch and EOP interrupts to determine end
+ * of phase.
  *
  * These macros MUST be defined :
- * NCR5380_local_declare() - declare any local variables needed for your
- *      transfer routines.
  *
- * NCR5380_setup(instance) - initialize any local variables needed from a given
- *      instance of the host adapter for NCR5380_{read,write,pread,pwrite}
- * 
  * NCR5380_read(register)  - read from the specified register
  *
- * NCR5380_write(register, value) - write to the specific register 
+ * NCR5380_write(register, value) - write to the specific register
  *
- * NCR5380_implementation_fields  - additional fields needed for this 
- *      specific implementation of the NCR5380
+ * NCR5380_implementation_fields  - additional fields needed for this
+ * specific implementation of the NCR5380
  *
  * Either real DMA *or* pseudo DMA may be implemented
- * REAL functions : 
+ * REAL functions :
  * NCR5380_REAL_DMA should be defined if real DMA is to be used.
- * Note that the DMA setup functions should return the number of bytes 
- *      that they were able to program the controller for.
+ * Note that the DMA setup functions should return the number of bytes
+ * that they were able to program the controller for.
  *
- * Also note that generic i386/PC versions of these macros are 
- *      available as NCR5380_i386_dma_write_setup,
- *      NCR5380_i386_dma_read_setup, and NCR5380_i386_dma_residual.
+ * Also note that generic i386/PC versions of these macros are
+ * available as NCR5380_i386_dma_write_setup,
+ * NCR5380_i386_dma_read_setup, and NCR5380_i386_dma_residual.
  *
  * NCR5380_dma_write_setup(instance, src, count) - initialize
  * NCR5380_dma_read_setup(instance, dst, count) - initialize
  * NCR5380_pread(instance, dst, count);
  *
  * The generic driver is initialized by calling NCR5380_init(instance),
- * after setting the appropriate host specific fields and ID.  If the 
+ * after setting the appropriate host specific fields and ID.  If the
  * driver wishes to autoprobe for an IRQ line, the NCR5380_probe_irq(instance,
  * possible) function may be used.
  */
 
-static int do_abort(struct Scsi_Host *host);
-static void do_reset(struct Scsi_Host *host);
+static int do_abort(struct Scsi_Host *);
+static void do_reset(struct Scsi_Host *);
 
-/*
- *     initialize_SCp          -       init the scsi pointer field
- *     @cmd: command block to set up
+/**
+ * initialize_SCp - init the scsi pointer field
+ * @cmd: command block to set up
  *
- *     Set up the internal fields in the SCSI command.
+ * Set up the internal fields in the SCSI command.
  */
 
 static inline void initialize_SCp(struct scsi_cmnd *cmd)
 {
-       /* 
-        * Initialize the Scsi Pointer field so that all of the commands in the 
+       /*
+        * Initialize the Scsi Pointer field so that all of the commands in the
         * various queues are valid.
         */
 
@@ -295,120 +198,123 @@ static inline void initialize_SCp(struct scsi_cmnd *cmd)
                cmd->SCp.ptr = NULL;
                cmd->SCp.this_residual = 0;
        }
+
+       cmd->SCp.Status = 0;
+       cmd->SCp.Message = 0;
 }
 
 /**
- *     NCR5380_poll_politely   -       wait for NCR5380 status bits
- *     @instance: controller to poll
- *     @reg: 5380 register to poll
- *     @bit: Bitmask to check
- *     @val: Value required to exit
- *
- *     Polls the NCR5380 in a reasonably efficient manner waiting for
- *     an event to occur, after a short quick poll we begin giving the
- *     CPU back in non IRQ contexts
- *
- *     Returns the value of the register or a negative error code.
+ * NCR5380_poll_politely2 - wait for two chip register values
+ * @instance: controller to poll
+ * @reg1: 5380 register to poll
+ * @bit1: Bitmask to check
+ * @val1: Expected value
+ * @reg2: Second 5380 register to poll
+ * @bit2: Second bitmask to check
+ * @val2: Second expected value
+ * @wait: Time-out in jiffies
+ *
+ * Polls the chip in a reasonably efficient manner waiting for an
+ * event to occur. After a short quick poll we begin to yield the CPU
+ * (if possible). In irq contexts the time-out is arbitrarily limited.
+ * Callers may hold locks as long as they are held in irq mode.
+ *
+ * Returns 0 if either or both event(s) occurred otherwise -ETIMEDOUT.
  */
-static int NCR5380_poll_politely(struct Scsi_Host *instance, int reg, int bit, int val, int t)
+
+static int NCR5380_poll_politely2(struct Scsi_Host *instance,
+                                  int reg1, int bit1, int val1,
+                                  int reg2, int bit2, int val2, int wait)
 {
-       NCR5380_local_declare();
-       int n = 500;            /* At about 8uS a cycle for the cpu access */
-       unsigned long end = jiffies + t;
-       int r;
-       
-       NCR5380_setup(instance);
-
-       while( n-- > 0)
-       {
-               r = NCR5380_read(reg);
-               if((r & bit) == val)
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+       unsigned long deadline = jiffies + wait;
+       unsigned long n;
+
+       /* Busy-wait for up to 10 ms */
+       n = min(10000U, jiffies_to_usecs(wait));
+       n *= hostdata->accesses_per_ms;
+       n /= 2000;
+       do {
+               if ((NCR5380_read(reg1) & bit1) == val1)
+                       return 0;
+               if ((NCR5380_read(reg2) & bit2) == val2)
                        return 0;
                cpu_relax();
-       }
-       
-       /* t time yet ? */
-       while(time_before(jiffies, end))
-       {
-               r = NCR5380_read(reg);
-               if((r & bit) == val)
+       } while (n--);
+
+       if (irqs_disabled() || in_interrupt())
+               return -ETIMEDOUT;
+
+       /* Repeatedly sleep for 1 ms until deadline */
+       while (time_is_after_jiffies(deadline)) {
+               schedule_timeout_uninterruptible(1);
+               if ((NCR5380_read(reg1) & bit1) == val1)
+                       return 0;
+               if ((NCR5380_read(reg2) & bit2) == val2)
                        return 0;
-               if(!in_interrupt())
-                       cond_resched();
-               else
-                       cpu_relax();
        }
+
        return -ETIMEDOUT;
 }
 
-static struct {
-       unsigned char value;
-       const char *name;
-} phases[] __maybe_unused = {
-       {PHASE_DATAOUT, "DATAOUT"}, 
-       {PHASE_DATAIN, "DATAIN"}, 
-       {PHASE_CMDOUT, "CMDOUT"}, 
-       {PHASE_STATIN, "STATIN"}, 
-       {PHASE_MSGOUT, "MSGOUT"}, 
-       {PHASE_MSGIN, "MSGIN"}, 
-       {PHASE_UNKNOWN, "UNKNOWN"}
-};
+static inline int NCR5380_poll_politely(struct Scsi_Host *instance,
+                                        int reg, int bit, int val, int wait)
+{
+       return NCR5380_poll_politely2(instance, reg, bit, val,
+                                               reg, bit, val, wait);
+}
 
 #if NDEBUG
 static struct {
        unsigned char mask;
        const char *name;
-} signals[] = { 
-       {SR_DBP, "PARITY"}, 
-       {SR_RST, "RST"}, 
-       {SR_BSY, "BSY"}, 
-       {SR_REQ, "REQ"}, 
-       {SR_MSG, "MSG"}, 
-       {SR_CD, "CD"}, 
-       {SR_IO, "IO"}, 
-       {SR_SEL, "SEL"}, 
+} signals[] = {
+       {SR_DBP, "PARITY"},
+       {SR_RST, "RST"},
+       {SR_BSY, "BSY"},
+       {SR_REQ, "REQ"},
+       {SR_MSG, "MSG"},
+       {SR_CD, "CD"},
+       {SR_IO, "IO"},
+       {SR_SEL, "SEL"},
        {0, NULL}
-}, 
+},
 basrs[] = {
-       {BASR_ATN, "ATN"}, 
-       {BASR_ACK, "ACK"}, 
+       {BASR_ATN, "ATN"},
+       {BASR_ACK, "ACK"},
        {0, NULL}
-}, 
-icrs[] = { 
-       {ICR_ASSERT_RST, "ASSERT RST"}, 
-       {ICR_ASSERT_ACK, "ASSERT ACK"}, 
-       {ICR_ASSERT_BSY, "ASSERT BSY"}, 
-       {ICR_ASSERT_SEL, "ASSERT SEL"}, 
-       {ICR_ASSERT_ATN, "ASSERT ATN"}, 
-       {ICR_ASSERT_DATA, "ASSERT DATA"}, 
+},
+icrs[] = {
+       {ICR_ASSERT_RST, "ASSERT RST"},
+       {ICR_ASSERT_ACK, "ASSERT ACK"},
+       {ICR_ASSERT_BSY, "ASSERT BSY"},
+       {ICR_ASSERT_SEL, "ASSERT SEL"},
+       {ICR_ASSERT_ATN, "ASSERT ATN"},
+       {ICR_ASSERT_DATA, "ASSERT DATA"},
        {0, NULL}
-}, 
-mrs[] = { 
-       {MR_BLOCK_DMA_MODE, "MODE BLOCK DMA"}, 
-       {MR_TARGET, "MODE TARGET"}, 
-       {MR_ENABLE_PAR_CHECK, "MODE PARITY CHECK"}, 
-       {MR_ENABLE_PAR_INTR, "MODE PARITY INTR"}, 
-       {MR_MONITOR_BSY, "MODE MONITOR BSY"}, 
-       {MR_DMA_MODE, "MODE DMA"}, 
-       {MR_ARBITRATE, "MODE ARBITRATION"}, 
+},
+mrs[] = {
+       {MR_BLOCK_DMA_MODE, "MODE BLOCK DMA"},
+       {MR_TARGET, "MODE TARGET"},
+       {MR_ENABLE_PAR_CHECK, "MODE PARITY CHECK"},
+       {MR_ENABLE_PAR_INTR, "MODE PARITY INTR"},
+       {MR_ENABLE_EOP_INTR, "MODE EOP INTR"},
+       {MR_MONITOR_BSY, "MODE MONITOR BSY"},
+       {MR_DMA_MODE, "MODE DMA"},
+       {MR_ARBITRATE, "MODE ARBITRATION"},
        {0, NULL}
 };
 
 /**
- *     NCR5380_print   -       print scsi bus signals
- *     @instance:      adapter state to dump
- *
- *     Print the SCSI bus signals for debugging purposes
+ * NCR5380_print - print scsi bus signals
+ * @instance: adapter state to dump
  *
- *     Locks: caller holds hostdata lock (not essential)
+ * Print the SCSI bus signals for debugging purposes
  */
 
 static void NCR5380_print(struct Scsi_Host *instance)
 {
-       NCR5380_local_declare();
        unsigned char status, data, basr, mr, icr, i;
-       NCR5380_setup(instance);
 
        data = NCR5380_read(CURRENT_SCSI_DATA_REG);
        status = NCR5380_read(STATUS_REG);
@@ -435,117 +341,56 @@ static void NCR5380_print(struct Scsi_Host *instance)
        printk("\n");
 }
 
+static struct {
+       unsigned char value;
+       const char *name;
+} phases[] = {
+       {PHASE_DATAOUT, "DATAOUT"},
+       {PHASE_DATAIN, "DATAIN"},
+       {PHASE_CMDOUT, "CMDOUT"},
+       {PHASE_STATIN, "STATIN"},
+       {PHASE_MSGOUT, "MSGOUT"},
+       {PHASE_MSGIN, "MSGIN"},
+       {PHASE_UNKNOWN, "UNKNOWN"}
+};
 
-/* 
- *     NCR5380_print_phase     -       show SCSI phase
- *     @instance: adapter to dump
- *
- *     Print the current SCSI phase for debugging purposes
+/**
+ * NCR5380_print_phase - show SCSI phase
+ * @instance: adapter to dump
  *
- *     Locks: none
+ * Print the current SCSI phase for debugging purposes
  */
 
 static void NCR5380_print_phase(struct Scsi_Host *instance)
 {
-       NCR5380_local_declare();
        unsigned char status;
        int i;
-       NCR5380_setup(instance);
 
        status = NCR5380_read(STATUS_REG);
        if (!(status & SR_REQ))
-               printk("scsi%d : REQ not asserted, phase unknown.\n", instance->host_no);
+               shost_printk(KERN_DEBUG, instance, "REQ not asserted, phase unknown.\n");
        else {
-               for (i = 0; (phases[i].value != PHASE_UNKNOWN) && (phases[i].value != (status & PHASE_MASK)); ++i);
-               printk("scsi%d : phase %s\n", instance->host_no, phases[i].name);
+               for (i = 0; (phases[i].value != PHASE_UNKNOWN) &&
+                    (phases[i].value != (status & PHASE_MASK)); ++i)
+                       ;
+               shost_printk(KERN_DEBUG, instance, "phase %s\n", phases[i].name);
        }
 }
 #endif
 
-/*
- * These need tweaking, and would probably work best as per-device 
- * flags initialized differently for disk, tape, cd, etc devices.
- * People with broken devices are free to experiment as to what gives
- * the best results for them.
- *
- * USLEEP_SLEEP should be a minimum seek time.
- *
- * USLEEP_POLL should be a maximum rotational latency.
- */
-#ifndef USLEEP_SLEEP
-/* 20 ms (reasonable hard disk speed) */
-#define USLEEP_SLEEP msecs_to_jiffies(20)
-#endif
-/* 300 RPM (floppy speed) */
-#ifndef USLEEP_POLL
-#define USLEEP_POLL msecs_to_jiffies(200)
-#endif
-#ifndef USLEEP_WAITLONG
-/* RvC: (reasonable time to wait on select error) */
-#define USLEEP_WAITLONG USLEEP_SLEEP
-#endif
-
-/* 
- * Function : int should_disconnect (unsigned char cmd)
- *
- * Purpose : decide whether a command would normally disconnect or 
- *      not, since if it won't disconnect we should go to sleep.
- *
- * Input : cmd - opcode of SCSI command
- *
- * Returns : DISCONNECT_LONG if we should disconnect for a really long 
- *      time (ie always, sleep, look for REQ active, sleep), 
- *      DISCONNECT_TIME_TO_DATA if we would only disconnect for a normal
- *      time-to-data delay, DISCONNECT_NONE if this command would return
- *      immediately.
- *
- *      Future sleep algorithms based on time to data can exploit 
- *      something like this so they can differentiate between "normal" 
- *      (ie, read, write, seek) and unusual commands (ie, * format).
- *
- * Note : We don't deal with commands that handle an immediate disconnect,
- *        
- */
 
-static int should_disconnect(unsigned char cmd)
-{
-       switch (cmd) {
-       case READ_6:
-       case WRITE_6:
-       case SEEK_6:
-       case READ_10:
-       case WRITE_10:
-       case SEEK_10:
-               return DISCONNECT_TIME_TO_DATA;
-       case FORMAT_UNIT:
-       case SEARCH_HIGH:
-       case SEARCH_LOW:
-       case SEARCH_EQUAL:
-               return DISCONNECT_LONG;
-       default:
-               return DISCONNECT_NONE;
-       }
-}
-
-static void NCR5380_set_timer(struct NCR5380_hostdata *hostdata, unsigned long timeout)
-{
-       hostdata->time_expires = jiffies + timeout;
-       schedule_delayed_work(&hostdata->coroutine, timeout);
-}
-
-
-static int probe_irq __initdata = 0;
+static int probe_irq __initdata;
 
 /**
- *     probe_intr      -       helper for IRQ autoprobe
- *     @irq: interrupt number
- *     @dev_id: unused
- *     @regs: unused
+ * probe_intr  -       helper for IRQ autoprobe
+ * @irq: interrupt number
+ * @dev_id: unused
+ * @regs: unused
  *
- *     Set a flag to indicate the IRQ in question was received. This is
- *     used by the IRQ probe code.
+ * Set a flag to indicate the IRQ in question was received. This is
+ * used by the IRQ probe code.
  */
+
 static irqreturn_t __init probe_intr(int irq, void *dev_id)
 {
        probe_irq = irq;
@@ -553,24 +398,20 @@ static irqreturn_t __init probe_intr(int irq, void *dev_id)
 }
 
 /**
- *     NCR5380_probe_irq       -       find the IRQ of an NCR5380
- *     @instance: NCR5380 controller
- *     @possible: bitmask of ISA IRQ lines
- *
- *     Autoprobe for the IRQ line used by the NCR5380 by triggering an IRQ
- *     and then looking to see what interrupt actually turned up.
+ * NCR5380_probe_irq   -       find the IRQ of an NCR5380
+ * @instance: NCR5380 controller
+ * @possible: bitmask of ISA IRQ lines
  *
- *     Locks: none, irqs must be enabled on entry
+ * Autoprobe for the IRQ line used by the NCR5380 by triggering an IRQ
+ * and then looking to see what interrupt actually turned up.
  */
 
 static int __init __maybe_unused NCR5380_probe_irq(struct Scsi_Host *instance,
                                                int possible)
 {
-       NCR5380_local_declare();
-       struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *) instance->hostdata;
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
        unsigned long timeout;
        int trying_irqs, i, mask;
-       NCR5380_setup(instance);
 
        for (trying_irqs = 0, i = 1, mask = 2; i < 16; ++i, mask <<= 1)
                if ((mask & possible) && (request_irq(i, &probe_intr, 0, "NCR-probe", NULL) == 0))
@@ -581,7 +422,7 @@ static int __init __maybe_unused NCR5380_probe_irq(struct Scsi_Host *instance,
 
        /*
         * A interrupt is triggered whenever BSY = false, SEL = true
-        * and a bit set in the SELECT_ENABLE_REG is asserted on the 
+        * and a bit set in the SELECT_ENABLE_REG is asserted on the
         * SCSI bus.
         *
         * Note that the bus is only driven when the phase control signals
@@ -596,7 +437,7 @@ static int __init __maybe_unused NCR5380_probe_irq(struct Scsi_Host *instance,
 
        while (probe_irq == NO_IRQ && time_before(jiffies, timeout))
                schedule_timeout_uninterruptible(1);
-       
+
        NCR5380_write(SELECT_ENABLE_REG, 0);
        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
 
@@ -608,12 +449,10 @@ static int __init __maybe_unused NCR5380_probe_irq(struct Scsi_Host *instance,
 }
 
 /**
- *     NCR58380_info - report driver and host information
- *     @instance: relevant scsi host instance
- *
- *     For use as the host template info() handler.
+ * NCR58380_info - report driver and host information
+ * @instance: relevant scsi host instance
  *
- *     Locks: none
+ * For use as the host template info() handler.
  */
 
 static const char *NCR5380_info(struct Scsi_Host *instance)
@@ -633,20 +472,14 @@ static void prepare_info(struct Scsi_Host *instance)
                 "can_queue %d, cmd_per_lun %d, "
                 "sg_tablesize %d, this_id %d, "
                 "flags { %s%s%s}, "
-#if defined(USLEEP_POLL) && defined(USLEEP_WAITLONG)
-                "USLEEP_POLL %lu, USLEEP_WAITLONG %lu, "
-#endif
                 "options { %s} ",
                 instance->hostt->name, instance->io_port, instance->n_io_port,
                 instance->base, instance->irq,
                 instance->can_queue, instance->cmd_per_lun,
                 instance->sg_tablesize, instance->this_id,
-                hostdata->flags & FLAG_NCR53C400     ? "NCR53C400 "     : "",
-                hostdata->flags & FLAG_DTC3181E      ? "DTC3181E "      : "",
+                hostdata->flags & FLAG_NO_DMA_FIXUP  ? "NO_DMA_FIXUP "  : "",
                 hostdata->flags & FLAG_NO_PSEUDO_DMA ? "NO_PSEUDO_DMA " : "",
-#if defined(USLEEP_POLL) && defined(USLEEP_WAITLONG)
-                USLEEP_POLL, USLEEP_WAITLONG,
-#endif
+                hostdata->flags & FLAG_TOSHIBA_DELAY ? "TOSHIBA_DELAY "  : "",
 #ifdef AUTOPROBE_IRQ
                 "AUTOPROBE_IRQ "
 #endif
@@ -664,47 +497,11 @@ static void prepare_info(struct Scsi_Host *instance)
 #endif
 #ifdef PSEUDO_DMA
                 "PSEUDO_DMA "
-#endif
-#ifdef UNSAFE
-                "UNSAFE "
-#endif
-#ifdef NCR53C400
-                "NCR53C400 "
 #endif
                 "");
 }
 
-/**
- *     NCR5380_print_status    -       dump controller info
- *     @instance: controller to dump
- *
- *     Print commands in the various queues, called from NCR5380_abort 
- *     and NCR5380_debug to aid debugging.
- *
- *     Locks: called functions disable irqs
- */
-
-static void NCR5380_print_status(struct Scsi_Host *instance)
-{
-       NCR5380_dprint(NDEBUG_ANY, instance);
-       NCR5380_dprint_phase(NDEBUG_ANY, instance);
-}
-
 #ifdef PSEUDO_DMA
-/******************************************/
-/*
- * /proc/scsi/[dtc pas16 t128 generic]/[0-ASC_NUM_BOARD_SUPPORTED]
- *
- * *buffer: I/O buffer
- * **start: if inout == FALSE pointer into buffer where user read should start
- * offset: current offset
- * length: length of buffer
- * hostno: Scsi_Host host_no
- * inout: TRUE - user is writing; FALSE - user is reading
- *
- * Return the number of bytes read from or written
- */
-
 static int __maybe_unused NCR5380_write_info(struct Scsi_Host *instance,
        char *buffer, int length)
 {
@@ -714,104 +511,41 @@ static int __maybe_unused NCR5380_write_info(struct Scsi_Host *instance,
        hostdata->spin_max_w = 0;
        return 0;
 }
-#endif
-
-static
-void lprint_Scsi_Cmnd(struct scsi_cmnd *cmd, struct seq_file *m);
-static
-void lprint_command(unsigned char *cmd, struct seq_file *m);
-static
-void lprint_opcode(int opcode, struct seq_file *m);
 
 static int __maybe_unused NCR5380_show_info(struct seq_file *m,
-       struct Scsi_Host *instance)
+                                            struct Scsi_Host *instance)
 {
-       struct NCR5380_hostdata *hostdata;
-       struct scsi_cmnd *ptr;
-
-       hostdata = (struct NCR5380_hostdata *) instance->hostdata;
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
 
-#ifdef PSEUDO_DMA
        seq_printf(m, "Highwater I/O busy spin counts: write %d, read %d\n",
                hostdata->spin_max_w, hostdata->spin_max_r);
-#endif
-       spin_lock_irq(instance->host_lock);
-       if (!hostdata->connected)
-               seq_printf(m, "scsi%d: no currently connected command\n", instance->host_no);
-       else
-               lprint_Scsi_Cmnd((struct scsi_cmnd *) hostdata->connected, m);
-       seq_printf(m, "scsi%d: issue_queue\n", instance->host_no);
-       for (ptr = (struct scsi_cmnd *) hostdata->issue_queue; ptr; ptr = (struct scsi_cmnd *) ptr->host_scribble)
-               lprint_Scsi_Cmnd(ptr, m);
-
-       seq_printf(m, "scsi%d: disconnected_queue\n", instance->host_no);
-       for (ptr = (struct scsi_cmnd *) hostdata->disconnected_queue; ptr; ptr = (struct scsi_cmnd *) ptr->host_scribble)
-               lprint_Scsi_Cmnd(ptr, m);
-       spin_unlock_irq(instance->host_lock);
        return 0;
 }
-
-static void lprint_Scsi_Cmnd(struct scsi_cmnd *cmd, struct seq_file *m)
-{
-       seq_printf(m, "scsi%d : destination target %d, lun %llu\n", cmd->device->host->host_no, cmd->device->id, cmd->device->lun);
-       seq_puts(m, "        command = ");
-       lprint_command(cmd->cmnd, m);
-}
-
-static void lprint_command(unsigned char *command, struct seq_file *m)
-{
-       int i, s;
-       lprint_opcode(command[0], m);
-       for (i = 1, s = COMMAND_SIZE(command[0]); i < s; ++i)
-               seq_printf(m, "%02x ", command[i]);
-       seq_putc(m, '\n');
-}
-
-static void lprint_opcode(int opcode, struct seq_file *m)
-{
-       seq_printf(m, "%2d (0x%02x)", opcode, opcode);
-}
-
+#endif
 
 /**
- *     NCR5380_init    -       initialise an NCR5380
- *     @instance: adapter to configure
- *     @flags: control flags
+ * NCR5380_init - initialise an NCR5380
+ * @instance: adapter to configure
+ * @flags: control flags
  *
- *     Initializes *instance and corresponding 5380 chip,
- *      with flags OR'd into the initial flags value.
+ * Initializes *instance and corresponding 5380 chip,
+ * with flags OR'd into the initial flags value.
  *
- *     Notes : I assume that the host, hostno, and id bits have been
- *      set correctly.  I don't care about the irq and other fields. 
+ * Notes : I assume that the host, hostno, and id bits have been
+ * set correctly. I don't care about the irq and other fields.
  *
- *     Returns 0 for success
- *
- *     Locks: interrupts must be enabled when we are called 
+ * Returns 0 for success
  */
 
 static int NCR5380_init(struct Scsi_Host *instance, int flags)
 {
-       NCR5380_local_declare();
-       int i, pass;
-       unsigned long timeout;
-       struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *) instance->hostdata;
-
-       if(in_interrupt())
-               printk(KERN_ERR "NCR5380_init called with interrupts off!\n");
-       /* 
-        * On NCR53C400 boards, NCR5380 registers are mapped 8 past 
-        * the base address.
-        */
-
-#ifdef NCR53C400
-       if (flags & FLAG_NCR53C400)
-               instance->NCR5380_instance_name += NCR53C400_address_adjust;
-#endif
-
-       NCR5380_setup(instance);
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+       int i;
+       unsigned long deadline;
 
-       hostdata->aborted = 0;
+       hostdata->host = instance;
        hostdata->id_mask = 1 << instance->this_id;
+       hostdata->id_higher_mask = 0;
        for (i = hostdata->id_mask; i <= 0x80; i <<= 1)
                if (i > hostdata->id_mask)
                        hostdata->id_higher_mask |= i;
@@ -820,21 +554,21 @@ static int NCR5380_init(struct Scsi_Host *instance, int flags)
 #ifdef REAL_DMA
        hostdata->dmalen = 0;
 #endif
-       hostdata->targets_present = 0;
+       spin_lock_init(&hostdata->lock);
        hostdata->connected = NULL;
-       hostdata->issue_queue = NULL;
-       hostdata->disconnected_queue = NULL;
-       
-       INIT_DELAYED_WORK(&hostdata->coroutine, NCR5380_main);
-       
-       /* The CHECK code seems to break the 53C400. Will check it later maybe */
-       if (flags & FLAG_NCR53C400)
-               hostdata->flags = FLAG_HAS_LAST_BYTE_SENT | flags;
-       else
-               hostdata->flags = FLAG_CHECK_LAST_BYTE_SENT | flags;
+       hostdata->sensing = NULL;
+       INIT_LIST_HEAD(&hostdata->autosense);
+       INIT_LIST_HEAD(&hostdata->unissued);
+       INIT_LIST_HEAD(&hostdata->disconnected);
 
-       hostdata->host = instance;
-       hostdata->time_expires = 0;
+       hostdata->flags = flags;
+
+       INIT_WORK(&hostdata->main_task, NCR5380_main);
+       hostdata->work_q = alloc_workqueue("ncr5380_%d",
+                               WQ_UNBOUND | WQ_MEM_RECLAIM,
+                               1, instance->host_no);
+       if (!hostdata->work_q)
+               return -ENOMEM;
 
        prepare_info(instance);
 
@@ -843,43 +577,69 @@ static int NCR5380_init(struct Scsi_Host *instance, int flags)
        NCR5380_write(TARGET_COMMAND_REG, 0);
        NCR5380_write(SELECT_ENABLE_REG, 0);
 
-#ifdef NCR53C400
-       if (hostdata->flags & FLAG_NCR53C400) {
-               NCR5380_write(C400_CONTROL_STATUS_REG, CSR_BASE);
-       }
-#endif
+       /* Calibrate register polling loop */
+       i = 0;
+       deadline = jiffies + 1;
+       do {
+               cpu_relax();
+       } while (time_is_after_jiffies(deadline));
+       deadline += msecs_to_jiffies(256);
+       do {
+               NCR5380_read(STATUS_REG);
+               ++i;
+               cpu_relax();
+       } while (time_is_after_jiffies(deadline));
+       hostdata->accesses_per_ms = i / 256;
 
-       /* 
-        * Detect and correct bus wedge problems.
-        *
-        * If the system crashed, it may have crashed in a state 
-        * where a SCSI command was still executing, and the 
-        * SCSI bus is not in a BUS FREE STATE.
-        *
-        * If this is the case, we'll try to abort the currently
-        * established nexus which we know nothing about, and that
-        * failing, do a hard reset of the SCSI bus 
-        */
+       return 0;
+}
+
+/**
+ * NCR5380_maybe_reset_bus - Detect and correct bus wedge problems.
+ * @instance: adapter to check
+ *
+ * If the system crashed, it may have crashed with a connected target and
+ * the SCSI bus busy. Check for BUS FREE phase. If not, try to abort the
+ * currently established nexus, which we know nothing about. Failing that
+ * do a bus reset.
+ *
+ * Note that a bus reset will cause the chip to assert IRQ.
+ *
+ * Returns 0 if successful, otherwise -ENXIO.
+ */
+
+static int NCR5380_maybe_reset_bus(struct Scsi_Host *instance)
+{
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+       int pass;
 
        for (pass = 1; (NCR5380_read(STATUS_REG) & SR_BSY) && pass <= 6; ++pass) {
                switch (pass) {
                case 1:
                case 3:
                case 5:
-                       printk(KERN_INFO "scsi%d: SCSI bus busy, waiting up to five seconds\n", instance->host_no);
-                       timeout = jiffies + 5 * HZ;
-                       NCR5380_poll_politely(instance, STATUS_REG, SR_BSY, 0, 5*HZ);
+                       shost_printk(KERN_ERR, instance, "SCSI bus busy, waiting up to five seconds\n");
+                       NCR5380_poll_politely(instance,
+                                             STATUS_REG, SR_BSY, 0, 5 * HZ);
                        break;
                case 2:
-                       printk(KERN_WARNING "scsi%d: bus busy, attempting abort\n", instance->host_no);
+                       shost_printk(KERN_ERR, instance, "bus busy, attempting abort\n");
                        do_abort(instance);
                        break;
                case 4:
-                       printk(KERN_WARNING "scsi%d: bus busy, attempting reset\n", instance->host_no);
+                       shost_printk(KERN_ERR, instance, "bus busy, attempting reset\n");
                        do_reset(instance);
+                       /* Wait after a reset; the SCSI standard calls for
+                        * 250ms, we wait 500ms to be on the safe side.
+                        * But some Toshiba CD-ROMs need ten times that.
+                        */
+                       if (hostdata->flags & FLAG_TOSHIBA_DELAY)
+                               msleep(2500);
+                       else
+                               msleep(500);
                        break;
                case 6:
-                       printk(KERN_ERR "scsi%d: bus locked solid or invalid override\n", instance->host_no);
+                       shost_printk(KERN_ERR, instance, "bus locked solid\n");
                        return -ENXIO;
                }
        }
@@ -887,450 +647,513 @@ static int NCR5380_init(struct Scsi_Host *instance, int flags)
 }
 
 /**
- *     NCR5380_exit    -       remove an NCR5380
- *     @instance: adapter to remove
+ * NCR5380_exit - remove an NCR5380
+ * @instance: adapter to remove
+ *
+ * Assumes that no more work can be queued (e.g. by NCR5380_intr).
  */
 
 static void NCR5380_exit(struct Scsi_Host *instance)
 {
-       struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *) instance->hostdata;
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
 
-       cancel_delayed_work_sync(&hostdata->coroutine);
+       cancel_work_sync(&hostdata->main_task);
+       destroy_workqueue(hostdata->work_q);
 }
 
 /**
- *     NCR5380_queue_command           -       queue a command
- *     @cmd: SCSI command
- *     @done: completion handler
- *
- *      cmd is added to the per instance issue_queue, with minor 
- *      twiddling done to the host specific fields of cmd.  If the 
- *      main coroutine is not running, it is restarted.
+ * complete_cmd - finish processing a command and return it to the SCSI ML
+ * @instance: the host instance
+ * @cmd: command to complete
+ */
+
+static void complete_cmd(struct Scsi_Host *instance,
+                         struct scsi_cmnd *cmd)
+{
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+
+       dsprintk(NDEBUG_QUEUES, instance, "complete_cmd: cmd %p\n", cmd);
+
+       if (hostdata->sensing == cmd) {
+               /* Autosense processing ends here */
+               if ((cmd->result & 0xff) != SAM_STAT_GOOD) {
+                       scsi_eh_restore_cmnd(cmd, &hostdata->ses);
+                       set_host_byte(cmd, DID_ERROR);
+               } else
+                       scsi_eh_restore_cmnd(cmd, &hostdata->ses);
+               hostdata->sensing = NULL;
+       }
+
+       hostdata->busy[scmd_id(cmd)] &= ~(1 << cmd->device->lun);
+
+       cmd->scsi_done(cmd);
+}
+
+/**
+ * NCR5380_queue_command - queue a command
+ * @instance: the relevant SCSI adapter
+ * @cmd: SCSI command
  *
- *     Locks: host lock taken by caller
+ * cmd is added to the per-instance issue queue, with minor
+ * twiddling done to the host specific fields of cmd.  If the
+ * main coroutine is not running, it is restarted.
  */
 
-static int NCR5380_queue_command_lck(struct scsi_cmnd *cmd, void (*done) (struct scsi_cmnd *))
+static int NCR5380_queue_command(struct Scsi_Host *instance,
+                                 struct scsi_cmnd *cmd)
 {
-       struct Scsi_Host *instance = cmd->device->host;
-       struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *) instance->hostdata;
-       struct scsi_cmnd *tmp;
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+       struct NCR5380_cmd *ncmd = scsi_cmd_priv(cmd);
+       unsigned long flags;
 
 #if (NDEBUG & NDEBUG_NO_WRITE)
        switch (cmd->cmnd[0]) {
        case WRITE_6:
        case WRITE_10:
-               printk("scsi%d : WRITE attempted with NO_WRITE debugging flag set\n", instance->host_no);
+               shost_printk(KERN_DEBUG, instance, "WRITE attempted with NDEBUG_NO_WRITE set\n");
                cmd->result = (DID_ERROR << 16);
-               done(cmd);
+               cmd->scsi_done(cmd);
                return 0;
        }
-#endif                         /* (NDEBUG & NDEBUG_NO_WRITE) */
+#endif /* (NDEBUG & NDEBUG_NO_WRITE) */
 
-       /* 
-        * We use the host_scribble field as a pointer to the next command  
-        * in a queue 
-        */
-
-       cmd->host_scribble = NULL;
-       cmd->scsi_done = done;
        cmd->result = 0;
 
-       /* 
-        * Insert the cmd into the issue queue. Note that REQUEST SENSE 
+       spin_lock_irqsave(&hostdata->lock, flags);
+
+       /*
+        * Insert the cmd into the issue queue. Note that REQUEST SENSE
         * commands are added to the head of the queue since any command will
-        * clear the contingent allegiance condition that exists and the 
+        * clear the contingent allegiance condition that exists and the
         * sense data is only guaranteed to be valid while the condition exists.
         */
 
-       if (!(hostdata->issue_queue) || (cmd->cmnd[0] == REQUEST_SENSE)) {
-               LIST(cmd, hostdata->issue_queue);
-               cmd->host_scribble = (unsigned char *) hostdata->issue_queue;
-               hostdata->issue_queue = cmd;
-       } else {
-               for (tmp = (struct scsi_cmnd *) hostdata->issue_queue; tmp->host_scribble; tmp = (struct scsi_cmnd *) tmp->host_scribble);
-               LIST(cmd, tmp);
-               tmp->host_scribble = (unsigned char *) cmd;
-       }
-       dprintk(NDEBUG_QUEUES, "scsi%d : command added to %s of queue\n", instance->host_no, (cmd->cmnd[0] == REQUEST_SENSE) ? "head" : "tail");
+       if (cmd->cmnd[0] == REQUEST_SENSE)
+               list_add(&ncmd->list, &hostdata->unissued);
+       else
+               list_add_tail(&ncmd->list, &hostdata->unissued);
+
+       spin_unlock_irqrestore(&hostdata->lock, flags);
+
+       dsprintk(NDEBUG_QUEUES, instance, "command %p added to %s of queue\n",
+                cmd, (cmd->cmnd[0] == REQUEST_SENSE) ? "head" : "tail");
 
-       /* Run the coroutine if it isn't already running. */
        /* Kick off command processing */
-       schedule_delayed_work(&hostdata->coroutine, 0);
+       queue_work(hostdata->work_q, &hostdata->main_task);
        return 0;
 }
 
-static DEF_SCSI_QCMD(NCR5380_queue_command)
+/**
+ * dequeue_next_cmd - dequeue a command for processing
+ * @instance: the scsi host instance
+ *
+ * Priority is given to commands on the autosense queue. These commands
+ * need autosense because of a CHECK CONDITION result.
+ *
+ * Returns a command pointer if a command is found for a target that is
+ * not already busy. Otherwise returns NULL.
+ */
+
+static struct scsi_cmnd *dequeue_next_cmd(struct Scsi_Host *instance)
+{
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+       struct NCR5380_cmd *ncmd;
+       struct scsi_cmnd *cmd;
+
+       if (list_empty(&hostdata->autosense)) {
+               list_for_each_entry(ncmd, &hostdata->unissued, list) {
+                       cmd = NCR5380_to_scmd(ncmd);
+                       dsprintk(NDEBUG_QUEUES, instance, "dequeue: cmd=%p target=%d busy=0x%02x lun=%llu\n",
+                                cmd, scmd_id(cmd), hostdata->busy[scmd_id(cmd)], cmd->device->lun);
+
+                       if (!(hostdata->busy[scmd_id(cmd)] & (1 << cmd->device->lun))) {
+                               list_del(&ncmd->list);
+                               dsprintk(NDEBUG_QUEUES, instance,
+                                        "dequeue: removed %p from issue queue\n", cmd);
+                               return cmd;
+                       }
+               }
+       } else {
+               /* Autosense processing begins here */
+               ncmd = list_first_entry(&hostdata->autosense,
+                                       struct NCR5380_cmd, list);
+               list_del(&ncmd->list);
+               cmd = NCR5380_to_scmd(ncmd);
+               dsprintk(NDEBUG_QUEUES, instance,
+                        "dequeue: removed %p from autosense queue\n", cmd);
+               scsi_eh_prep_cmnd(cmd, &hostdata->ses, NULL, 0, ~0);
+               hostdata->sensing = cmd;
+               return cmd;
+       }
+       return NULL;
+}
+
+static void requeue_cmd(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
+{
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+       struct NCR5380_cmd *ncmd = scsi_cmd_priv(cmd);
+
+       if (hostdata->sensing) {
+               scsi_eh_restore_cmnd(cmd, &hostdata->ses);
+               list_add(&ncmd->list, &hostdata->autosense);
+               hostdata->sensing = NULL;
+       } else
+               list_add(&ncmd->list, &hostdata->unissued);
+}
 
 /**
- *     NCR5380_main    -       NCR state machines
- *
- *     NCR5380_main is a coroutine that runs as long as more work can 
- *      be done on the NCR5380 host adapters in a system.  Both 
- *      NCR5380_queue_command() and NCR5380_intr() will try to start it 
- *      in case it is not running.
- * 
- *     Locks: called as its own thread with no locks held. Takes the
- *     host lock and called routines may take the isa dma lock.
+ * NCR5380_main - NCR state machines
+ *
+ * NCR5380_main is a coroutine that runs as long as more work can
+ * be done on the NCR5380 host adapters in a system.  Both
+ * NCR5380_queue_command() and NCR5380_intr() will try to start it
+ * in case it is not running.
  */
 
 static void NCR5380_main(struct work_struct *work)
 {
        struct NCR5380_hostdata *hostdata =
-               container_of(work, struct NCR5380_hostdata, coroutine.work);
+               container_of(work, struct NCR5380_hostdata, main_task);
        struct Scsi_Host *instance = hostdata->host;
-       struct scsi_cmnd *tmp, *prev;
+       struct scsi_cmnd *cmd;
        int done;
-       
-       spin_lock_irq(instance->host_lock);
+
        do {
-               /* Lock held here */
                done = 1;
-               if (!hostdata->connected && !hostdata->selecting) {
-                       dprintk(NDEBUG_MAIN, "scsi%d : not connected\n", instance->host_no);
-                       /*
-                        * Search through the issue_queue for a command destined
-                        * for a target that's not busy.
-                        */
-                       for (tmp = (struct scsi_cmnd *) hostdata->issue_queue, prev = NULL; tmp; prev = tmp, tmp = (struct scsi_cmnd *) tmp->host_scribble)
-                       {
-                               if (prev != tmp)
-                                   dprintk(NDEBUG_LISTS, "MAIN tmp=%p   target=%d   busy=%d lun=%llu\n", tmp, tmp->device->id, hostdata->busy[tmp->device->id], tmp->device->lun);
-                               /*  When we find one, remove it from the issue queue. */
-                               if (!(hostdata->busy[tmp->device->id] &
-                                     (1 << (u8)(tmp->device->lun & 0xff)))) {
-                                       if (prev) {
-                                               REMOVE(prev, prev->host_scribble, tmp, tmp->host_scribble);
-                                               prev->host_scribble = tmp->host_scribble;
-                                       } else {
-                                               REMOVE(-1, hostdata->issue_queue, tmp, tmp->host_scribble);
-                                               hostdata->issue_queue = (struct scsi_cmnd *) tmp->host_scribble;
-                                       }
-                                       tmp->host_scribble = NULL;
 
-                                       /* 
-                                        * Attempt to establish an I_T_L nexus here. 
-                                        * On success, instance->hostdata->connected is set.
-                                        * On failure, we must add the command back to the
-                                        *   issue queue so we can keep trying. 
-                                        */
-                                       dprintk(NDEBUG_MAIN|NDEBUG_QUEUES, "scsi%d : main() : command for target %d lun %llu removed from issue_queue\n", instance->host_no, tmp->device->id, tmp->device->lun);
-       
-                                       /*
-                                        * A successful selection is defined as one that 
-                                        * leaves us with the command connected and 
-                                        * in hostdata->connected, OR has terminated the
-                                        * command.
-                                        *
-                                        * With successful commands, we fall through
-                                        * and see if we can do an information transfer,
-                                        * with failures we will restart.
-                                        */
-                                       hostdata->selecting = NULL;
-                                       /* RvC: have to preset this to indicate a new command is being performed */
+               spin_lock_irq(&hostdata->lock);
+               while (!hostdata->connected &&
+                      (cmd = dequeue_next_cmd(instance))) {
 
-                                       /*
-                                        * REQUEST SENSE commands are issued without tagged
-                                        * queueing, even on SCSI-II devices because the
-                                        * contingent allegiance condition exists for the
-                                        * entire unit.
-                                        */
+                       dsprintk(NDEBUG_MAIN, instance, "main: dequeued %p\n", cmd);
 
-                                       if (!NCR5380_select(instance, tmp)) {
-                                               break;
-                                       } else {
-                                               LIST(tmp, hostdata->issue_queue);
-                                               tmp->host_scribble = (unsigned char *) hostdata->issue_queue;
-                                               hostdata->issue_queue = tmp;
-                                               done = 0;
-                                               dprintk(NDEBUG_MAIN|NDEBUG_QUEUES, "scsi%d : main(): select() failed, returned to issue_queue\n", instance->host_no);
-                                       }
-                                       /* lock held here still */
-                               }       /* if target/lun is not busy */
-                       }       /* for */
-                       /* exited locked */
-               }       /* if (!hostdata->connected) */
-               if (hostdata->selecting) {
-                       tmp = (struct scsi_cmnd *) hostdata->selecting;
-                       /* Selection will drop and retake the lock */
-                       if (!NCR5380_select(instance, tmp)) {
-                               /* Ok ?? */
+                       /*
+                        * Attempt to establish an I_T_L nexus here.
+                        * On success, instance->hostdata->connected is set.
+                        * On failure, we must add the command back to the
+                        * issue queue so we can keep trying.
+                        */
+                       /*
+                        * REQUEST SENSE commands are issued without tagged
+                        * queueing, even on SCSI-II devices because the
+                        * contingent allegiance condition exists for the
+                        * entire unit.
+                        */
+
+                       cmd = NCR5380_select(instance, cmd);
+                       if (!cmd) {
+                               dsprintk(NDEBUG_MAIN, instance, "main: select complete\n");
                        } else {
-                               /* RvC: device failed, so we wait a long time
-                                  this is needed for Mustek scanners, that
-                                  do not respond to commands immediately
-                                  after a scan */
-                               printk(KERN_DEBUG "scsi%d: device %d did not respond in time\n", instance->host_no, tmp->device->id);
-                               LIST(tmp, hostdata->issue_queue);
-                               tmp->host_scribble = (unsigned char *) hostdata->issue_queue;
-                               hostdata->issue_queue = tmp;
-                               NCR5380_set_timer(hostdata, USLEEP_WAITLONG);
+                               dsprintk(NDEBUG_MAIN | NDEBUG_QUEUES, instance,
+                                        "main: select failed, returning %p to queue\n", cmd);
+                               requeue_cmd(instance, cmd);
                        }
-               }       /* if hostdata->selecting */
+               }
                if (hostdata->connected
 #ifdef REAL_DMA
                    && !hostdata->dmalen
 #endif
-                   && (!hostdata->time_expires || time_before_eq(hostdata->time_expires, jiffies))
                    ) {
-                       dprintk(NDEBUG_MAIN, "scsi%d : main() : performing information transfer\n", instance->host_no);
+                       dsprintk(NDEBUG_MAIN, instance, "main: performing information transfer\n");
                        NCR5380_information_transfer(instance);
-                       dprintk(NDEBUG_MAIN, "scsi%d : main() : done set false\n", instance->host_no);
                        done = 0;
-               } else
-                       break;
+               }
+               spin_unlock_irq(&hostdata->lock);
+               if (!done)
+                       cond_resched();
        } while (!done);
-       
-       spin_unlock_irq(instance->host_lock);
 }
 
 #ifndef DONT_USE_INTR
 
 /**
- *     NCR5380_intr    -       generic NCR5380 irq handler
- *     @irq: interrupt number
- *     @dev_id: device info
- *
- *     Handle interrupts, reestablishing I_T_L or I_T_L_Q nexuses
- *      from the disconnected queue, and restarting NCR5380_main() 
- *      as required.
- *
- *     Locks: takes the needed instance locks
+ * NCR5380_intr - generic NCR5380 irq handler
+ * @irq: interrupt number
+ * @dev_id: device info
+ *
+ * Handle interrupts, reestablishing I_T_L or I_T_L_Q nexuses
+ * from the disconnected queue, and restarting NCR5380_main()
+ * as required.
+ *
+ * The chip can assert IRQ in any of six different conditions. The IRQ flag
+ * is then cleared by reading the Reset Parity/Interrupt Register (RPIR).
+ * Three of these six conditions are latched in the Bus and Status Register:
+ * - End of DMA (cleared by ending DMA Mode)
+ * - Parity error (cleared by reading RPIR)
+ * - Loss of BSY (cleared by reading RPIR)
+ * Two conditions have flag bits that are not latched:
+ * - Bus phase mismatch (non-maskable in DMA Mode, cleared by ending DMA Mode)
+ * - Bus reset (non-maskable)
+ * The remaining condition has no flag bit at all:
+ * - Selection/reselection
+ *
+ * Hence, establishing the cause(s) of any interrupt is partly guesswork.
+ * In "The DP8490 and DP5380 Comparison Guide", National Semiconductor
+ * claimed that "the design of the [DP8490] interrupt logic ensures
+ * interrupts will not be lost (they can be on the DP5380)."
+ * The L5380/53C80 datasheet from LOGIC Devices has more details.
+ *
+ * Checking for bus reset by reading RST is futile because of interrupt
+ * latency, but a bus reset will reset chip logic. Checking for parity error
+ * is unnecessary because that interrupt is never enabled. A Loss of BSY
+ * condition will clear DMA Mode. We can tell when this occurs because the
+ * the Busy Monitor interrupt is enabled together with DMA Mode.
  */
 
-static irqreturn_t NCR5380_intr(int dummy, void *dev_id)
+static irqreturn_t NCR5380_intr(int irq, void *dev_id)
 {
-       NCR5380_local_declare();
        struct Scsi_Host *instance = dev_id;
-       struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *) instance->hostdata;
-       int done;
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+       int handled = 0;
        unsigned char basr;
        unsigned long flags;
 
-       dprintk(NDEBUG_INTR, "scsi : NCR5380 irq %d triggered\n",
-               instance->irq);
+       spin_lock_irqsave(&hostdata->lock, flags);
+
+       basr = NCR5380_read(BUS_AND_STATUS_REG);
+       if (basr & BASR_IRQ) {
+               unsigned char mr = NCR5380_read(MODE_REG);
+               unsigned char sr = NCR5380_read(STATUS_REG);
+
+               dsprintk(NDEBUG_INTR, instance, "IRQ %d, BASR 0x%02x, SR 0x%02x, MR 0x%02x\n",
+                        irq, basr, sr, mr);
 
-       do {
-               done = 1;
-               spin_lock_irqsave(instance->host_lock, flags);
-               /* Look for pending interrupts */
-               NCR5380_setup(instance);
-               basr = NCR5380_read(BUS_AND_STATUS_REG);
-               /* XXX dispatch to appropriate routine if found and done=0 */
-               if (basr & BASR_IRQ) {
-                       NCR5380_dprint(NDEBUG_INTR, instance);
-                       if ((NCR5380_read(STATUS_REG) & (SR_SEL | SR_IO)) == (SR_SEL | SR_IO)) {
-                               done = 0;
-                               dprintk(NDEBUG_INTR, "scsi%d : SEL interrupt\n", instance->host_no);
-                               NCR5380_reselect(instance);
-                               (void) NCR5380_read(RESET_PARITY_INTERRUPT_REG);
-                       } else if (basr & BASR_PARITY_ERROR) {
-                               dprintk(NDEBUG_INTR, "scsi%d : PARITY interrupt\n", instance->host_no);
-                               (void) NCR5380_read(RESET_PARITY_INTERRUPT_REG);
-                       } else if ((NCR5380_read(STATUS_REG) & SR_RST) == SR_RST) {
-                               dprintk(NDEBUG_INTR, "scsi%d : RESET interrupt\n", instance->host_no);
-                               (void) NCR5380_read(RESET_PARITY_INTERRUPT_REG);
-                       } else {
 #if defined(REAL_DMA)
-                               /*
-                                * We should only get PHASE MISMATCH and EOP interrupts
-                                * if we have DMA enabled, so do a sanity check based on
-                                * the current setting of the MODE register.
-                                */
+               if ((mr & MR_DMA_MODE) || (mr & MR_MONITOR_BSY)) {
+                       /* Probably End of DMA, Phase Mismatch or Loss of BSY.
+                        * We ack IRQ after clearing Mode Register. Workarounds
+                        * for End of DMA errata need to happen in DMA Mode.
+                        */
 
-                               if ((NCR5380_read(MODE_REG) & MR_DMA) && ((basr & BASR_END_DMA_TRANSFER) || !(basr & BASR_PHASE_MATCH))) {
-                                       int transferred;
+                       dsprintk(NDEBUG_INTR, instance, "interrupt in DMA mode\n");
 
-                                       if (!hostdata->connected)
-                                               panic("scsi%d : received end of DMA interrupt with no connected cmd\n", instance->hostno);
+                       int transferred;
 
-                                       transferred = (hostdata->dmalen - NCR5380_dma_residual(instance));
-                                       hostdata->connected->SCp.this_residual -= transferred;
-                                       hostdata->connected->SCp.ptr += transferred;
-                                       hostdata->dmalen = 0;
+                       if (!hostdata->connected)
+                               panic("scsi%d : DMA interrupt with no connected cmd\n",
+                                     instance->hostno);
 
-                                       (void) NCR5380_read(RESET_PARITY_INTERRUPT_REG);
-                                                       
-                                       /* FIXME: we need to poll briefly then defer a workqueue task ! */
-                                       NCR5380_poll_politely(hostdata, BUS_AND_STATUS_REG, BASR_ACK, 0, 2*HZ);
+                       transferred = hostdata->dmalen - NCR5380_dma_residual(instance);
+                       hostdata->connected->SCp.this_residual -= transferred;
+                       hostdata->connected->SCp.ptr += transferred;
+                       hostdata->dmalen = 0;
 
-                                       NCR5380_write(MODE_REG, MR_BASE);
-                                       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-                               }
-#else
-                               dprintk(NDEBUG_INTR, "scsi : unknown interrupt, BASR 0x%X, MR 0x%X, SR 0x%x\n", basr, NCR5380_read(MODE_REG), NCR5380_read(STATUS_REG));
-                               (void) NCR5380_read(RESET_PARITY_INTERRUPT_REG);
-#endif
+                       /* FIXME: we need to poll briefly then defer a workqueue task ! */
+                       NCR5380_poll_politely(hostdata, BUS_AND_STATUS_REG, BASR_ACK, 0, 2 * HZ);
+
+                       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+                       NCR5380_write(MODE_REG, MR_BASE);
+                       NCR5380_read(RESET_PARITY_INTERRUPT_REG);
+               } else
+#endif /* REAL_DMA */
+               if ((NCR5380_read(CURRENT_SCSI_DATA_REG) & hostdata->id_mask) &&
+                   (sr & (SR_SEL | SR_IO | SR_BSY | SR_RST)) == (SR_SEL | SR_IO)) {
+                       /* Probably reselected */
+                       NCR5380_write(SELECT_ENABLE_REG, 0);
+                       NCR5380_read(RESET_PARITY_INTERRUPT_REG);
+
+                       dsprintk(NDEBUG_INTR, instance, "interrupt with SEL and IO\n");
+
+                       if (!hostdata->connected) {
+                               NCR5380_reselect(instance);
+                               queue_work(hostdata->work_q, &hostdata->main_task);
                        }
-               }       /* if BASR_IRQ */
-               spin_unlock_irqrestore(instance->host_lock, flags);
-               if(!done)
-                       schedule_delayed_work(&hostdata->coroutine, 0);
-       } while (!done);
-       return IRQ_HANDLED;
+                       if (!hostdata->connected)
+                               NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
+               } else {
+                       /* Probably Bus Reset */
+                       NCR5380_read(RESET_PARITY_INTERRUPT_REG);
+
+                       dsprintk(NDEBUG_INTR, instance, "unknown interrupt\n");
+               }
+               handled = 1;
+       } else {
+               shost_printk(KERN_NOTICE, instance, "interrupt without IRQ bit\n");
+       }
+
+       spin_unlock_irqrestore(&hostdata->lock, flags);
+
+       return IRQ_RETVAL(handled);
 }
 
-#endif 
+#endif
 
-/* 
+/*
  * Function : int NCR5380_select(struct Scsi_Host *instance,
- *                               struct scsi_cmnd *cmd)
+ * struct scsi_cmnd *cmd)
  *
  * Purpose : establishes I_T_L or I_T_L_Q nexus for new or existing command,
- *      including ARBITRATION, SELECTION, and initial message out for 
- *      IDENTIFY and queue messages. 
- *
- * Inputs : instance - instantiation of the 5380 driver on which this 
- *      target lives, cmd - SCSI command to execute.
- * 
- * Returns : -1 if selection could not execute for some reason,
- *      0 if selection succeeded or failed because the target 
- *      did not respond.
- *
- * Side effects : 
- *      If bus busy, arbitration failed, etc, NCR5380_select() will exit 
- *              with registers as they should have been on entry - ie
- *              SELECT_ENABLE will be set appropriately, the NCR5380
- *              will cease to drive any SCSI bus signals.
- *
- *      If successful : I_T_L or I_T_L_Q nexus will be established, 
- *              instance->connected will be set to cmd.  
- *              SELECT interrupt will be disabled.
- *
- *      If failed (no target) : cmd->scsi_done() will be called, and the 
- *              cmd->result host byte set to DID_BAD_TARGET.
- *
- *     Locks: caller holds hostdata lock in IRQ mode
+ * including ARBITRATION, SELECTION, and initial message out for
+ * IDENTIFY and queue messages.
+ *
+ * Inputs : instance - instantiation of the 5380 driver on which this
+ * target lives, cmd - SCSI command to execute.
+ *
+ * Returns cmd if selection failed but should be retried,
+ * NULL if selection failed and should not be retried, or
+ * NULL if selection succeeded (hostdata->connected == cmd).
+ *
+ * Side effects :
+ * If bus busy, arbitration failed, etc, NCR5380_select() will exit
+ * with registers as they should have been on entry - ie
+ * SELECT_ENABLE will be set appropriately, the NCR5380
+ * will cease to drive any SCSI bus signals.
+ *
+ * If successful : I_T_L or I_T_L_Q nexus will be established,
+ * instance->connected will be set to cmd.
+ * SELECT interrupt will be disabled.
+ *
+ * If failed (no target) : cmd->scsi_done() will be called, and the
+ * cmd->result host byte set to DID_BAD_TARGET.
  */
-static int NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
+
+static struct scsi_cmnd *NCR5380_select(struct Scsi_Host *instance,
+                                        struct scsi_cmnd *cmd)
 {
-       NCR5380_local_declare();
-       struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *) instance->hostdata;
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
        unsigned char tmp[3], phase;
        unsigned char *data;
        int len;
-       unsigned long timeout;
-       unsigned char value;
        int err;
-       NCR5380_setup(instance);
-
-       if (hostdata->selecting)
-               goto part2;
-
-       hostdata->restart_select = 0;
 
        NCR5380_dprint(NDEBUG_ARBITRATION, instance);
-       dprintk(NDEBUG_ARBITRATION, "scsi%d : starting arbitration, id = %d\n", instance->host_no, instance->this_id);
+       dsprintk(NDEBUG_ARBITRATION, instance, "starting arbitration, id = %d\n",
+                instance->this_id);
+
+       /*
+        * Arbitration and selection phases are slow and involve dropping the
+        * lock, so we have to watch out for EH. An exception handler may
+        * change 'selecting' to NULL. This function will then return NULL
+        * so that the caller will forget about 'cmd'. (During information
+        * transfer phases, EH may change 'connected' to NULL.)
+        */
+       hostdata->selecting = cmd;
 
-       /* 
-        * Set the phase bits to 0, otherwise the NCR5380 won't drive the 
+       /*
+        * Set the phase bits to 0, otherwise the NCR5380 won't drive the
         * data bus during SELECTION.
         */
 
        NCR5380_write(TARGET_COMMAND_REG, 0);
 
-       /* 
+       /*
         * Start arbitration.
         */
 
        NCR5380_write(OUTPUT_DATA_REG, hostdata->id_mask);
        NCR5380_write(MODE_REG, MR_ARBITRATE);
 
+       /* The chip now waits for BUS FREE phase. Then after the 800 ns
+        * Bus Free Delay, arbitration will begin.
+        */
 
-       /* We can be relaxed here, interrupts are on, we are
-          in workqueue context, the birds are singing in the trees */
-       spin_unlock_irq(instance->host_lock);
-       err = NCR5380_poll_politely(instance, INITIATOR_COMMAND_REG, ICR_ARBITRATION_PROGRESS, ICR_ARBITRATION_PROGRESS, 5*HZ);
-       spin_lock_irq(instance->host_lock);
+       spin_unlock_irq(&hostdata->lock);
+       err = NCR5380_poll_politely2(instance, MODE_REG, MR_ARBITRATE, 0,
+                       INITIATOR_COMMAND_REG, ICR_ARBITRATION_PROGRESS,
+                                              ICR_ARBITRATION_PROGRESS, HZ);
+       spin_lock_irq(&hostdata->lock);
+       if (!(NCR5380_read(MODE_REG) & MR_ARBITRATE)) {
+               /* Reselection interrupt */
+               goto out;
+       }
        if (err < 0) {
-               printk(KERN_DEBUG "scsi: arbitration timeout at %d\n", __LINE__);
                NCR5380_write(MODE_REG, MR_BASE);
-               NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
-               goto failed;
+               shost_printk(KERN_ERR, instance,
+                            "select: arbitration timeout\n");
+               goto out;
        }
+       spin_unlock_irq(&hostdata->lock);
 
-       dprintk(NDEBUG_ARBITRATION, "scsi%d : arbitration complete\n", instance->host_no);
-
-       /* 
-        * The arbitration delay is 2.2us, but this is a minimum and there is 
-        * no maximum so we can safely sleep for ceil(2.2) usecs to accommodate
-        * the integral nature of udelay().
-        *
-        */
-
+       /* The SCSI-2 arbitration delay is 2.4 us */
        udelay(3);
 
        /* Check for lost arbitration */
-       if ((NCR5380_read(INITIATOR_COMMAND_REG) & ICR_ARBITRATION_LOST) || (NCR5380_read(CURRENT_SCSI_DATA_REG) & hostdata->id_higher_mask) || (NCR5380_read(INITIATOR_COMMAND_REG) & ICR_ARBITRATION_LOST)) {
-               NCR5380_write(MODE_REG, MR_BASE);
-               dprintk(NDEBUG_ARBITRATION, "scsi%d : lost arbitration, deasserting MR_ARBITRATE\n", instance->host_no);
-               goto failed;
-       }
-       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_SEL);
-
-       if (!(hostdata->flags & FLAG_DTC3181E) &&
-           /* RvC: DTC3181E has some trouble with this
-            *      so we simply removed it. Seems to work with
-            *      only Mustek scanner attached
-            */
+       if ((NCR5380_read(INITIATOR_COMMAND_REG) & ICR_ARBITRATION_LOST) ||
+           (NCR5380_read(CURRENT_SCSI_DATA_REG) & hostdata->id_higher_mask) ||
            (NCR5380_read(INITIATOR_COMMAND_REG) & ICR_ARBITRATION_LOST)) {
                NCR5380_write(MODE_REG, MR_BASE);
-               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-               dprintk(NDEBUG_ARBITRATION, "scsi%d : lost arbitration, deasserting ICR_ASSERT_SEL\n", instance->host_no);
-               goto failed;
+               dsprintk(NDEBUG_ARBITRATION, instance, "lost arbitration, deasserting MR_ARBITRATE\n");
+               spin_lock_irq(&hostdata->lock);
+               goto out;
        }
-       /* 
-        * Again, bus clear + bus settle time is 1.2us, however, this is 
+
+       /* After/during arbitration, BSY should be asserted.
+        * IBM DPES-31080 Version S31Q works now
+        * Tnx to Thomas_Roesch@m2.maus.de for finding this! (Roman)
+        */
+       NCR5380_write(INITIATOR_COMMAND_REG,
+                     ICR_BASE | ICR_ASSERT_SEL | ICR_ASSERT_BSY);
+
+       /*
+        * Again, bus clear + bus settle time is 1.2us, however, this is
         * a minimum so we'll udelay ceil(1.2)
         */
 
-       udelay(2);
+       if (hostdata->flags & FLAG_TOSHIBA_DELAY)
+               udelay(15);
+       else
+               udelay(2);
+
+       spin_lock_irq(&hostdata->lock);
+
+       /* NCR5380_reselect() clears MODE_REG after a reselection interrupt */
+       if (!(NCR5380_read(MODE_REG) & MR_ARBITRATE))
+               goto out;
 
-       dprintk(NDEBUG_ARBITRATION, "scsi%d : won arbitration\n", instance->host_no);
+       if (!hostdata->selecting) {
+               NCR5380_write(MODE_REG, MR_BASE);
+               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+               goto out;
+       }
 
-       /* 
-        * Now that we have won arbitration, start Selection process, asserting 
+       dsprintk(NDEBUG_ARBITRATION, instance, "won arbitration\n");
+
+       /*
+        * Now that we have won arbitration, start Selection process, asserting
         * the host and target ID's on the SCSI bus.
         */
 
-       NCR5380_write(OUTPUT_DATA_REG, (hostdata->id_mask | (1 << scmd_id(cmd))));
+       NCR5380_write(OUTPUT_DATA_REG, hostdata->id_mask | (1 << scmd_id(cmd)));
 
-       /* 
+       /*
         * Raise ATN while SEL is true before BSY goes false from arbitration,
         * since this is the only way to guarantee that we'll get a MESSAGE OUT
         * phase immediately after selection.
         */
 
-       NCR5380_write(INITIATOR_COMMAND_REG, (ICR_BASE | ICR_ASSERT_BSY | ICR_ASSERT_DATA | ICR_ASSERT_ATN | ICR_ASSERT_SEL));
+       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_BSY |
+                     ICR_ASSERT_DATA | ICR_ASSERT_ATN | ICR_ASSERT_SEL);
        NCR5380_write(MODE_REG, MR_BASE);
 
-       /* 
+       /*
         * Reselect interrupts must be turned off prior to the dropping of BSY,
         * otherwise we will trigger an interrupt.
         */
        NCR5380_write(SELECT_ENABLE_REG, 0);
 
+       spin_unlock_irq(&hostdata->lock);
+
        /*
-        * The initiator shall then wait at least two deskew delays and release 
+        * The initiator shall then wait at least two deskew delays and release
         * the BSY signal.
         */
-       udelay(1);              /* wingel -- wait two bus deskew delay >2*45ns */
+       udelay(1);        /* wingel -- wait two bus deskew delay >2*45ns */
 
        /* Reset BSY */
-       NCR5380_write(INITIATOR_COMMAND_REG, (ICR_BASE | ICR_ASSERT_DATA | ICR_ASSERT_ATN | ICR_ASSERT_SEL));
+       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_DATA |
+                     ICR_ASSERT_ATN | ICR_ASSERT_SEL);
 
-       /* 
+       /*
         * Something weird happens when we cease to drive BSY - looks
-        * like the board/chip is letting us do another read before the 
+        * like the board/chip is letting us do another read before the
         * appropriate propagation delay has expired, and we're confusing
         * a BSY signal from ourselves as the target's response to SELECTION.
         *
         * A small delay (the 'C++' frontend breaks the pipeline with an
         * unnecessary jump, making it work on my 386-33/Trantor T128, the
-        * tighter 'C' code breaks and requires this) solves the problem - 
-        * the 1 us delay is arbitrary, and only used because this delay will 
-        * be the same on other platforms and since it works here, it should 
+        * tighter 'C' code breaks and requires this) solves the problem -
+        * the 1 us delay is arbitrary, and only used because this delay will
+        * be the same on other platforms and since it works here, it should
         * work there.
         *
         * wingel suggests that this could be due to failing to wait
@@ -1339,50 +1162,43 @@ static int NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
 
        udelay(1);
 
-       dprintk(NDEBUG_SELECTION, "scsi%d : selecting target %d\n", instance->host_no, scmd_id(cmd));
+       dsprintk(NDEBUG_SELECTION, instance, "selecting target %d\n", scmd_id(cmd));
 
-       /* 
-        * The SCSI specification calls for a 250 ms timeout for the actual 
+       /*
+        * The SCSI specification calls for a 250 ms timeout for the actual
         * selection.
         */
 
-       timeout = jiffies + msecs_to_jiffies(250);
+       err = NCR5380_poll_politely(instance, STATUS_REG, SR_BSY, SR_BSY,
+                                   msecs_to_jiffies(250));
 
-       /* 
-        * XXX very interesting - we're seeing a bounce where the BSY we 
-        * asserted is being reflected / still asserted (propagation delay?)
-        * and it's detecting as true.  Sigh.
-        */
-
-       hostdata->select_time = 0;      /* we count the clock ticks at which we polled */
-       hostdata->selecting = cmd;
-
-part2:
-       /* RvC: here we enter after a sleeping period, or immediately after
-          execution of part 1
-          we poll only once ech clock tick */
-       value = NCR5380_read(STATUS_REG) & (SR_BSY | SR_IO);
-
-       if (!value && (hostdata->select_time < HZ/4)) {
-               /* RvC: we still must wait for a device response */
-               hostdata->select_time++;        /* after 25 ticks the device has failed */
-               NCR5380_set_timer(hostdata, 1);
-               return 0;       /* RvC: we return here with hostdata->selecting set,
-                                  to go to sleep */
-       }
-
-       hostdata->selecting = NULL;/* clear this pointer, because we passed the
-                                          waiting period */
        if ((NCR5380_read(STATUS_REG) & (SR_SEL | SR_IO)) == (SR_SEL | SR_IO)) {
+               spin_lock_irq(&hostdata->lock);
                NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
                NCR5380_reselect(instance);
-               printk("scsi%d : reselection after won arbitration?\n", instance->host_no);
+               if (!hostdata->connected)
+                       NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
+               shost_printk(KERN_ERR, instance, "reselection after won arbitration?\n");
+               goto out;
+       }
+
+       if (err < 0) {
+               spin_lock_irq(&hostdata->lock);
+               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
                NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
-               return -1;
+               /* Can't touch cmd if it has been reclaimed by the scsi ML */
+               if (hostdata->selecting) {
+                       cmd->result = DID_BAD_TARGET << 16;
+                       complete_cmd(instance, cmd);
+                       dsprintk(NDEBUG_SELECTION, instance, "target did not respond within 250ms\n");
+                       cmd = NULL;
+               }
+               goto out;
        }
-       /* 
-        * No less than two deskew delays after the initiator detects the 
-        * BSY signal is true, it shall release the SEL signal and may 
+
+       /*
+        * No less than two deskew delays after the initiator detects the
+        * BSY signal is true, it shall release the SEL signal and may
         * change the DATA BUS.                                     -wingel
         */
 
@@ -1390,53 +1206,38 @@ part2:
 
        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
 
-       if (!(NCR5380_read(STATUS_REG) & SR_BSY)) {
-               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-               if (hostdata->targets_present & (1 << scmd_id(cmd))) {
-                       printk(KERN_DEBUG "scsi%d : weirdness\n", instance->host_no);
-                       if (hostdata->restart_select)
-                               printk(KERN_DEBUG "\trestart select\n");
-                       NCR5380_dprint(NDEBUG_SELECTION, instance);
-                       NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
-                       return -1;
-               }
-               cmd->result = DID_BAD_TARGET << 16;
-               cmd->scsi_done(cmd);
-               NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
-               dprintk(NDEBUG_SELECTION, "scsi%d : target did not respond within 250ms\n", instance->host_no);
-               NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
-               return 0;
-       }
-       hostdata->targets_present |= (1 << scmd_id(cmd));
-
        /*
-        * Since we followed the SCSI spec, and raised ATN while SEL 
+        * Since we followed the SCSI spec, and raised ATN while SEL
         * was true but before BSY was false during selection, the information
         * transfer phase should be a MESSAGE OUT phase so that we can send the
         * IDENTIFY message.
-        * 
+        *
         * If SCSI-II tagged queuing is enabled, we also send a SIMPLE_QUEUE_TAG
         * message (2 bytes) with a tag ID that we increment with every command
         * until it wraps back to 0.
         *
         * XXX - it turns out that there are some broken SCSI-II devices,
-        *       which claim to support tagged queuing but fail when more than
-        *       some number of commands are issued at once.
+        * which claim to support tagged queuing but fail when more than
+        * some number of commands are issued at once.
         */
 
        /* Wait for start of REQ/ACK handshake */
 
-       spin_unlock_irq(instance->host_lock);
        err = NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, SR_REQ, HZ);
-       spin_lock_irq(instance->host_lock);
-       
-       if(err) {
-               printk(KERN_ERR "scsi%d: timeout at NCR5380.c:%d\n", instance->host_no, __LINE__);
+       spin_lock_irq(&hostdata->lock);
+       if (err < 0) {
+               shost_printk(KERN_ERR, instance, "select: REQ timeout\n");
+               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
                NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
-               goto failed;
+               goto out;
+       }
+       if (!hostdata->selecting) {
+               do_abort(instance);
+               goto out;
        }
 
-       dprintk(NDEBUG_SELECTION, "scsi%d : target %d selected, going into MESSAGE OUT phase.\n", instance->host_no, cmd->device->id);
+       dsprintk(NDEBUG_SELECTION, instance, "target %d selected, going into MESSAGE OUT phase.\n",
+                scmd_id(cmd));
        tmp[0] = IDENTIFY(((instance->irq == NO_IRQ) ? 0 : 1), cmd->device->lun);
 
        len = 1;
@@ -1446,104 +1247,82 @@ part2:
        data = tmp;
        phase = PHASE_MSGOUT;
        NCR5380_transfer_pio(instance, &phase, &len, &data);
-       dprintk(NDEBUG_SELECTION, "scsi%d : nexus established.\n", instance->host_no);
+       dsprintk(NDEBUG_SELECTION, instance, "nexus established.\n");
        /* XXX need to handle errors here */
+
        hostdata->connected = cmd;
-       hostdata->busy[cmd->device->id] |= (1 << (cmd->device->lun & 0xFF));
+       hostdata->busy[cmd->device->id] |= 1 << cmd->device->lun;
 
        initialize_SCp(cmd);
 
-       return 0;
-
-       /* Selection failed */
-failed:
-       return -1;
+       cmd = NULL;
 
+out:
+       if (!hostdata->selecting)
+               return NULL;
+       hostdata->selecting = NULL;
+       return cmd;
 }
 
-/* 
- * Function : int NCR5380_transfer_pio (struct Scsi_Host *instance, 
- *      unsigned char *phase, int *count, unsigned char **data)
+/*
+ * Function : int NCR5380_transfer_pio (struct Scsi_Host *instance,
+ * unsigned char *phase, int *count, unsigned char **data)
  *
  * Purpose : transfers data in given phase using polled I/O
  *
- * Inputs : instance - instance of driver, *phase - pointer to 
- *      what phase is expected, *count - pointer to number of 
- *      bytes to transfer, **data - pointer to data pointer.
- * 
+ * Inputs : instance - instance of driver, *phase - pointer to
+ * what phase is expected, *count - pointer to number of
+ * bytes to transfer, **data - pointer to data pointer.
+ *
  * Returns : -1 when different phase is entered without transferring
- *      maximum number of bytes, 0 if all bytes or transferred or exit
- *      is in same phase.
+ * maximum number of bytes, 0 if all bytes are transferred or exit
+ * is in same phase.
  *
- *      Also, *phase, *count, *data are modified in place.
+ * Also, *phase, *count, *data are modified in place.
  *
  * XXX Note : handling for bus free may be useful.
  */
 
 /*
- * Note : this code is not as quick as it could be, however it 
+ * Note : this code is not as quick as it could be, however it
  * IS 100% reliable, and for the actual data transfer where speed
  * counts, we will always do a pseudo DMA or DMA transfer.
  */
 
-static int NCR5380_transfer_pio(struct Scsi_Host *instance, unsigned char *phase, int *count, unsigned char **data) {
-       NCR5380_local_declare();
+static int NCR5380_transfer_pio(struct Scsi_Host *instance,
+                               unsigned char *phase, int *count,
+                               unsigned char **data)
+{
        unsigned char p = *phase, tmp;
        int c = *count;
        unsigned char *d = *data;
-       /*
-        *      RvC: some administrative data to process polling time
-        */
-       int break_allowed = 0;
-       struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *) instance->hostdata;
-       NCR5380_setup(instance);
-
-       if (!(p & SR_IO))
-               dprintk(NDEBUG_PIO, "scsi%d : pio write %d bytes\n", instance->host_no, c);
-       else
-               dprintk(NDEBUG_PIO, "scsi%d : pio read %d bytes\n", instance->host_no, c);
 
-       /* 
-        * The NCR5380 chip will only drive the SCSI bus when the 
+       /*
+        * The NCR5380 chip will only drive the SCSI bus when the
         * phase specified in the appropriate bits of the TARGET COMMAND
         * REGISTER match the STATUS REGISTER
         */
 
-        NCR5380_write(TARGET_COMMAND_REG, PHASE_SR_TO_TCR(p));
-
-       /* RvC: don't know if this is necessary, but other SCSI I/O is short
-        *      so breaks are not necessary there
-        */
-       if ((p == PHASE_DATAIN) || (p == PHASE_DATAOUT)) {
-               break_allowed = 1;
-       }
-       do {
-               /* 
-                * Wait for assertion of REQ, after which the phase bits will be 
-                * valid 
-                */
+       NCR5380_write(TARGET_COMMAND_REG, PHASE_SR_TO_TCR(p));
 
-               /* RvC: we simply poll once, after that we stop temporarily
-                *      and let the device buffer fill up
-                *      if breaking is not allowed, we keep polling as long as needed
+       do {
+               /*
+                * Wait for assertion of REQ, after which the phase bits will be
+                * valid
                 */
 
-               /* FIXME */
-               while (!((tmp = NCR5380_read(STATUS_REG)) & SR_REQ) && !break_allowed);
-               if (!(tmp & SR_REQ)) {
-                       /* timeout condition */
-                       NCR5380_set_timer(hostdata, USLEEP_SLEEP);
+               if (NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, SR_REQ, HZ) < 0)
                        break;
-               }
 
-               dprintk(NDEBUG_HANDSHAKE, "scsi%d : REQ detected\n", instance->host_no);
+               dsprintk(NDEBUG_HANDSHAKE, instance, "REQ asserted\n");
 
                /* Check for phase mismatch */
-               if ((tmp & PHASE_MASK) != p) {
-                       dprintk(NDEBUG_HANDSHAKE, "scsi%d : phase mismatch\n", instance->host_no);
-                       NCR5380_dprint_phase(NDEBUG_HANDSHAKE, instance);
+               if ((NCR5380_read(STATUS_REG) & PHASE_MASK) != p) {
+                       dsprintk(NDEBUG_PIO, instance, "phase mismatch\n");
+                       NCR5380_dprint_phase(NDEBUG_PIO, instance);
                        break;
                }
+
                /* Do actual transfer from SCSI bus to / from memory */
                if (!(p & SR_IO))
                        NCR5380_write(OUTPUT_DATA_REG, *d);
@@ -1552,7 +1331,7 @@ static int NCR5380_transfer_pio(struct Scsi_Host *instance, unsigned char *phase
 
                ++d;
 
-               /* 
+               /*
                 * The SCSI standard suggests that in MSGOUT phase, the initiator
                 * should drop ATN on the last byte of the message phase
                 * after REQ has been asserted for the handshake but before
@@ -1563,29 +1342,34 @@ static int NCR5380_transfer_pio(struct Scsi_Host *instance, unsigned char *phase
                        if (!((p & SR_MSG) && c > 1)) {
                                NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_DATA);
                                NCR5380_dprint(NDEBUG_PIO, instance);
-                               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_DATA | ICR_ASSERT_ACK);
+                               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE |
+                                             ICR_ASSERT_DATA | ICR_ASSERT_ACK);
                        } else {
-                               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_DATA | ICR_ASSERT_ATN);
+                               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE |
+                                             ICR_ASSERT_DATA | ICR_ASSERT_ATN);
                                NCR5380_dprint(NDEBUG_PIO, instance);
-                               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_DATA | ICR_ASSERT_ATN | ICR_ASSERT_ACK);
+                               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE |
+                                             ICR_ASSERT_DATA | ICR_ASSERT_ATN | ICR_ASSERT_ACK);
                        }
                } else {
                        NCR5380_dprint(NDEBUG_PIO, instance);
                        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ACK);
                }
 
-               /* FIXME - if this fails bus reset ?? */
-               NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, 0, 5*HZ);
-               dprintk(NDEBUG_HANDSHAKE, "scsi%d : req false, handshake complete\n", instance->host_no);
+               if (NCR5380_poll_politely(instance,
+                                         STATUS_REG, SR_REQ, 0, 5 * HZ) < 0)
+                       break;
+
+               dsprintk(NDEBUG_HANDSHAKE, instance, "REQ negated, handshake complete\n");
 
 /*
- * We have several special cases to consider during REQ/ACK handshaking : 
- * 1.  We were in MSGOUT phase, and we are on the last byte of the 
- *      message.  ATN must be dropped as ACK is dropped.
+ * We have several special cases to consider during REQ/ACK handshaking :
+ * 1.  We were in MSGOUT phase, and we are on the last byte of the
+ * message.  ATN must be dropped as ACK is dropped.
  *
- * 2.  We are in a MSGIN phase, and we are on the last byte of the  
- *      message.  We must exit with ACK asserted, so that the calling
- *      code may raise ATN before dropping ACK to reject the message.
+ * 2.  We are in a MSGIN phase, and we are on the last byte of the
+ * message.  We must exit with ACK asserted, so that the calling
+ * code may raise ATN before dropping ACK to reject the message.
  *
  * 3.  ACK and ATN are clear and the target may proceed as normal.
  */
@@ -1597,12 +1381,16 @@ static int NCR5380_transfer_pio(struct Scsi_Host *instance, unsigned char *phase
                }
        } while (--c);
 
-       dprintk(NDEBUG_PIO, "scsi%d : residual %d\n", instance->host_no, c);
+       dsprintk(NDEBUG_PIO, instance, "residual %d\n", c);
 
        *count = c;
        *data = d;
        tmp = NCR5380_read(STATUS_REG);
-       if (tmp & SR_REQ)
+       /* The phase read from the bus is valid if either REQ is (already)
+        * asserted or if ACK hasn't been released yet. The latter applies if
+        * we're in MSG IN, DATA IN or STATUS and all bytes have been received.
+        */
+       if ((tmp & SR_REQ) || ((tmp & SR_IO) && c == 0))
                *phase = tmp & PHASE_MASK;
        else
                *phase = PHASE_UNKNOWN;
@@ -1614,79 +1402,80 @@ static int NCR5380_transfer_pio(struct Scsi_Host *instance, unsigned char *phase
 }
 
 /**
- *     do_reset        -       issue a reset command
- *     @host: adapter to reset
+ * do_reset - issue a reset command
+ * @instance: adapter to reset
  *
- *     Issue a reset sequence to the NCR5380 and try and get the bus
- *     back into sane shape.
+ * Issue a reset sequence to the NCR5380 and try and get the bus
+ * back into sane shape.
  *
- *     Locks: caller holds queue lock
+ * This clears the reset interrupt flag because there may be no handler for
+ * it. When the driver is initialized, the NCR5380_intr() handler has not yet
+ * been installed. And when in EH we may have released the ST DMA interrupt.
  */
-static void do_reset(struct Scsi_Host *host) {
-       NCR5380_local_declare();
-       NCR5380_setup(host);
 
-       NCR5380_write(TARGET_COMMAND_REG, PHASE_SR_TO_TCR(NCR5380_read(STATUS_REG) & PHASE_MASK));
+static void do_reset(struct Scsi_Host *instance)
+{
+       unsigned long flags;
+
+       local_irq_save(flags);
+       NCR5380_write(TARGET_COMMAND_REG,
+                     PHASE_SR_TO_TCR(NCR5380_read(STATUS_REG) & PHASE_MASK));
        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_RST);
-       udelay(25);
+       udelay(50);
        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+       (void)NCR5380_read(RESET_PARITY_INTERRUPT_REG);
+       local_irq_restore(flags);
 }
 
-/*
- * Function : do_abort (Scsi_Host *host)
- * 
- * Purpose : abort the currently established nexus.  Should only be 
- *      called from a routine which can drop into a 
- * 
- * Returns : 0 on success, -1 on failure.
- *
- * Locks: queue lock held by caller
- *     FIXME: sort this out and get new_eh running
+/**
+ * do_abort - abort the currently established nexus by going to
+ * MESSAGE OUT phase and sending an ABORT message.
+ * @instance: relevant scsi host instance
+ *
+ * Returns 0 on success, -1 on failure.
  */
 
-static int do_abort(struct Scsi_Host *host) {
-       NCR5380_local_declare();
+static int do_abort(struct Scsi_Host *instance)
+{
        unsigned char *msgptr, phase, tmp;
        int len;
        int rc;
-       NCR5380_setup(host);
-
 
        /* Request message out phase */
        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
 
-       /* 
-        * Wait for the target to indicate a valid phase by asserting 
-        * REQ.  Once this happens, we'll have either a MSGOUT phase 
-        * and can immediately send the ABORT message, or we'll have some 
+       /*
+        * Wait for the target to indicate a valid phase by asserting
+        * REQ.  Once this happens, we'll have either a MSGOUT phase
+        * and can immediately send the ABORT message, or we'll have some
         * other phase and will have to source/sink data.
-        * 
+        *
         * We really don't care what value was on the bus or what value
         * the target sees, so we just handshake.
         */
 
-       rc = NCR5380_poll_politely(host, STATUS_REG, SR_REQ, SR_REQ, 60 * HZ);
-       
-       if(rc < 0)
-               return -1;
+       rc = NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, SR_REQ, 10 * HZ);
+       if (rc < 0)
+               goto timeout;
+
+       tmp = NCR5380_read(STATUS_REG) & PHASE_MASK;
 
-       tmp = (unsigned char)rc;
-       
        NCR5380_write(TARGET_COMMAND_REG, PHASE_SR_TO_TCR(tmp));
 
-       if ((tmp & PHASE_MASK) != PHASE_MSGOUT) {
-               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN | ICR_ASSERT_ACK);
-               rc = NCR5380_poll_politely(host, STATUS_REG, SR_REQ, 0, 3*HZ);
+       if (tmp != PHASE_MSGOUT) {
+               NCR5380_write(INITIATOR_COMMAND_REG,
+                             ICR_BASE | ICR_ASSERT_ATN | ICR_ASSERT_ACK);
+               rc = NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, 0, 3 * HZ);
+               if (rc < 0)
+                       goto timeout;
                NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
-               if(rc == -1)
-                       return -1;
        }
+
        tmp = ABORT;
        msgptr = &tmp;
        len = 1;
        phase = PHASE_MSGOUT;
-       NCR5380_transfer_pio(host, &phase, &len, &msgptr);
+       NCR5380_transfer_pio(instance, &phase, &len, &msgptr);
 
        /*
         * If we got here, and the command completed successfully,
@@ -1694,32 +1483,37 @@ static int do_abort(struct Scsi_Host *host) {
         */
 
        return len ? -1 : 0;
+
+timeout:
+       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+       return -1;
 }
 
 #if defined(REAL_DMA) || defined(PSEUDO_DMA) || defined (REAL_DMA_POLL)
-/* 
- * Function : int NCR5380_transfer_dma (struct Scsi_Host *instance, 
- *      unsigned char *phase, int *count, unsigned char **data)
+/*
+ * Function : int NCR5380_transfer_dma (struct Scsi_Host *instance,
+ * unsigned char *phase, int *count, unsigned char **data)
  *
  * Purpose : transfers data in given phase using either real
- *      or pseudo DMA.
+ * or pseudo DMA.
  *
- * Inputs : instance - instance of driver, *phase - pointer to 
- *      what phase is expected, *count - pointer to number of 
- *      bytes to transfer, **data - pointer to data pointer.
- * 
- * Returns : -1 when different phase is entered without transferring
- *      maximum number of bytes, 0 if all bytes or transferred or exit
- *      is in same phase.
+ * Inputs : instance - instance of driver, *phase - pointer to
+ * what phase is expected, *count - pointer to number of
+ * bytes to transfer, **data - pointer to data pointer.
  *
- *      Also, *phase, *count, *data are modified in place.
+ * Returns : -1 when different phase is entered without transferring
+ * maximum number of bytes, 0 if all bytes or transferred or exit
+ * is in same phase.
  *
- *     Locks: io_request lock held by caller
+ * Also, *phase, *count, *data are modified in place.
  */
 
 
-static int NCR5380_transfer_dma(struct Scsi_Host *instance, unsigned char *phase, int *count, unsigned char **data) {
-       NCR5380_local_declare();
+static int NCR5380_transfer_dma(struct Scsi_Host *instance,
+                               unsigned char *phase, int *count,
+                               unsigned char **data)
+{
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
        register int c = *count;
        register unsigned char p = *phase;
        register unsigned char *d = *data;
@@ -1730,54 +1524,47 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance, unsigned char *phase
        unsigned char saved_data = 0, overrun = 0, residue;
 #endif
 
-       struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *) instance->hostdata;
-
-       NCR5380_setup(instance);
-
        if ((tmp = (NCR5380_read(STATUS_REG) & PHASE_MASK)) != p) {
                *phase = tmp;
                return -1;
        }
 #if defined(REAL_DMA) || defined(REAL_DMA_POLL)
-#ifdef READ_OVERRUNS
        if (p & SR_IO) {
-               c -= 2;
+               if (!(hostdata->flags & FLAG_NO_DMA_FIXUPS))
+                       c -= 2;
        }
-#endif
-       dprintk(NDEBUG_DMA, "scsi%d : initializing DMA channel %d for %s, %d bytes %s %0x\n", instance->host_no, instance->dma_channel, (p & SR_IO) ? "reading" : "writing", c, (p & SR_IO) ? "to" : "from", (unsigned) d);
        hostdata->dma_len = (p & SR_IO) ? NCR5380_dma_read_setup(instance, d, c) : NCR5380_dma_write_setup(instance, d, c);
+
+       dsprintk(NDEBUG_DMA, instance, "initializing DMA %s: length %d, address %p\n",
+                (p & SR_IO) ? "receive" : "send", c, *data);
 #endif
 
        NCR5380_write(TARGET_COMMAND_REG, PHASE_SR_TO_TCR(p));
 
 #ifdef REAL_DMA
-       NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE | MR_ENABLE_EOP_INTR | MR_MONITOR_BSY);
+       NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE | MR_MONITOR_BSY |
+                               MR_ENABLE_EOP_INTR);
 #elif defined(REAL_DMA_POLL)
-       NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE);
+       NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE | MR_MONITOR_BSY);
 #else
        /*
         * Note : on my sample board, watch-dog timeouts occurred when interrupts
-        * were not disabled for the duration of a single DMA transfer, from 
+        * were not disabled for the duration of a single DMA transfer, from
         * before the setting of DMA mode to after transfer of the last byte.
         */
 
-#if defined(PSEUDO_DMA) && defined(UNSAFE)
-       spin_unlock_irq(instance->host_lock);
-#endif
-       /* KLL May need eop and parity in 53c400 */
-       if (hostdata->flags & FLAG_NCR53C400)
-               NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE |
-                               MR_ENABLE_PAR_CHECK | MR_ENABLE_PAR_INTR |
-                               MR_ENABLE_EOP_INTR | MR_MONITOR_BSY);
+       if (hostdata->flags & FLAG_NO_DMA_FIXUP)
+               NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE | MR_MONITOR_BSY |
+                                       MR_ENABLE_EOP_INTR);
        else
-               NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE);
+               NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE | MR_MONITOR_BSY);
 #endif                         /* def REAL_DMA */
 
        dprintk(NDEBUG_DMA, "scsi%d : mode reg = 0x%X\n", instance->host_no, NCR5380_read(MODE_REG));
 
-       /* 
-        *      On the PAS16 at least I/O recovery delays are not needed here.
-        *      Everyone else seems to want them.
+       /*
+        * On the PAS16 at least I/O recovery delays are not needed here.
+        * Everyone else seems to want them.
         */
 
        if (p & SR_IO) {
@@ -1797,49 +1584,49 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance, unsigned char *phase
        } while ((tmp & BASR_PHASE_MATCH) && !(tmp & (BASR_BUSY_ERROR | BASR_END_DMA_TRANSFER)));
 
 /*
-   At this point, either we've completed DMA, or we have a phase mismatch,
-   or we've unexpectedly lost BUSY (which is a real error).
-
-   For write DMAs, we want to wait until the last byte has been
-   transferred out over the bus before we turn off DMA mode.  Alas, there
-   seems to be no terribly good way of doing this on a 5380 under all
-   conditions.  For non-scatter-gather operations, we can wait until REQ
-   and ACK both go false, or until a phase mismatch occurs.  Gather-writes
-   are nastier, since the device will be expecting more data than we
-   are prepared to send it, and REQ will remain asserted.  On a 53C8[01] we
-   could test LAST BIT SENT to assure transfer (I imagine this is precisely
-   why this signal was added to the newer chips) but on the older 538[01]
-   this signal does not exist.  The workaround for this lack is a watchdog;
-   we bail out of the wait-loop after a modest amount of wait-time if
-   the usual exit conditions are not met.  Not a terribly clean or
-   correct solution :-%
-
-   Reads are equally tricky due to a nasty characteristic of the NCR5380.
-   If the chip is in DMA mode for an READ, it will respond to a target's
-   REQ by latching the SCSI data into the INPUT DATA register and asserting
-   ACK, even if it has _already_ been notified by the DMA controller that
-   the current DMA transfer has completed!  If the NCR5380 is then taken
-   out of DMA mode, this already-acknowledged byte is lost.
-
-   This is not a problem for "one DMA transfer per command" reads, because
-   the situation will never arise... either all of the data is DMA'ed
-   properly, or the target switches to MESSAGE IN phase to signal a
-   disconnection (either operation bringing the DMA to a clean halt).
-   However, in order to handle scatter-reads, we must work around the
-   problem.  The chosen fix is to DMA N-2 bytes, then check for the
-   condition before taking the NCR5380 out of DMA mode.  One or two extra
-   bytes are transferred via PIO as necessary to fill out the original
-   request.
+ * At this point, either we've completed DMA, or we have a phase mismatch,
+ * or we've unexpectedly lost BUSY (which is a real error).
+ *
+ * For DMA sends, we want to wait until the last byte has been
+ * transferred out over the bus before we turn off DMA mode.  Alas, there
+ * seems to be no terribly good way of doing this on a 5380 under all
+ * conditions.  For non-scatter-gather operations, we can wait until REQ
+ * and ACK both go false, or until a phase mismatch occurs.  Gather-sends
+ * are nastier, since the device will be expecting more data than we
+ * are prepared to send it, and REQ will remain asserted.  On a 53C8[01] we
+ * could test Last Byte Sent to assure transfer (I imagine this is precisely
+ * why this signal was added to the newer chips) but on the older 538[01]
+ * this signal does not exist.  The workaround for this lack is a watchdog;
+ * we bail out of the wait-loop after a modest amount of wait-time if
+ * the usual exit conditions are not met.  Not a terribly clean or
+ * correct solution :-%
+ *
+ * DMA receive is equally tricky due to a nasty characteristic of the NCR5380.
+ * If the chip is in DMA receive mode, it will respond to a target's
+ * REQ by latching the SCSI data into the INPUT DATA register and asserting
+ * ACK, even if it has _already_ been notified by the DMA controller that
+ * the current DMA transfer has completed!  If the NCR5380 is then taken
+ * out of DMA mode, this already-acknowledged byte is lost. This is
+ * not a problem for "one DMA transfer per READ command", because
+ * the situation will never arise... either all of the data is DMA'ed
+ * properly, or the target switches to MESSAGE IN phase to signal a
+ * disconnection (either operation bringing the DMA to a clean halt).
+ * However, in order to handle scatter-receive, we must work around the
+ * problem.  The chosen fix is to DMA N-2 bytes, then check for the
+ * condition before taking the NCR5380 out of DMA mode.  One or two extra
+ * bytes are transferred via PIO as necessary to fill out the original
+ * request.
  */
 
        if (p & SR_IO) {
-#ifdef READ_OVERRUNS
-               udelay(10);
-               if (((NCR5380_read(BUS_AND_STATUS_REG) & (BASR_PHASE_MATCH | BASR_ACK)) == (BASR_PHASE_MATCH | BASR_ACK))) {
-                       saved_data = NCR5380_read(INPUT_DATA_REGISTER);
-                       overrun = 1;
+               if (!(hostdata->flags & FLAG_NO_DMA_FIXUPS)) {
+                       udelay(10);
+                       if ((NCR5380_read(BUS_AND_STATUS_REG) & (BASR_PHASE_MATCH | BASR_ACK)) ==
+                           (BASR_PHASE_MATCH | BASR_ACK)) {
+                               saved_data = NCR5380_read(INPUT_DATA_REGISTER);
+                               overrun = 1;
+                       }
                }
-#endif
        } else {
                int limit = 100;
                while (((tmp = NCR5380_read(BUS_AND_STATUS_REG)) & BASR_ACK) || (NCR5380_read(STATUS_REG) & SR_REQ)) {
@@ -1850,7 +1637,8 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance, unsigned char *phase
                }
        }
 
-       dprintk(NDEBUG_DMA, "scsi%d : polled DMA transfer complete, basr 0x%X, sr 0x%X\n", instance->host_no, tmp, NCR5380_read(STATUS_REG));
+       dsprintk(NDEBUG_DMA, "polled DMA transfer complete, basr 0x%02x, sr 0x%02x\n",
+                tmp, NCR5380_read(STATUS_REG));
 
        NCR5380_write(MODE_REG, MR_BASE);
        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
@@ -1861,8 +1649,8 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance, unsigned char *phase
        *data += c;
        *phase = NCR5380_read(STATUS_REG) & PHASE_MASK;
 
-#ifdef READ_OVERRUNS
-       if (*phase == p && (p & SR_IO) && residue == 0) {
+       if (!(hostdata->flags & FLAG_NO_DMA_FIXUPS) &&
+           *phase == p && (p & SR_IO) && residue == 0) {
                if (overrun) {
                        dprintk(NDEBUG_DMA, "Got an input overrun, using saved byte\n");
                        **data = saved_data;
@@ -1877,7 +1665,6 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance, unsigned char *phase
                NCR5380_transfer_pio(instance, phase, &cnt, data);
                *count -= toPIO - cnt;
        }
-#endif
 
        dprintk(NDEBUG_DMA, "Return with data ptr = 0x%X, count %d, last 0x%X, next 0x%X\n", *data, *count, *(*data + *count - 1), *(*data + *count));
        return 0;
@@ -1886,95 +1673,64 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance, unsigned char *phase
        return 0;
 #else                          /* defined(REAL_DMA_POLL) */
        if (p & SR_IO) {
-#ifdef DMA_WORKS_RIGHT
-               foo = NCR5380_pread(instance, d, c);
-#else
-               int diff = 1;
-               if (hostdata->flags & FLAG_NCR53C400) {
-                       diff = 0;
-               }
-               if (!(foo = NCR5380_pread(instance, d, c - diff))) {
+               foo = NCR5380_pread(instance, d,
+                       hostdata->flags & FLAG_NO_DMA_FIXUP ? c : c - 1);
+               if (!foo && !(hostdata->flags & FLAG_NO_DMA_FIXUP)) {
                        /*
-                        * We can't disable DMA mode after successfully transferring 
+                        * We can't disable DMA mode after successfully transferring
                         * what we plan to be the last byte, since that would open up
-                        * a race condition where if the target asserted REQ before 
+                        * a race condition where if the target asserted REQ before
                         * we got the DMA mode reset, the NCR5380 would have latched
                         * an additional byte into the INPUT DATA register and we'd
                         * have dropped it.
-                        * 
-                        * The workaround was to transfer one fewer bytes than we 
-                        * intended to with the pseudo-DMA read function, wait for 
+                        *
+                        * The workaround was to transfer one fewer bytes than we
+                        * intended to with the pseudo-DMA read function, wait for
                         * the chip to latch the last byte, read it, and then disable
                         * pseudo-DMA mode.
-                        * 
+                        *
                         * After REQ is asserted, the NCR5380 asserts DRQ and ACK.
                         * REQ is deasserted when ACK is asserted, and not reasserted
                         * until ACK goes false.  Since the NCR5380 won't lower ACK
                         * until DACK is asserted, which won't happen unless we twiddle
-                        * the DMA port or we take the NCR5380 out of DMA mode, we 
-                        * can guarantee that we won't handshake another extra 
+                        * the DMA port or we take the NCR5380 out of DMA mode, we
+                        * can guarantee that we won't handshake another extra
                         * byte.
                         */
 
-                       if (!(hostdata->flags & FLAG_NCR53C400)) {
-                               while (!(NCR5380_read(BUS_AND_STATUS_REG) & BASR_DRQ));
-                               /* Wait for clean handshake */
-                               while (NCR5380_read(STATUS_REG) & SR_REQ);
-                               d[c - 1] = NCR5380_read(INPUT_DATA_REG);
+                       if (NCR5380_poll_politely(instance, BUS_AND_STATUS_REG,
+                                                 BASR_DRQ, BASR_DRQ, HZ) < 0) {
+                               foo = -1;
+                               shost_printk(KERN_ERR, instance, "PDMA read: DRQ timeout\n");
                        }
+                       if (NCR5380_poll_politely(instance, STATUS_REG,
+                                                 SR_REQ, 0, HZ) < 0) {
+                               foo = -1;
+                               shost_printk(KERN_ERR, instance, "PDMA read: !REQ timeout\n");
+                       }
+                       d[c - 1] = NCR5380_read(INPUT_DATA_REG);
                }
-#endif
        } else {
-#ifdef DMA_WORKS_RIGHT
                foo = NCR5380_pwrite(instance, d, c);
-#else
-               int timeout;
-               dprintk(NDEBUG_C400_PWRITE, "About to pwrite %d bytes\n", c);
-               if (!(foo = NCR5380_pwrite(instance, d, c))) {
+               if (!foo && !(hostdata->flags & FLAG_NO_DMA_FIXUP)) {
                        /*
-                        * Wait for the last byte to be sent.  If REQ is being asserted for 
-                        * the byte we're interested, we'll ACK it and it will go false.  
+                        * Wait for the last byte to be sent.  If REQ is being asserted for
+                        * the byte we're interested, we'll ACK it and it will go false.
                         */
-                       if (!(hostdata->flags & FLAG_HAS_LAST_BYTE_SENT)) {
-                               timeout = 20000;
-                               while (!(NCR5380_read(BUS_AND_STATUS_REG) & BASR_DRQ) && (NCR5380_read(BUS_AND_STATUS_REG) & BASR_PHASE_MATCH));
-
-                               if (!timeout)
-                                       dprintk(NDEBUG_LAST_BYTE_SENT, "scsi%d : timed out on last byte\n", instance->host_no);
-
-                               if (hostdata->flags & FLAG_CHECK_LAST_BYTE_SENT) {
-                                       hostdata->flags &= ~FLAG_CHECK_LAST_BYTE_SENT;
-                                       if (NCR5380_read(TARGET_COMMAND_REG) & TCR_LAST_BYTE_SENT) {
-                                               hostdata->flags |= FLAG_HAS_LAST_BYTE_SENT;
-                                               dprintk(NDEBUG_LAST_BYTE_SENT, "scsi%d : last byte sent works\n", instance->host_no);
-                                       }
-                               }
-                       } else {
-                               dprintk(NDEBUG_C400_PWRITE, "Waiting for LASTBYTE\n");
-                               while (!(NCR5380_read(TARGET_COMMAND_REG) & TCR_LAST_BYTE_SENT));
-                               dprintk(NDEBUG_C400_PWRITE, "Got LASTBYTE\n");
+                       if (NCR5380_poll_politely2(instance,
+                            BUS_AND_STATUS_REG, BASR_DRQ, BASR_DRQ,
+                            BUS_AND_STATUS_REG, BASR_PHASE_MATCH, 0, HZ) < 0) {
+                               foo = -1;
+                               shost_printk(KERN_ERR, instance, "PDMA write: DRQ and phase timeout\n");
                        }
                }
-#endif
        }
        NCR5380_write(MODE_REG, MR_BASE);
        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-
-       if ((!(p & SR_IO)) && (hostdata->flags & FLAG_NCR53C400)) {
-               dprintk(NDEBUG_C400_PWRITE, "53C400w: Checking for IRQ\n");
-               if (NCR5380_read(BUS_AND_STATUS_REG) & BASR_IRQ) {
-                       dprintk(NDEBUG_C400_PWRITE, "53C400w:    got it, reading reset interrupt reg\n");
-                       NCR5380_read(RESET_PARITY_INTERRUPT_REG);
-               } else {
-                       printk("53C400w:    IRQ NOT THERE!\n");
-               }
-       }
+       NCR5380_read(RESET_PARITY_INTERRUPT_REG);
        *data = d + c;
        *count = 0;
        *phase = NCR5380_read(STATUS_REG) & PHASE_MASK;
-#if defined(PSEUDO_DMA) && defined(UNSAFE)
-       spin_lock_irq(instance->host_lock);
-#endif                         /* defined(REAL_DMA_POLL) */
        return foo;
 #endif                         /* def REAL_DMA */
 }
@@ -1983,25 +1739,23 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance, unsigned char *phase
 /*
  * Function : NCR5380_information_transfer (struct Scsi_Host *instance)
  *
- * Purpose : run through the various SCSI phases and do as the target 
- *      directs us to.  Operates on the currently connected command, 
- *      instance->connected.
+ * Purpose : run through the various SCSI phases and do as the target
+ * directs us to.  Operates on the currently connected command,
+ * instance->connected.
  *
  * Inputs : instance, instance for which we are doing commands
  *
- * Side effects : SCSI things happen, the disconnected queue will be 
- *      modified if a command disconnects, *instance->connected will
- *      change.
+ * Side effects : SCSI things happen, the disconnected queue will be
+ * modified if a command disconnects, *instance->connected will
+ * change.
  *
- * XXX Note : we need to watch for bus free or a reset condition here 
- *      to recover from an unexpected bus free condition.
- *
- * Locks: io_request_lock held by caller in IRQ mode
+ * XXX Note : we need to watch for bus free or a reset condition here
+ * to recover from an unexpected bus free condition.
  */
 
-static void NCR5380_information_transfer(struct Scsi_Host *instance) {
-       NCR5380_local_declare();
-       struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *)instance->hostdata;
+static void NCR5380_information_transfer(struct Scsi_Host *instance)
+{
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
        unsigned char msgout = NOP;
        int sink = 0;
        int len;
@@ -2010,13 +1764,11 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) {
 #endif
        unsigned char *data;
        unsigned char phase, tmp, extended_msg[10], old_phase = 0xff;
-       struct scsi_cmnd *cmd = (struct scsi_cmnd *) hostdata->connected;
-       /* RvC: we need to set the end of the polling time */
-       unsigned long poll_time = jiffies + USLEEP_POLL;
+       struct scsi_cmnd *cmd;
 
-       NCR5380_setup(instance);
+       while ((cmd = hostdata->connected)) {
+               struct NCR5380_cmd *ncmd = scsi_cmd_priv(cmd);
 
-       while (1) {
                tmp = NCR5380_read(STATUS_REG);
                /* We only have a valid SCSI phase when REQ is asserted */
                if (tmp & SR_REQ) {
@@ -2028,24 +1780,28 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) {
                        if (sink && (phase != PHASE_MSGOUT)) {
                                NCR5380_write(TARGET_COMMAND_REG, PHASE_SR_TO_TCR(tmp));
 
-                               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN | ICR_ASSERT_ACK);
-                               while (NCR5380_read(STATUS_REG) & SR_REQ);
-                               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
+                               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN |
+                                             ICR_ASSERT_ACK);
+                               while (NCR5380_read(STATUS_REG) & SR_REQ)
+                                       ;
+                               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE |
+                                             ICR_ASSERT_ATN);
                                sink = 0;
                                continue;
                        }
+
                        switch (phase) {
-                       case PHASE_DATAIN:
                        case PHASE_DATAOUT:
 #if (NDEBUG & NDEBUG_NO_DATAOUT)
-                               printk("scsi%d : NDEBUG_NO_DATAOUT set, attempted DATAOUT aborted\n", instance->host_no);
+                               shost_printk(KERN_DEBUG, instance, "NDEBUG_NO_DATAOUT set, attempted DATAOUT aborted\n");
                                sink = 1;
                                do_abort(instance);
                                cmd->result = DID_ERROR << 16;
-                               cmd->scsi_done(cmd);
+                               complete_cmd(instance, cmd);
                                return;
 #endif
-                               /* 
+                       case PHASE_DATAIN:
+                               /*
                                 * If there is no room left in the current buffer in the
                                 * scatter-gather list, move onto the next one.
                                 */
@@ -2055,10 +1811,13 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) {
                                        --cmd->SCp.buffers_residual;
                                        cmd->SCp.this_residual = cmd->SCp.buffer->length;
                                        cmd->SCp.ptr = sg_virt(cmd->SCp.buffer);
-                                       dprintk(NDEBUG_INFORMATION, "scsi%d : %d bytes and %d buffers left\n", instance->host_no, cmd->SCp.this_residual, cmd->SCp.buffers_residual);
+                                       dsprintk(NDEBUG_INFORMATION, instance, "%d bytes and %d buffers left\n",
+                                                cmd->SCp.this_residual,
+                                                cmd->SCp.buffers_residual);
                                }
+
                                /*
-                                * The preferred transfer method is going to be 
+                                * The preferred transfer method is going to be
                                 * PSEUDO-DMA for systems that are strictly PIO,
                                 * since we can let the hardware do the handshaking.
                                 *
@@ -2068,50 +1827,39 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) {
                                 */
 
 #if defined(PSEUDO_DMA) || defined(REAL_DMA_POLL)
-                               /* KLL
-                                * PSEUDO_DMA is defined here. If this is the g_NCR5380
-                                * driver then it will always be defined, so the
-                                * FLAG_NO_PSEUDO_DMA is used to inhibit PDMA in the base
-                                * NCR5380 case.  I think this is a fairly clean solution.
-                                * We supplement these 2 if's with the flag.
-                                */
-#ifdef NCR5380_dma_xfer_len
-                               if (!cmd->device->borken && !(hostdata->flags & FLAG_NO_PSEUDO_DMA) && (transfersize = NCR5380_dma_xfer_len(instance, cmd)) != 0) {
-#else
-                               transfersize = cmd->transfersize;
-
-#ifdef LIMIT_TRANSFERSIZE      /* If we have problems with interrupt service */
-                               if (transfersize > 512)
-                                       transfersize = 512;
-#endif                         /* LIMIT_TRANSFERSIZE */
-
-                               if (!cmd->device->borken && transfersize && !(hostdata->flags & FLAG_NO_PSEUDO_DMA) && cmd->SCp.this_residual && !(cmd->SCp.this_residual % transfersize)) {
-                                       /* Limit transfers to 32K, for xx400 & xx406
-                                        * pseudoDMA that transfers in 128 bytes blocks. */
-                                       if (transfersize > 32 * 1024)
-                                               transfersize = 32 * 1024;
-#endif
+                               transfersize = 0;
+                               if (!cmd->device->borken &&
+                                   !(hostdata->flags & FLAG_NO_PSEUDO_DMA))
+                                       transfersize = NCR5380_dma_xfer_len(instance, cmd, phase);
+
+                               if (transfersize) {
                                        len = transfersize;
-                                       if (NCR5380_transfer_dma(instance, &phase, &len, (unsigned char **) &cmd->SCp.ptr)) {
+                                       if (NCR5380_transfer_dma(instance, &phase,
+                                           &len, (unsigned char **)&cmd->SCp.ptr)) {
                                                /*
-                                                * If the watchdog timer fires, all future accesses to this
-                                                * device will use the polled-IO.
+                                                * If the watchdog timer fires, all future
+                                                * accesses to this device will use the
+                                                * polled-IO.
                                                 */
                                                scmd_printk(KERN_INFO, cmd,
-                                                           "switching to slow handshake\n");
+                                                       "switching to slow handshake\n");
                                                cmd->device->borken = 1;
-                                               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
                                                sink = 1;
                                                do_abort(instance);
                                                cmd->result = DID_ERROR << 16;
-                                               cmd->scsi_done(cmd);
+                                               complete_cmd(instance, cmd);
                                                /* XXX - need to source or sink data here, as appropriate */
                                        } else
                                                cmd->SCp.this_residual -= transfersize - len;
                                } else
 #endif                         /* defined(PSEUDO_DMA) || defined(REAL_DMA_POLL) */
-                                       NCR5380_transfer_pio(instance, &phase, (int *) &cmd->SCp.this_residual, (unsigned char **)
-                                                            &cmd->SCp.ptr);
+                               {
+                                       spin_unlock_irq(&hostdata->lock);
+                                       NCR5380_transfer_pio(instance, &phase,
+                                                            (int *)&cmd->SCp.this_residual,
+                                                            (unsigned char **)&cmd->SCp.ptr);
+                                       spin_lock_irq(&hostdata->lock);
+                               }
                                break;
                        case PHASE_MSGIN:
                                len = 1;
@@ -2120,101 +1868,42 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) {
                                cmd->SCp.Message = tmp;
 
                                switch (tmp) {
-                                       /*
-                                        * Linking lets us reduce the time required to get the 
-                                        * next command out to the device, hopefully this will
-                                        * mean we don't waste another revolution due to the delays
-                                        * required by ARBITRATION and another SELECTION.
-                                        *
-                                        * In the current implementation proposal, low level drivers
-                                        * merely have to start the next command, pointed to by 
-                                        * next_link, done() is called as with unlinked commands.
-                                        */
-#ifdef LINKED
-                               case LINKED_CMD_COMPLETE:
-                               case LINKED_FLG_CMD_COMPLETE:
-                                       /* Accept message by clearing ACK */
-                                       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-                                       dprintk(NDEBUG_LINKED, "scsi%d : target %d lun %llu linked command complete.\n", instance->host_no, cmd->device->id, cmd->device->lun);
-                                       /* 
-                                        * Sanity check : A linked command should only terminate with
-                                        * one of these messages if there are more linked commands
-                                        * available.
-                                        */
-                                       if (!cmd->next_link) {
-                                           printk("scsi%d : target %d lun %llu linked command complete, no next_link\n" instance->host_no, cmd->device->id, cmd->device->lun);
-                                               sink = 1;
-                                               do_abort(instance);
-                                               return;
-                                       }
-                                       initialize_SCp(cmd->next_link);
-                                       /* The next command is still part of this process */
-                                       cmd->next_link->tag = cmd->tag;
-                                       cmd->result = cmd->SCp.Status | (cmd->SCp.Message << 8);
-                                       dprintk(NDEBUG_LINKED, "scsi%d : target %d lun %llu linked request done, calling scsi_done().\n", instance->host_no, cmd->device->id, cmd->device->lun);
-                                       cmd->scsi_done(cmd);
-                                       cmd = hostdata->connected;
-                                       break;
-#endif                         /* def LINKED */
                                case ABORT:
                                case COMMAND_COMPLETE:
                                        /* Accept message by clearing ACK */
                                        sink = 1;
                                        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-                                       hostdata->connected = NULL;
-                                       dprintk(NDEBUG_QUEUES, "scsi%d : command for target %d, lun %llu completed\n", instance->host_no, cmd->device->id, cmd->device->lun);
-                                       hostdata->busy[cmd->device->id] &= ~(1 << (cmd->device->lun & 0xFF));
-
-                                       /* 
-                                        * I'm not sure what the correct thing to do here is : 
-                                        * 
-                                        * If the command that just executed is NOT a request 
-                                        * sense, the obvious thing to do is to set the result
-                                        * code to the values of the stored parameters.
-                                        * 
-                                        * If it was a REQUEST SENSE command, we need some way 
-                                        * to differentiate between the failure code of the original
-                                        * and the failure code of the REQUEST sense - the obvious
-                                        * case is success, where we fall through and leave the result
-                                        * code unchanged.
-                                        * 
-                                        * The non-obvious place is where the REQUEST SENSE failed 
-                                        */
-
-                                       if (cmd->cmnd[0] != REQUEST_SENSE)
-                                               cmd->result = cmd->SCp.Status | (cmd->SCp.Message << 8);
-                                       else if (status_byte(cmd->SCp.Status) != GOOD)
-                                               cmd->result = (cmd->result & 0x00ffff) | (DID_ERROR << 16);
-
-                                       if ((cmd->cmnd[0] == REQUEST_SENSE) &&
-                                               hostdata->ses.cmd_len) {
-                                               scsi_eh_restore_cmnd(cmd, &hostdata->ses);
-                                               hostdata->ses.cmd_len = 0 ;
-                                       }
+                                       dsprintk(NDEBUG_QUEUES, instance,
+                                                "COMMAND COMPLETE %p target %d lun %llu\n",
+                                                cmd, scmd_id(cmd), cmd->device->lun);
 
-                                       if ((cmd->cmnd[0] != REQUEST_SENSE) && (status_byte(cmd->SCp.Status) == CHECK_CONDITION)) {
-                                               scsi_eh_prep_cmnd(cmd, &hostdata->ses, NULL, 0, ~0);
-
-                                               dprintk(NDEBUG_AUTOSENSE, "scsi%d : performing request sense\n", instance->host_no);
+                                       hostdata->connected = NULL;
 
-                                               LIST(cmd, hostdata->issue_queue);
-                                               cmd->host_scribble = (unsigned char *)
-                                                   hostdata->issue_queue;
-                                               hostdata->issue_queue = (struct scsi_cmnd *) cmd;
-                                               dprintk(NDEBUG_QUEUES, "scsi%d : REQUEST SENSE added to head of issue queue\n", instance->host_no);
-                                       } else {
-                                               cmd->scsi_done(cmd);
+                                       cmd->result &= ~0xffff;
+                                       cmd->result |= cmd->SCp.Status;
+                                       cmd->result |= cmd->SCp.Message << 8;
+
+                                       if (cmd->cmnd[0] == REQUEST_SENSE)
+                                               complete_cmd(instance, cmd);
+                                       else {
+                                               if (cmd->SCp.Status == SAM_STAT_CHECK_CONDITION ||
+                                                   cmd->SCp.Status == SAM_STAT_COMMAND_TERMINATED) {
+                                                       dsprintk(NDEBUG_QUEUES, instance, "autosense: adding cmd %p to tail of autosense queue\n",
+                                                                cmd);
+                                                       list_add_tail(&ncmd->list,
+                                                                     &hostdata->autosense);
+                                               } else
+                                                       complete_cmd(instance, cmd);
                                        }
 
-                                       NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
-                                       /* 
-                                        * Restore phase bits to 0 so an interrupted selection, 
+                                       /*
+                                        * Restore phase bits to 0 so an interrupted selection,
                                         * arbitration can resume.
                                         */
                                        NCR5380_write(TARGET_COMMAND_REG, 0);
 
-                                       while ((NCR5380_read(STATUS_REG) & SR_BSY) && !hostdata->connected)
-                                               barrier();
+                                       /* Enable reselect interrupts */
+                                       NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
                                        return;
                                case MESSAGE_REJECT:
                                        /* Accept message by clearing ACK */
@@ -2229,38 +1918,33 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) {
                                        default:
                                                break;
                                        }
-                               case DISCONNECT:{
-                                               /* Accept message by clearing ACK */
-                                               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-                                               cmd->device->disconnect = 1;
-                                               LIST(cmd, hostdata->disconnected_queue);
-                                               cmd->host_scribble = (unsigned char *)
-                                                   hostdata->disconnected_queue;
-                                               hostdata->connected = NULL;
-                                               hostdata->disconnected_queue = cmd;
-                                               dprintk(NDEBUG_QUEUES, "scsi%d : command for target %d lun %llu was moved from connected to" "  the disconnected_queue\n", instance->host_no, cmd->device->id, cmd->device->lun);
-                                               /* 
-                                                * Restore phase bits to 0 so an interrupted selection, 
-                                                * arbitration can resume.
-                                                */
-                                               NCR5380_write(TARGET_COMMAND_REG, 0);
-
-                                               /* Enable reselect interrupts */
-                                               NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
-                                               /* Wait for bus free to avoid nasty timeouts - FIXME timeout !*/
-                                               /* NCR538_poll_politely(instance, STATUS_REG, SR_BSY, 0, 30 * HZ); */
-                                               while ((NCR5380_read(STATUS_REG) & SR_BSY) && !hostdata->connected)
-                                                       barrier();
-                                               return;
-                                       }
-                                       /* 
+                                       break;
+                               case DISCONNECT:
+                                       /* Accept message by clearing ACK */
+                                       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+                                       hostdata->connected = NULL;
+                                       list_add(&ncmd->list, &hostdata->disconnected);
+                                       dsprintk(NDEBUG_INFORMATION | NDEBUG_QUEUES,
+                                                instance, "connected command %p for target %d lun %llu moved to disconnected queue\n",
+                                                cmd, scmd_id(cmd), cmd->device->lun);
+
+                                       /*
+                                        * Restore phase bits to 0 so an interrupted selection,
+                                        * arbitration can resume.
+                                        */
+                                       NCR5380_write(TARGET_COMMAND_REG, 0);
+
+                                       /* Enable reselect interrupts */
+                                       NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
+                                       return;
+                                       /*
                                         * The SCSI data pointer is *IMPLICITLY* saved on a disconnect
-                                        * operation, in violation of the SCSI spec so we can safely 
+                                        * operation, in violation of the SCSI spec so we can safely
                                         * ignore SAVE/RESTORE pointers calls.
                                         *
-                                        * Unfortunately, some disks violate the SCSI spec and 
+                                        * Unfortunately, some disks violate the SCSI spec and
                                         * don't issue the required SAVE_POINTERS message before
-                                        * disconnecting, and we have to break spec to remain 
+                                        * disconnecting, and we have to break spec to remain
                                         * compatible.
                                         */
                                case SAVE_POINTERS:
@@ -2269,31 +1953,28 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) {
                                        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
                                        break;
                                case EXTENDED_MESSAGE:
-/* 
- * Extended messages are sent in the following format :
- * Byte         
- * 0            EXTENDED_MESSAGE == 1
- * 1            length (includes one byte for code, doesn't 
- *              include first two bytes)
- * 2            code
- * 3..length+1  arguments
- *
- * Start the extended message buffer with the EXTENDED_MESSAGE
- * byte, since spi_print_msg() wants the whole thing.  
- */
+                                       /*
+                                        * Start the message buffer with the EXTENDED_MESSAGE
+                                        * byte, since spi_print_msg() wants the whole thing.
+                                        */
                                        extended_msg[0] = EXTENDED_MESSAGE;
                                        /* Accept first byte by clearing ACK */
                                        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-                                       dprintk(NDEBUG_EXTENDED, "scsi%d : receiving extended message\n", instance->host_no);
+
+                                       spin_unlock_irq(&hostdata->lock);
+
+                                       dsprintk(NDEBUG_EXTENDED, instance, "receiving extended message\n");
 
                                        len = 2;
                                        data = extended_msg + 1;
                                        phase = PHASE_MSGIN;
                                        NCR5380_transfer_pio(instance, &phase, &len, &data);
+                                       dsprintk(NDEBUG_EXTENDED, instance, "length %d, code 0x%02x\n",
+                                                (int)extended_msg[1],
+                                                (int)extended_msg[2]);
 
-                                       dprintk(NDEBUG_EXTENDED, "scsi%d : length=%d, code=0x%02x\n", instance->host_no, (int) extended_msg[1], (int) extended_msg[2]);
-
-                                       if (!len && extended_msg[1] <= (sizeof(extended_msg) - 1)) {
+                                       if (!len && extended_msg[1] > 0 &&
+                                           extended_msg[1] <= sizeof(extended_msg) - 2) {
                                                /* Accept third byte by clearing ACK */
                                                NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
                                                len = extended_msg[1] - 1;
@@ -2301,7 +1982,8 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) {
                                                phase = PHASE_MSGIN;
 
                                                NCR5380_transfer_pio(instance, &phase, &len, &data);
-                                               dprintk(NDEBUG_EXTENDED, "scsi%d : message received, residual %d\n", instance->host_no, len);
+                                               dsprintk(NDEBUG_EXTENDED, instance, "message received, residual %d\n",
+                                                        len);
 
                                                switch (extended_msg[2]) {
                                                case EXTENDED_SDTR:
@@ -2311,34 +1993,42 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) {
                                                        tmp = 0;
                                                }
                                        } else if (len) {
-                                               printk("scsi%d: error receiving extended message\n", instance->host_no);
+                                               shost_printk(KERN_ERR, instance, "error receiving extended message\n");
                                                tmp = 0;
                                        } else {
-                                               printk("scsi%d: extended message code %02x length %d is too long\n", instance->host_no, extended_msg[2], extended_msg[1]);
+                                               shost_printk(KERN_NOTICE, instance, "extended message code %02x length %d is too long\n",
+                                                            extended_msg[2], extended_msg[1]);
                                                tmp = 0;
                                        }
+
+                                       spin_lock_irq(&hostdata->lock);
+                                       if (!hostdata->connected)
+                                               return;
+
                                        /* Fall through to reject message */
 
-                                       /* 
-                                        * If we get something weird that we aren't expecting, 
+                                       /*
+                                        * If we get something weird that we aren't expecting,
                                         * reject it.
                                         */
                                default:
                                        if (!tmp) {
-                                               printk("scsi%d: rejecting message ", instance->host_no);
+                                               shost_printk(KERN_ERR, instance, "rejecting message ");
                                                spi_print_msg(extended_msg);
                                                printk("\n");
                                        } else if (tmp != EXTENDED_MESSAGE)
                                                scmd_printk(KERN_INFO, cmd,
-                                                       "rejecting unknown message %02x\n",tmp);
+                                                           "rejecting unknown message %02x\n",
+                                                           tmp);
                                        else
                                                scmd_printk(KERN_INFO, cmd,
-                                                       "rejecting unknown extended message code %02x, length %d\n", extended_msg[1], extended_msg[0]);
+                                                           "rejecting unknown extended message code %02x, length %d\n",
+                                                           extended_msg[1], extended_msg[0]);
 
                                        msgout = MESSAGE_REJECT;
                                        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
                                        break;
-                               }       /* switch (tmp) */
+                               } /* switch (tmp) */
                                break;
                        case PHASE_MSGOUT:
                                len = 1;
@@ -2346,10 +2036,9 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) {
                                hostdata->last_message = msgout;
                                NCR5380_transfer_pio(instance, &phase, &len, &data);
                                if (msgout == ABORT) {
-                                       hostdata->busy[cmd->device->id] &= ~(1 << (cmd->device->lun & 0xFF));
                                        hostdata->connected = NULL;
                                        cmd->result = DID_ERROR << 16;
-                                       cmd->scsi_done(cmd);
+                                       complete_cmd(instance, cmd);
                                        NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
                                        return;
                                }
@@ -2358,17 +2047,12 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) {
                        case PHASE_CMDOUT:
                                len = cmd->cmd_len;
                                data = cmd->cmnd;
-                               /* 
-                                * XXX for performance reasons, on machines with a 
-                                * PSEUDO-DMA architecture we should probably 
-                                * use the dma transfer function.  
+                               /*
+                                * XXX for performance reasons, on machines with a
+                                * PSEUDO-DMA architecture we should probably
+                                * use the dma transfer function.
                                 */
                                NCR5380_transfer_pio(instance, &phase, &len, &data);
-                               if (!cmd->device->disconnect && should_disconnect(cmd->cmnd[0])) {
-                                       NCR5380_set_timer(hostdata, USLEEP_SLEEP);
-                                       dprintk(NDEBUG_USLEEP, "scsi%d : issued command, sleeping until %lu\n", instance->host_no, hostdata->time_expires);
-                                       return;
-                               }
                                break;
                        case PHASE_STATIN:
                                len = 1;
@@ -2377,46 +2061,37 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) {
                                cmd->SCp.Status = tmp;
                                break;
                        default:
-                               printk("scsi%d : unknown phase\n", instance->host_no);
+                               shost_printk(KERN_ERR, instance, "unknown phase\n");
                                NCR5380_dprint(NDEBUG_ANY, instance);
-                       }       /* switch(phase) */
-               }               /* if (tmp * SR_REQ) */
-               else {
-                       /* RvC: go to sleep if polling time expired
-                        */
-                       if (!cmd->device->disconnect && time_after_eq(jiffies, poll_time)) {
-                               NCR5380_set_timer(hostdata, USLEEP_SLEEP);
-                               dprintk(NDEBUG_USLEEP, "scsi%d : poll timed out, sleeping until %lu\n", instance->host_no, hostdata->time_expires);
-                               return;
-                       }
+                       } /* switch(phase) */
+               } else {
+                       spin_unlock_irq(&hostdata->lock);
+                       NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, SR_REQ, HZ);
+                       spin_lock_irq(&hostdata->lock);
                }
-       }                       /* while (1) */
+       }
 }
 
 /*
  * Function : void NCR5380_reselect (struct Scsi_Host *instance)
  *
- * Purpose : does reselection, initializing the instance->connected 
- *      field to point to the scsi_cmnd for which the I_T_L or I_T_L_Q
- *      nexus has been reestablished,
- *      
- * Inputs : instance - this instance of the NCR5380.
+ * Purpose : does reselection, initializing the instance->connected
+ * field to point to the scsi_cmnd for which the I_T_L or I_T_L_Q
+ * nexus has been reestablished,
  *
- * Locks: io_request_lock held by caller if IRQ driven
+ * Inputs : instance - this instance of the NCR5380.
  */
 
-static void NCR5380_reselect(struct Scsi_Host *instance) {
-       NCR5380_local_declare();
-       struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *)
-        instance->hostdata;
+static void NCR5380_reselect(struct Scsi_Host *instance)
+{
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
        unsigned char target_mask;
        unsigned char lun, phase;
        int len;
        unsigned char msg[3];
        unsigned char *data;
-       struct scsi_cmnd *tmp = NULL, *prev;
-       int abort = 0;
-       NCR5380_setup(instance);
+       struct NCR5380_cmd *ncmd;
+       struct scsi_cmnd *tmp;
 
        /*
         * Disable arbitration, etc. since the host adapter obviously
@@ -2424,12 +2099,12 @@ static void NCR5380_reselect(struct Scsi_Host *instance) {
         */
 
        NCR5380_write(MODE_REG, MR_BASE);
-       hostdata->restart_select = 1;
 
        target_mask = NCR5380_read(CURRENT_SCSI_DATA_REG) & ~(hostdata->id_mask);
-       dprintk(NDEBUG_SELECTION, "scsi%d : reselect\n", instance->host_no);
 
-       /* 
+       dsprintk(NDEBUG_RESELECTION, instance, "reselect\n");
+
+       /*
         * At this point, we have detected that our SCSI ID is on the bus,
         * SEL is true and BSY was false for at least one bus settle delay
         * (400 ns).
@@ -2439,103 +2114,110 @@ static void NCR5380_reselect(struct Scsi_Host *instance) {
         */
 
        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_BSY);
-
-       /* FIXME: timeout too long, must fail to workqueue */   
-       if(NCR5380_poll_politely(instance, STATUS_REG, SR_SEL, 0, 2*HZ)<0)
-               abort = 1;
-               
+       if (NCR5380_poll_politely(instance,
+                                 STATUS_REG, SR_SEL, 0, 2 * HZ) < 0) {
+               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+               return;
+       }
        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
 
        /*
         * Wait for target to go into MSGIN.
-        * FIXME: timeout needed and fail to work queeu
         */
 
-       if(NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, SR_REQ, 2*HZ))
-               abort = 1;
+       if (NCR5380_poll_politely(instance,
+                                 STATUS_REG, SR_REQ, SR_REQ, 2 * HZ) < 0) {
+               do_abort(instance);
+               return;
+       }
 
        len = 1;
        data = msg;
        phase = PHASE_MSGIN;
        NCR5380_transfer_pio(instance, &phase, &len, &data);
 
+       if (len) {
+               do_abort(instance);
+               return;
+       }
+
        if (!(msg[0] & 0x80)) {
-               printk(KERN_ERR "scsi%d : expecting IDENTIFY message, got ", instance->host_no);
+               shost_printk(KERN_ERR, instance, "expecting IDENTIFY message, got ");
                spi_print_msg(msg);
-               abort = 1;
-       } else {
-               /* Accept message by clearing ACK */
-               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-               lun = (msg[0] & 0x07);
+               printk("\n");
+               do_abort(instance);
+               return;
+       }
+       lun = msg[0] & 0x07;
 
-               /* 
-                * We need to add code for SCSI-II to track which devices have
-                * I_T_L_Q nexuses established, and which have simple I_T_L
-                * nexuses so we can chose to do additional data transfer.
-                */
+       /*
+        * We need to add code for SCSI-II to track which devices have
+        * I_T_L_Q nexuses established, and which have simple I_T_L
+        * nexuses so we can chose to do additional data transfer.
+        */
 
-               /* 
-                * Find the command corresponding to the I_T_L or I_T_L_Q  nexus we 
-                * just reestablished, and remove it from the disconnected queue.
-                */
+       /*
+        * Find the command corresponding to the I_T_L or I_T_L_Q  nexus we
+        * just reestablished, and remove it from the disconnected queue.
+        */
 
+       tmp = NULL;
+       list_for_each_entry(ncmd, &hostdata->disconnected, list) {
+               struct scsi_cmnd *cmd = NCR5380_to_scmd(ncmd);
 
-               for (tmp = (struct scsi_cmnd *) hostdata->disconnected_queue, prev = NULL; tmp; prev = tmp, tmp = (struct scsi_cmnd *) tmp->host_scribble)
-                       if ((target_mask == (1 << tmp->device->id)) && (lun == (u8)tmp->device->lun)
-                           ) {
-                               if (prev) {
-                                       REMOVE(prev, prev->host_scribble, tmp, tmp->host_scribble);
-                                       prev->host_scribble = tmp->host_scribble;
-                               } else {
-                                       REMOVE(-1, hostdata->disconnected_queue, tmp, tmp->host_scribble);
-                                       hostdata->disconnected_queue = (struct scsi_cmnd *) tmp->host_scribble;
-                               }
-                               tmp->host_scribble = NULL;
-                               break;
-                       }
-               if (!tmp) {
-                       printk(KERN_ERR "scsi%d : warning : target bitmask %02x lun %d not in disconnect_queue.\n", instance->host_no, target_mask, lun);
-                       /* 
-                        * Since we have an established nexus that we can't do anything with,
-                        * we must abort it.  
-                        */
-                       abort = 1;
+               if (target_mask == (1 << scmd_id(cmd)) &&
+                   lun == (u8)cmd->device->lun) {
+                       list_del(&ncmd->list);
+                       tmp = cmd;
+                       break;
                }
        }
 
-       if (abort) {
-               do_abort(instance);
+       if (tmp) {
+               dsprintk(NDEBUG_RESELECTION | NDEBUG_QUEUES, instance,
+                        "reselect: removed %p from disconnected queue\n", tmp);
        } else {
-               hostdata->connected = tmp;
-               dprintk(NDEBUG_RESELECTION, "scsi%d : nexus established, target = %d, lun = %llu, tag = %d\n", instance->host_no, tmp->device->id, tmp->device->lun, tmp->tag);
+               shost_printk(KERN_ERR, instance, "target bitmask 0x%02x lun %d not in disconnected queue.\n",
+                            target_mask, lun);
+               /*
+                * Since we have an established nexus that we can't do anything
+                * with, we must abort it.
+                */
+               do_abort(instance);
+               return;
        }
+
+       /* Accept message by clearing ACK */
+       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+
+       hostdata->connected = tmp;
+       dsprintk(NDEBUG_RESELECTION, instance, "nexus established, target %d, lun %llu, tag %d\n",
+                scmd_id(tmp), tmp->device->lun, tmp->tag);
 }
 
 /*
  * Function : void NCR5380_dma_complete (struct Scsi_Host *instance)
  *
  * Purpose : called by interrupt handler when DMA finishes or a phase
- *      mismatch occurs (which would finish the DMA transfer).  
+ * mismatch occurs (which would finish the DMA transfer).
  *
  * Inputs : instance - this instance of the NCR5380.
  *
  * Returns : pointer to the scsi_cmnd structure for which the I_T_L
- *      nexus has been reestablished, on failure NULL is returned.
+ * nexus has been reestablished, on failure NULL is returned.
  */
 
 #ifdef REAL_DMA
 static void NCR5380_dma_complete(NCR5380_instance * instance) {
-       NCR5380_local_declare();
-       struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *) instance->hostdata;
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
        int transferred;
-       NCR5380_setup(instance);
 
        /*
         * XXX this might not be right.
         *
         * Wait for final byte to transfer, ie wait for ACK to go false.
         *
-        * We should use the Last Byte Sent bit, unfortunately this is 
+        * We should use the Last Byte Sent bit, unfortunately this is
         * not available on the 5380/5381 (only the various CMOS chips)
         *
         * FIXME: timeout, and need to handle long timeout/irq case
@@ -2543,7 +2225,6 @@ static void NCR5380_dma_complete(NCR5380_instance * instance) {
 
        NCR5380_poll_politely(instance, BUS_AND_STATUS_REG, BASR_ACK, 0, 5*HZ);
 
-       NCR5380_write(MODE_REG, MR_BASE);
        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
 
        /*
@@ -2560,190 +2241,251 @@ static void NCR5380_dma_complete(NCR5380_instance * instance) {
 }
 #endif                         /* def REAL_DMA */
 
-/*
- * Function : int NCR5380_abort (struct scsi_cmnd *cmd)
- *
- * Purpose : abort a command
- *
- * Inputs : cmd - the scsi_cmnd to abort, code - code to set the
- *      host byte of the result field to, if zero DID_ABORTED is
- *      used.
- *
- * Returns : SUCCESS - success, FAILED on failure.
- *
- *     XXX - there is no way to abort the command that is currently
- *     connected, you have to wait for it to complete.  If this is
- *     a problem, we could implement longjmp() / setjmp(), setjmp()
- *     called where the loop started in NCR5380_main().
- *
- * Locks: host lock taken by caller
+/**
+ * list_find_cmd - test for presence of a command in a linked list
+ * @haystack: list of commands
+ * @needle: command to search for
  */
 
-static int NCR5380_abort(struct scsi_cmnd *cmd)
+static bool list_find_cmd(struct list_head *haystack,
+                          struct scsi_cmnd *needle)
 {
-       NCR5380_local_declare();
-       struct Scsi_Host *instance = cmd->device->host;
-       struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *) instance->hostdata;
-       struct scsi_cmnd *tmp, **prev;
+       struct NCR5380_cmd *ncmd;
 
-       scmd_printk(KERN_WARNING, cmd, "aborting command\n");
+       list_for_each_entry(ncmd, haystack, list)
+               if (NCR5380_to_scmd(ncmd) == needle)
+                       return true;
+       return false;
+}
 
-       NCR5380_print_status(instance);
+/**
+ * list_remove_cmd - remove a command from linked list
+ * @haystack: list of commands
+ * @needle: command to remove
+ */
 
-       NCR5380_setup(instance);
+static bool list_del_cmd(struct list_head *haystack,
+                         struct scsi_cmnd *needle)
+{
+       if (list_find_cmd(haystack, needle)) {
+               struct NCR5380_cmd *ncmd = scsi_cmd_priv(needle);
 
-       dprintk(NDEBUG_ABORT, "scsi%d : abort called\n", instance->host_no);
-       dprintk(NDEBUG_ABORT, "        basr 0x%X, sr 0x%X\n", NCR5380_read(BUS_AND_STATUS_REG), NCR5380_read(STATUS_REG));
+               list_del(&ncmd->list);
+               return true;
+       }
+       return false;
+}
 
-#if 0
-/*
- * Case 1 : If the command is the currently executing command, 
- * we'll set the aborted flag and return control so that 
- * information transfer routine can exit cleanly.
+/**
+ * NCR5380_abort - scsi host eh_abort_handler() method
+ * @cmd: the command to be aborted
+ *
+ * Try to abort a given command by removing it from queues and/or sending
+ * the target an abort message. This may not succeed in causing a target
+ * to abort the command. Nonetheless, the low-level driver must forget about
+ * the command because the mid-layer reclaims it and it may be re-issued.
+ *
+ * The normal path taken by a command is as follows. For EH we trace this
+ * same path to locate and abort the command.
+ *
+ * unissued -> selecting -> [unissued -> selecting ->]... connected ->
+ * [disconnected -> connected ->]...
+ * [autosense -> connected ->] done
+ *
+ * If cmd is unissued then just remove it.
+ * If cmd is disconnected, try to select the target.
+ * If cmd is connected, try to send an abort message.
+ * If cmd is waiting for autosense, give it a chance to complete but check
+ * that it isn't left connected.
+ * If cmd was not found at all then presumably it has already been completed,
+ * in which case return SUCCESS to try to avoid further EH measures.
+ * If the command has not completed yet, we must not fail to find it.
  */
 
-       if (hostdata->connected == cmd) {
-               dprintk(NDEBUG_ABORT, "scsi%d : aborting connected command\n", instance->host_no);
-               hostdata->aborted = 1;
-/*
- * We should perform BSY checking, and make sure we haven't slipped
- * into BUS FREE.
- */
+static int NCR5380_abort(struct scsi_cmnd *cmd)
+{
+       struct Scsi_Host *instance = cmd->device->host;
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+       unsigned long flags;
+       int result = SUCCESS;
 
-               NCR5380_write(INITIATOR_COMMAND_REG, ICR_ASSERT_ATN);
-/* 
- * Since we can't change phases until we've completed the current 
- * handshake, we have to source or sink a byte of data if the current
- * phase is not MSGOUT.
- */
+       spin_lock_irqsave(&hostdata->lock, flags);
 
-/* 
- * Return control to the executing NCR drive so we can clear the
- * aborted flag and get back into our main loop.
- */
+#if (NDEBUG & NDEBUG_ANY)
+       scmd_printk(KERN_INFO, cmd, __func__);
+#endif
+       NCR5380_dprint(NDEBUG_ANY, instance);
+       NCR5380_dprint_phase(NDEBUG_ANY, instance);
 
-               return SUCCESS;
+       if (list_del_cmd(&hostdata->unissued, cmd)) {
+               dsprintk(NDEBUG_ABORT, instance,
+                        "abort: removed %p from issue queue\n", cmd);
+               cmd->result = DID_ABORT << 16;
+               cmd->scsi_done(cmd); /* No tag or busy flag to worry about */
        }
-#endif
 
-/* 
- * Case 2 : If the command hasn't been issued yet, we simply remove it 
- *          from the issue queue.
- */
-       dprintk(NDEBUG_ABORT, "scsi%d : abort going into loop.\n", instance->host_no);
-       for (prev = (struct scsi_cmnd **) &(hostdata->issue_queue), tmp = (struct scsi_cmnd *) hostdata->issue_queue; tmp; prev = (struct scsi_cmnd **) &(tmp->host_scribble), tmp = (struct scsi_cmnd *) tmp->host_scribble)
-               if (cmd == tmp) {
-                       REMOVE(5, *prev, tmp, tmp->host_scribble);
-                       (*prev) = (struct scsi_cmnd *) tmp->host_scribble;
-                       tmp->host_scribble = NULL;
-                       tmp->result = DID_ABORT << 16;
-                       dprintk(NDEBUG_ABORT, "scsi%d : abort removed command from issue queue.\n", instance->host_no);
-                       tmp->scsi_done(tmp);
-                       return SUCCESS;
+       if (hostdata->selecting == cmd) {
+               dsprintk(NDEBUG_ABORT, instance,
+                        "abort: cmd %p == selecting\n", cmd);
+               hostdata->selecting = NULL;
+               cmd->result = DID_ABORT << 16;
+               complete_cmd(instance, cmd);
+               goto out;
+       }
+
+       if (list_del_cmd(&hostdata->disconnected, cmd)) {
+               dsprintk(NDEBUG_ABORT, instance,
+                        "abort: removed %p from disconnected list\n", cmd);
+               cmd->result = DID_ERROR << 16;
+               if (!hostdata->connected)
+                       NCR5380_select(instance, cmd);
+               if (hostdata->connected != cmd) {
+                       complete_cmd(instance, cmd);
+                       result = FAILED;
+                       goto out;
+               }
+       }
+
+       if (hostdata->connected == cmd) {
+               dsprintk(NDEBUG_ABORT, instance, "abort: cmd %p is connected\n", cmd);
+               hostdata->connected = NULL;
+               if (do_abort(instance)) {
+                       set_host_byte(cmd, DID_ERROR);
+                       complete_cmd(instance, cmd);
+                       result = FAILED;
+                       goto out;
                }
-#if (NDEBUG  & NDEBUG_ABORT)
-       /* KLL */
-               else if (prev == tmp)
-                       printk(KERN_ERR "scsi%d : LOOP\n", instance->host_no);
+               set_host_byte(cmd, DID_ABORT);
+#ifdef REAL_DMA
+               hostdata->dma_len = 0;
 #endif
+               if (cmd->cmnd[0] == REQUEST_SENSE)
+                       complete_cmd(instance, cmd);
+               else {
+                       struct NCR5380_cmd *ncmd = scsi_cmd_priv(cmd);
 
-/* 
- * Case 3 : If any commands are connected, we're going to fail the abort
- *          and let the high level SCSI driver retry at a later time or 
- *          issue a reset.
- *
- *          Timeouts, and therefore aborted commands, will be highly unlikely
- *          and handling them cleanly in this situation would make the common
- *          case of noresets less efficient, and would pollute our code.  So,
- *          we fail.
- */
+                       /* Perform autosense for this command */
+                       list_add(&ncmd->list, &hostdata->autosense);
+               }
+       }
 
-       if (hostdata->connected) {
-               dprintk(NDEBUG_ABORT, "scsi%d : abort failed, command connected.\n", instance->host_no);
-               return FAILED;
+       if (list_find_cmd(&hostdata->autosense, cmd)) {
+               dsprintk(NDEBUG_ABORT, instance,
+                        "abort: found %p on sense queue\n", cmd);
+               spin_unlock_irqrestore(&hostdata->lock, flags);
+               queue_work(hostdata->work_q, &hostdata->main_task);
+               msleep(1000);
+               spin_lock_irqsave(&hostdata->lock, flags);
+               if (list_del_cmd(&hostdata->autosense, cmd)) {
+                       dsprintk(NDEBUG_ABORT, instance,
+                                "abort: removed %p from sense queue\n", cmd);
+                       set_host_byte(cmd, DID_ABORT);
+                       complete_cmd(instance, cmd);
+                       goto out;
+               }
        }
-/*
- * Case 4: If the command is currently disconnected from the bus, and 
- *      there are no connected commands, we reconnect the I_T_L or 
- *      I_T_L_Q nexus associated with it, go into message out, and send 
- *      an abort message.
- *
- * This case is especially ugly. In order to reestablish the nexus, we
- * need to call NCR5380_select().  The easiest way to implement this 
- * function was to abort if the bus was busy, and let the interrupt
- * handler triggered on the SEL for reselect take care of lost arbitrations
- * where necessary, meaning interrupts need to be enabled.
- *
- * When interrupts are enabled, the queues may change - so we 
- * can't remove it from the disconnected queue before selecting it
- * because that could cause a failure in hashing the nexus if that 
- * device reselected.
- * 
- * Since the queues may change, we can't use the pointers from when we
- * first locate it.
- *
- * So, we must first locate the command, and if NCR5380_select()
- * succeeds, then issue the abort, relocate the command and remove
- * it from the disconnected queue.
- */
 
-       for (tmp = (struct scsi_cmnd *) hostdata->disconnected_queue; tmp; tmp = (struct scsi_cmnd *) tmp->host_scribble)
-               if (cmd == tmp) {
-                       dprintk(NDEBUG_ABORT, "scsi%d : aborting disconnected command.\n", instance->host_no);
+       if (hostdata->connected == cmd) {
+               dsprintk(NDEBUG_ABORT, instance, "abort: cmd %p is connected\n", cmd);
+               hostdata->connected = NULL;
+               if (do_abort(instance)) {
+                       set_host_byte(cmd, DID_ERROR);
+                       complete_cmd(instance, cmd);
+                       result = FAILED;
+                       goto out;
+               }
+               set_host_byte(cmd, DID_ABORT);
+#ifdef REAL_DMA
+               hostdata->dma_len = 0;
+#endif
+               complete_cmd(instance, cmd);
+       }
 
-                       if (NCR5380_select(instance, cmd))
-                               return FAILED;
-                       dprintk(NDEBUG_ABORT, "scsi%d : nexus reestablished.\n", instance->host_no);
+out:
+       if (result == FAILED)
+               dsprintk(NDEBUG_ABORT, instance, "abort: failed to abort %p\n", cmd);
+       else
+               dsprintk(NDEBUG_ABORT, instance, "abort: successfully aborted %p\n", cmd);
 
-                       do_abort(instance);
+       queue_work(hostdata->work_q, &hostdata->main_task);
+       spin_unlock_irqrestore(&hostdata->lock, flags);
 
-                       for (prev = (struct scsi_cmnd **) &(hostdata->disconnected_queue), tmp = (struct scsi_cmnd *) hostdata->disconnected_queue; tmp; prev = (struct scsi_cmnd **) &(tmp->host_scribble), tmp = (struct scsi_cmnd *) tmp->host_scribble)
-                               if (cmd == tmp) {
-                                       REMOVE(5, *prev, tmp, tmp->host_scribble);
-                                       *prev = (struct scsi_cmnd *) tmp->host_scribble;
-                                       tmp->host_scribble = NULL;
-                                       tmp->result = DID_ABORT << 16;
-                                       tmp->scsi_done(tmp);
-                                       return SUCCESS;
-                               }
-               }
-/*
- * Case 5 : If we reached this point, the command was not found in any of 
- *          the queues.
- *
- * We probably reached this point because of an unlikely race condition
- * between the command completing successfully and the abortion code,
- * so we won't panic, but we will notify the user in case something really
- * broke.
- */
-       printk(KERN_WARNING "scsi%d : warning : SCSI command probably completed successfully\n"
-                       "         before abortion\n", instance->host_no);
-       return FAILED;
+       return result;
 }
 
 
-/* 
- * Function : int NCR5380_bus_reset (struct scsi_cmnd *cmd)
- * 
- * Purpose : reset the SCSI bus.
- *
- * Returns : SUCCESS
+/**
+ * NCR5380_bus_reset - reset the SCSI bus
+ * @cmd: SCSI command undergoing EH
  *
- * Locks: host lock taken by caller
+ * Returns SUCCESS
  */
 
 static int NCR5380_bus_reset(struct scsi_cmnd *cmd)
 {
        struct Scsi_Host *instance = cmd->device->host;
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+       int i;
+       unsigned long flags;
+       struct NCR5380_cmd *ncmd;
 
-       NCR5380_local_declare();
-       NCR5380_setup(instance);
-       NCR5380_print_status(instance);
+       spin_lock_irqsave(&hostdata->lock, flags);
+
+#if (NDEBUG & NDEBUG_ANY)
+       scmd_printk(KERN_INFO, cmd, __func__);
+#endif
+       NCR5380_dprint(NDEBUG_ANY, instance);
+       NCR5380_dprint_phase(NDEBUG_ANY, instance);
 
-       spin_lock_irq(instance->host_lock);
        do_reset(instance);
-       spin_unlock_irq(instance->host_lock);
+
+       /* reset NCR registers */
+       NCR5380_write(MODE_REG, MR_BASE);
+       NCR5380_write(TARGET_COMMAND_REG, 0);
+       NCR5380_write(SELECT_ENABLE_REG, 0);
+
+       /* After the reset, there are no more connected or disconnected commands
+        * and no busy units; so clear the low-level status here to avoid
+        * conflicts when the mid-level code tries to wake up the affected
+        * commands!
+        */
+
+       hostdata->selecting = NULL;
+
+       list_for_each_entry(ncmd, &hostdata->disconnected, list) {
+               struct scsi_cmnd *cmd = NCR5380_to_scmd(ncmd);
+
+               set_host_byte(cmd, DID_RESET);
+               cmd->scsi_done(cmd);
+       }
+
+       list_for_each_entry(ncmd, &hostdata->autosense, list) {
+               struct scsi_cmnd *cmd = NCR5380_to_scmd(ncmd);
+
+               set_host_byte(cmd, DID_RESET);
+               cmd->scsi_done(cmd);
+       }
+
+       if (hostdata->connected) {
+               set_host_byte(hostdata->connected, DID_RESET);
+               complete_cmd(instance, hostdata->connected);
+               hostdata->connected = NULL;
+       }
+
+       if (hostdata->sensing) {
+               set_host_byte(hostdata->connected, DID_RESET);
+               complete_cmd(instance, hostdata->sensing);
+               hostdata->sensing = NULL;
+       }
+
+       for (i = 0; i < 8; ++i)
+               hostdata->busy[i] = 0;
+#ifdef REAL_DMA
+       hostdata->dma_len = 0;
+#endif
+
+       queue_work(hostdata->work_q, &hostdata->main_task);
+       spin_unlock_irqrestore(&hostdata->lock, flags);
 
        return SUCCESS;
 }
index 162112dd1bf8eabeaf0c8bb029291742d6c55ce4..a79288682a74ad91aad59b3ec8f64242337e3713 100644 (file)
 #ifndef NCR5380_H
 #define NCR5380_H
 
+#include <linux/delay.h>
 #include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/workqueue.h>
+#include <scsi/scsi_dbg.h>
 #include <scsi/scsi_eh.h>
+#include <scsi/scsi_transport_spi.h>
 
 #define NDEBUG_ARBITRATION     0x1
 #define NDEBUG_AUTOSENSE       0x2
 /* Write any value to this register to start an ini mode DMA receive */
 #define START_DMA_INITIATOR_RECEIVE_REG 7      /* wo */
 
-#define C400_CONTROL_STATUS_REG NCR53C400_register_offset-8    /* rw */
-
+/* NCR 53C400(A) Control Status Register bits: */
 #define CSR_RESET              0x80    /* wo  Resets 53c400 */
 #define CSR_53C80_REG          0x80    /* ro  5380 registers busy */
 #define CSR_TRANS_DIR          0x40    /* rw  Data transfer direction */
 #define CSR_BASE CSR_53C80_INTR
 #endif
 
-/* Number of 128-byte blocks to be transferred */
-#define C400_BLOCK_COUNTER_REG   NCR53C400_register_offset-7   /* rw */
-
-/* Resume transfer after disconnect */
-#define C400_RESUME_TRANSFER_REG NCR53C400_register_offset-6   /* wo */
-
-/* Access to host buffer stack */
-#define C400_HOST_BUFFER         NCR53C400_register_offset-4   /* rw */
-
-
 /* Note : PHASE_* macros are based on the values of the STATUS register */
 #define PHASE_MASK     (SR_MSG | SR_CD | SR_IO)
 
 
 #define PHASE_SR_TO_TCR(phase) ((phase) >> 2)
 
-/*
- * The internal should_disconnect() function returns these based on the 
- * expected length of a disconnect if a device supports disconnect/
- * reconnect.
- */
-
-#define DISCONNECT_NONE                0
-#define DISCONNECT_TIME_TO_DATA        1
-#define DISCONNECT_LONG                2
-
 /* 
  * "Special" value for the (unsigned char) command tag, to indicate
  * I_T_L nexus instead of I_T_L_Q.
 #define NO_IRQ         0
 #endif
 
-#define FLAG_HAS_LAST_BYTE_SENT                1       /* NCR53c81 or better */
-#define FLAG_CHECK_LAST_BYTE_SENT      2       /* Only test once */
-#define FLAG_NCR53C400                 4       /* NCR53c400 */
+#define FLAG_NO_DMA_FIXUP              1       /* No DMA errata workarounds */
 #define FLAG_NO_PSEUDO_DMA             8       /* Inhibit DMA */
-#define FLAG_DTC3181E                  16      /* DTC3181E */
 #define FLAG_LATE_DMA_SETUP            32      /* Setup NCR before DMA H/W */
 #define FLAG_TAGGED_QUEUING            64      /* as X3T9.2 spelled it */
-
-#ifndef ASM
+#define FLAG_TOSHIBA_DELAY             128     /* Allow for borken CD-ROMs */
 
 #ifdef SUPPORT_TAGS
 struct tag_alloc {
@@ -258,33 +238,24 @@ struct NCR5380_hostdata {
        NCR5380_implementation_fields;          /* implementation specific */
        struct Scsi_Host *host;                 /* Host backpointer */
        unsigned char id_mask, id_higher_mask;  /* 1 << id, all bits greater */
-       unsigned char targets_present;          /* targets we have connected
-                                                  to, so we can call a select
-                                                  failure a retryable condition */
-       volatile unsigned char busy[8];         /* index = target, bit = lun */
+       unsigned char busy[8];                  /* index = target, bit = lun */
 #if defined(REAL_DMA) || defined(REAL_DMA_POLL)
-       volatile int dma_len;                   /* requested length of DMA */
+       int dma_len;                            /* requested length of DMA */
 #endif
-       volatile unsigned char last_message;    /* last message OUT */
-       volatile struct scsi_cmnd *connected;   /* currently connected command */
-       volatile struct scsi_cmnd *issue_queue; /* waiting to be issued */
-       volatile struct scsi_cmnd *disconnected_queue;  /* waiting for reconnect */
-       volatile int restart_select;            /* we have disconnected,
-                                                  used to restart 
-                                                  NCR5380_select() */
-       volatile unsigned aborted:1;            /* flag, says aborted */
+       unsigned char last_message;             /* last message OUT */
+       struct scsi_cmnd *connected;            /* currently connected cmnd */
+       struct scsi_cmnd *selecting;            /* cmnd to be connected */
+       struct list_head unissued;              /* waiting to be issued */
+       struct list_head autosense;             /* priority issue queue */
+       struct list_head disconnected;          /* waiting for reconnect */
+       spinlock_t lock;                        /* protects this struct */
        int flags;
-       unsigned long time_expires;             /* in jiffies, set prior to sleeping */
-       int select_time;                        /* timer in select for target response */
-       volatile struct scsi_cmnd *selecting;
-       struct delayed_work coroutine;          /* our co-routine */
        struct scsi_eh_save ses;
+       struct scsi_cmnd *sensing;
        char info[256];
        int read_overruns;                /* number of bytes to cut from a
                                           * transfer to handle chip overruns */
-       int retain_dma_intr;
        struct work_struct main_task;
-       volatile int main_running;
 #ifdef SUPPORT_TAGS
        struct tag_alloc TagAlloc[8][8];        /* 8 targets and 8 LUNs */
 #endif
@@ -292,10 +263,23 @@ struct NCR5380_hostdata {
        unsigned spin_max_r;
        unsigned spin_max_w;
 #endif
+       struct workqueue_struct *work_q;
+       unsigned long accesses_per_ms;  /* chip register accesses per ms */
 };
 
 #ifdef __KERNEL__
 
+struct NCR5380_cmd {
+       struct list_head list;
+};
+
+#define NCR5380_CMD_SIZE               (sizeof(struct NCR5380_cmd))
+
+static inline struct scsi_cmnd *NCR5380_to_scmd(struct NCR5380_cmd *ncmd_ptr)
+{
+       return ((struct scsi_cmnd *)ncmd_ptr) - 1;
+}
+
 #ifndef NDEBUG
 #define NDEBUG (0)
 #endif
@@ -304,6 +288,11 @@ struct NCR5380_hostdata {
        do { if ((NDEBUG) & (flg)) \
                printk(KERN_DEBUG fmt, ## __VA_ARGS__); } while (0)
 
+#define dsprintk(flg, host, fmt, ...) \
+       do { if ((NDEBUG) & (flg)) \
+               shost_printk(KERN_DEBUG, host, fmt, ## __VA_ARGS__); \
+       } while (0)
+
 #if NDEBUG
 #define NCR5380_dprint(flg, arg) \
        do { if ((NDEBUG) & (flg)) NCR5380_print(arg); } while (0)
@@ -320,6 +309,7 @@ static void NCR5380_print(struct Scsi_Host *instance);
 static int NCR5380_probe_irq(struct Scsi_Host *instance, int possible);
 #endif
 static int NCR5380_init(struct Scsi_Host *instance, int flags);
+static int NCR5380_maybe_reset_bus(struct Scsi_Host *);
 static void NCR5380_exit(struct Scsi_Host *instance);
 static void NCR5380_information_transfer(struct Scsi_Host *instance);
 #ifndef DONT_USE_INTR
@@ -328,7 +318,7 @@ static irqreturn_t NCR5380_intr(int irq, void *dev_id);
 static void NCR5380_main(struct work_struct *work);
 static const char *NCR5380_info(struct Scsi_Host *instance);
 static void NCR5380_reselect(struct Scsi_Host *instance);
-static int NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd);
+static struct scsi_cmnd *NCR5380_select(struct Scsi_Host *, struct scsi_cmnd *);
 #if defined(PSEUDO_DMA) || defined(REAL_DMA) || defined(REAL_DMA_POLL)
 static int NCR5380_transfer_dma(struct Scsi_Host *instance, unsigned char *phase, int *count, unsigned char **data);
 #endif
@@ -443,5 +433,4 @@ static __inline__ int NCR5380_pc_dma_residual(struct Scsi_Host *instance)
 #endif                         /* defined(i386) || defined(__alpha__) */
 #endif                         /* defined(REAL_DMA)  */
 #endif                         /* __KERNEL__ */
-#endif                         /* ndef ASM */
 #endif                         /* NCR5380_H */
index d28d6c0f18c0b76acd384f53ed6132a54f13fa57..221f18c5df9363c704d8289c2baab4b10dd89db6 100644 (file)
@@ -4,9 +4,7 @@
  * Copyright 1995-2002, Russell King
  */
 #include <linux/module.h>
-#include <linux/signal.h>
 #include <linux/ioport.h>
-#include <linux/delay.h>
 #include <linux/blkdev.h>
 #include <linux/init.h>
 
 
 #include <scsi/scsi_host.h>
 
-#include <scsi/scsicam.h>
-
 #define PSEUDO_DMA
 
 #define priv(host)                     ((struct NCR5380_hostdata *)(host)->hostdata)
-#define NCR5380_local_declare()                struct Scsi_Host *_instance
-#define NCR5380_setup(instance)                _instance = instance
-#define NCR5380_read(reg)              cumanascsi_read(_instance, reg)
-#define NCR5380_write(reg, value)      cumanascsi_write(_instance, reg, value)
+#define NCR5380_read(reg)              cumanascsi_read(instance, reg)
+#define NCR5380_write(reg, value)      cumanascsi_write(instance, reg, value)
+
+#define NCR5380_dma_xfer_len(instance, cmd, phase)     (cmd->transfersize)
+
 #define NCR5380_intr                   cumanascsi_intr
 #define NCR5380_queue_command          cumanascsi_queue_command
 #define NCR5380_info                   cumanascsi_info
@@ -211,6 +208,8 @@ static struct scsi_host_template cumanascsi_template = {
        .cmd_per_lun            = 2,
        .use_clustering         = DISABLE_CLUSTERING,
        .proc_name              = "CumanaSCSI-1",
+       .cmd_size               = NCR5380_CMD_SIZE,
+       .max_sectors            = 128,
 };
 
 static int cumanascsi1_probe(struct expansion_card *ec,
@@ -240,23 +239,21 @@ static int cumanascsi1_probe(struct expansion_card *ec,
 
        host->irq = ec->irq;
 
-       NCR5380_init(host, 0);
+       ret = NCR5380_init(host, 0);
+       if (ret)
+               goto out_unmap;
+
+       NCR5380_maybe_reset_bus(host);
 
         priv(host)->ctrl = 0;
         writeb(0, priv(host)->base + CTRL);
 
-       host->n_io_port = 255;
-       if (!(request_region(host->io_port, host->n_io_port, "CumanaSCSI-1"))) {
-               ret = -EBUSY;
-               goto out_unmap;
-       }
-
        ret = request_irq(host->irq, cumanascsi_intr, 0,
                          "CumanaSCSI-1", host);
        if (ret) {
                printk("scsi%d: IRQ%d not free: %d\n",
                    host->host_no, host->irq, ret);
-               goto out_unmap;
+               goto out_exit;
        }
 
        ret = scsi_add_host(host, &ec->dev);
@@ -268,6 +265,8 @@ static int cumanascsi1_probe(struct expansion_card *ec,
 
  out_free_irq:
        free_irq(host->irq, host);
+ out_exit:
+       NCR5380_exit(host);
  out_unmap:
        iounmap(priv(host)->base);
        iounmap(priv(host)->dma);
index 7c6fa1479c9c1f4c92cd98c891e800d8b7a12ecb..1fab1d1896b1a7e81a53d2dc664e6f52a712b6d4 100644 (file)
@@ -5,9 +5,7 @@
  */
 
 #include <linux/module.h>
-#include <linux/signal.h>
 #include <linux/ioport.h>
-#include <linux/delay.h>
 #include <linux/blkdev.h>
 #include <linux/init.h>
 
 #define DONT_USE_INTR
 
 #define priv(host)                     ((struct NCR5380_hostdata *)(host)->hostdata)
-#define NCR5380_local_declare()                void __iomem *_base
-#define NCR5380_setup(host)            _base = priv(host)->base
 
-#define NCR5380_read(reg)              readb(_base + ((reg) << 2))
-#define NCR5380_write(reg, value)      writeb(value, _base + ((reg) << 2))
+#define NCR5380_read(reg) \
+       readb(priv(instance)->base + ((reg) << 2))
+#define NCR5380_write(reg, value) \
+       writeb(value, priv(instance)->base + ((reg) << 2))
+
+#define NCR5380_dma_xfer_len(instance, cmd, phase)     (cmd->transfersize)
+
 #define NCR5380_queue_command          oakscsi_queue_command
 #define NCR5380_info                   oakscsi_info
-#define NCR5380_show_info              oakscsi_show_info
 
 #define NCR5380_implementation_fields  \
        void __iomem *base
@@ -103,7 +103,6 @@ printk("reading %p len %d\n", addr, len);
 
 static struct scsi_host_template oakscsi_template = {
        .module                 = THIS_MODULE,
-       .show_info              = oakscsi_show_info,
        .name                   = "Oak 16-bit SCSI",
        .info                   = oakscsi_info,
        .queuecommand           = oakscsi_queue_command,
@@ -115,6 +114,8 @@ static struct scsi_host_template oakscsi_template = {
        .cmd_per_lun            = 2,
        .use_clustering         = DISABLE_CLUSTERING,
        .proc_name              = "oakscsi",
+       .cmd_size               = NCR5380_CMD_SIZE,
+       .max_sectors            = 128,
 };
 
 static int oakscsi_probe(struct expansion_card *ec, const struct ecard_id *id)
@@ -142,15 +143,21 @@ static int oakscsi_probe(struct expansion_card *ec, const struct ecard_id *id)
        host->irq = NO_IRQ;
        host->n_io_port = 255;
 
-       NCR5380_init(host, 0);
+       ret = NCR5380_init(host, 0);
+       if (ret)
+               goto out_unmap;
+
+       NCR5380_maybe_reset_bus(host);
 
        ret = scsi_add_host(host, &ec->dev);
        if (ret)
-               goto out_unmap;
+               goto out_exit;
 
        scsi_scan_host(host);
        goto out;
 
+ out_exit:
+       NCR5380_exit(host);
  out_unmap:
        iounmap(priv(host)->base);
  unreg:
index db87ece6edb2cd317d463c1afb21f7b9a9942b33..e65478651ca94030ac2b5d2671eecc7e7d4456e3 100644 (file)
@@ -1,15 +1,15 @@
 /*
  * NCR 5380 generic driver routines.  These should make it *trivial*
- *     to implement 5380 SCSI drivers under Linux with a non-trantor
- *     architecture.
+ * to implement 5380 SCSI drivers under Linux with a non-trantor
+ * architecture.
  *
- *     Note that these routines also work with NR53c400 family chips.
+ * Note that these routines also work with NR53c400 family chips.
  *
  * Copyright 1993, Drew Eckhardt
- *     Visionary Computing
- *     (Unix and Linux consulting and custom programming)
- *     drew@colorado.edu
- *     +1 (303) 666-5836
+ * Visionary Computing
+ * (Unix and Linux consulting and custom programming)
+ * drew@colorado.edu
+ * +1 (303) 666-5836
  *
  * For more information, please consult
  *
  * 1+ (800) 334-5454
  */
 
-/*
- * ++roman: To port the 5380 driver to the Atari, I had to do some changes in
- * this file, too:
- *
- *  - Some of the debug statements were incorrect (undefined variables and the
- *    like). I fixed that.
- *
- *  - In information_transfer(), I think a #ifdef was wrong. Looking at the
- *    possible DMA transfer size should also happen for REAL_DMA. I added this
- *    in the #if statement.
- *
- *  - When using real DMA, information_transfer() should return in a DATAOUT
- *    phase after starting the DMA. It has nothing more to do.
- *
- *  - The interrupt service routine should run main after end of DMA, too (not
- *    only after RESELECTION interrupts). Additionally, it should _not_ test
- *    for more interrupts after running main, since a DMA process may have
- *    been started and interrupts are turned on now. The new int could happen
- *    inside the execution of NCR5380_intr(), leading to recursive
- *    calls.
- *
- *  - I've added a function merge_contiguous_buffers() that tries to
- *    merge scatter-gather buffers that are located at contiguous
- *    physical addresses and can be processed with the same DMA setup.
- *    Since most scatter-gather operations work on a page (4K) of
- *    4 buffers (1K), in more than 90% of all cases three interrupts and
- *    DMA setup actions are saved.
- *
- * - I've deleted all the stuff for AUTOPROBE_IRQ, REAL_DMA_POLL, PSEUDO_DMA
- *    and USLEEP, because these were messing up readability and will never be
- *    needed for Atari SCSI.
- *
- * - I've revised the NCR5380_main() calling scheme (relax the 'main_running'
- *   stuff), and 'main' is executed in a bottom half if awoken by an
- *   interrupt.
- *
- * - The code was quite cluttered up by "#if (NDEBUG & NDEBUG_*) printk..."
- *   constructs. In my eyes, this made the source rather unreadable, so I
- *   finally replaced that by the *_PRINTK() macros.
- *
- */
-
-/*
- * Further development / testing that should be done :
- * 1.  Test linked command handling code after Eric is ready with
- *     the high level code.
- */
+/* Ported to Atari by Roman Hodek and others. */
 
 /* Adapted for the sun3 by Sam Creasey. */
 
-#include <scsi/scsi_dbg.h>
-#include <scsi/scsi_transport_spi.h>
-
-#if (NDEBUG & NDEBUG_LISTS)
-#define LIST(x, y)                                             \
-       do {                                                    \
-               printk("LINE:%d   Adding %p to %p\n",           \
-                      __LINE__, (void*)(x), (void*)(y));       \
-               if ((x) == (y))                                 \
-                       udelay(5);                              \
-       } while (0)
-#define REMOVE(w, x, y, z)                                     \
-       do {                                                    \
-               printk("LINE:%d   Removing: %p->%p  %p->%p \n", \
-                      __LINE__, (void*)(w), (void*)(x),        \
-                      (void*)(y), (void*)(z));                 \
-               if ((x) == (y))                                 \
-                       udelay(5);                              \
-       } while (0)
-#else
-#define LIST(x,y)
-#define REMOVE(w,x,y,z)
-#endif
-
-#ifndef notyet
-#undef LINKED
-#endif
-
 /*
  * Design
  *
  * piece of hardware that requires you to sit in a loop polling for
  * the REQ signal as long as you are connected.  Some devices are
  * brain dead (ie, many TEXEL CD ROM drives) and won't disconnect
- * while doing long seek operations.
- *
- * The workaround for this is to keep track of devices that have
- * disconnected.  If the device hasn't disconnected, for commands that
- * should disconnect, we do something like
- *
- * while (!REQ is asserted) { sleep for N usecs; poll for M usecs }
- *
- * Some tweaking of N and M needs to be done.  An algorithm based
- * on "time to data" would give the best results as long as short time
- * to datas (ie, on the same track) were considered, however these
+ * while doing long seek operations. [...] These
  * broken devices are the exception rather than the rule and I'd rather
  * spend my time optimizing for the normal case.
  *
  *
  * These macros control options :
  * AUTOSENSE - if defined, REQUEST SENSE will be performed automatically
- *     for commands that return with a CHECK CONDITION status.
+ * for commands that return with a CHECK CONDITION status.
  *
  * DIFFERENTIAL - if defined, NCR53c81 chips will use external differential
- *     transceivers.
- *
- * LINKED - if defined, linked commands are supported.
+ * transceivers.
  *
  * REAL_DMA - if defined, REAL DMA is used during the data transfer phases.
  *
  * NCR5380_write(register, value) - write to the specific register
  *
  * NCR5380_implementation_fields  - additional fields needed for this
- *      specific implementation of the NCR5380
+ * specific implementation of the NCR5380
  *
  * Either real DMA *or* pseudo DMA may be implemented
  * REAL functions :
  * NCR5380_REAL_DMA should be defined if real DMA is to be used.
  * Note that the DMA setup functions should return the number of bytes
- *     that they were able to program the controller for.
+ * that they were able to program the controller for.
  *
  * Also note that generic i386/PC versions of these macros are
- *     available as NCR5380_i386_dma_write_setup,
- *     NCR5380_i386_dma_read_setup, and NCR5380_i386_dma_residual.
+ * available as NCR5380_i386_dma_write_setup,
+ * NCR5380_i386_dma_read_setup, and NCR5380_i386_dma_residual.
  *
  * NCR5380_dma_write_setup(instance, src, count) - initialize
  * NCR5380_dma_read_setup(instance, dst, count) - initialize
  * possible) function may be used.
  */
 
-/* Macros ease life... :-) */
-#define        SETUP_HOSTDATA(in)                              \
-    struct NCR5380_hostdata *hostdata =                        \
-       (struct NCR5380_hostdata *)(in)->hostdata
-#define        HOSTDATA(in) ((struct NCR5380_hostdata *)(in)->hostdata)
-
-#define        NEXT(cmd)               ((struct scsi_cmnd *)(cmd)->host_scribble)
-#define        SET_NEXT(cmd,next)      ((cmd)->host_scribble = (void *)(next))
-#define        NEXTADDR(cmd)           ((struct scsi_cmnd **)&(cmd)->host_scribble)
-
-#define        HOSTNO          instance->host_no
-#define        H_NO(cmd)       (cmd)->device->host->host_no
+static int do_abort(struct Scsi_Host *);
+static void do_reset(struct Scsi_Host *);
 
 #ifdef SUPPORT_TAGS
 
  * cannot know it in advance :-( We just see a QUEUE_FULL status being
  * returned. So, in this case, the driver internal queue size assumption is
  * reduced to the number of active tags if QUEUE_FULL is returned by the
- * target. The command is returned to the mid-level, but with status changed
- * to BUSY, since --as I've seen-- the mid-level can't handle QUEUE_FULL
- * correctly.
+ * target.
  *
  * We're also not allowed running tagged commands as long as an untagged
  * command is active. And REQUEST SENSE commands after a contingent allegiance
@@ -304,7 +206,8 @@ static void __init init_tags(struct NCR5380_hostdata *hostdata)
 static int is_lun_busy(struct scsi_cmnd *cmd, int should_be_tagged)
 {
        u8 lun = cmd->device->lun;
-       SETUP_HOSTDATA(cmd->device->host);
+       struct Scsi_Host *instance = cmd->device->host;
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
 
        if (hostdata->busy[cmd->device->id] & (1 << lun))
                return 1;
@@ -314,8 +217,8 @@ static int is_lun_busy(struct scsi_cmnd *cmd, int should_be_tagged)
                return 0;
        if (hostdata->TagAlloc[scmd_id(cmd)][lun].nr_allocated >=
            hostdata->TagAlloc[scmd_id(cmd)][lun].queue_size) {
-               dprintk(NDEBUG_TAGS, "scsi%d: target %d lun %d: no free tags\n",
-                          H_NO(cmd), cmd->device->id, lun);
+               dsprintk(NDEBUG_TAGS, instance, "target %d lun %d: no free tags\n",
+                        scmd_id(cmd), lun);
                return 1;
        }
        return 0;
@@ -330,7 +233,8 @@ static int is_lun_busy(struct scsi_cmnd *cmd, int should_be_tagged)
 static void cmd_get_tag(struct scsi_cmnd *cmd, int should_be_tagged)
 {
        u8 lun = cmd->device->lun;
-       SETUP_HOSTDATA(cmd->device->host);
+       struct Scsi_Host *instance = cmd->device->host;
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
 
        /* If we or the target don't support tagged queuing, allocate the LUN for
         * an untagged command.
@@ -340,18 +244,16 @@ static void cmd_get_tag(struct scsi_cmnd *cmd, int should_be_tagged)
            !cmd->device->tagged_supported) {
                cmd->tag = TAG_NONE;
                hostdata->busy[cmd->device->id] |= (1 << lun);
-               dprintk(NDEBUG_TAGS, "scsi%d: target %d lun %d now allocated by untagged "
-                          "command\n", H_NO(cmd), cmd->device->id, lun);
+               dsprintk(NDEBUG_TAGS, instance, "target %d lun %d now allocated by untagged command\n",
+                        scmd_id(cmd), lun);
        } else {
                struct tag_alloc *ta = &hostdata->TagAlloc[scmd_id(cmd)][lun];
 
                cmd->tag = find_first_zero_bit(ta->allocated, MAX_TAGS);
                set_bit(cmd->tag, ta->allocated);
                ta->nr_allocated++;
-               dprintk(NDEBUG_TAGS, "scsi%d: using tag %d for target %d lun %d "
-                          "(now %d tags in use)\n",
-                          H_NO(cmd), cmd->tag, cmd->device->id,
-                          lun, ta->nr_allocated);
+               dsprintk(NDEBUG_TAGS, instance, "using tag %d for target %d lun %d (%d tags allocated)\n",
+                        cmd->tag, scmd_id(cmd), lun, ta->nr_allocated);
        }
 }
 
@@ -363,21 +265,22 @@ static void cmd_get_tag(struct scsi_cmnd *cmd, int should_be_tagged)
 static void cmd_free_tag(struct scsi_cmnd *cmd)
 {
        u8 lun = cmd->device->lun;
-       SETUP_HOSTDATA(cmd->device->host);
+       struct Scsi_Host *instance = cmd->device->host;
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
 
        if (cmd->tag == TAG_NONE) {
                hostdata->busy[cmd->device->id] &= ~(1 << lun);
-               dprintk(NDEBUG_TAGS, "scsi%d: target %d lun %d untagged cmd finished\n",
-                          H_NO(cmd), cmd->device->id, lun);
+               dsprintk(NDEBUG_TAGS, instance, "target %d lun %d untagged cmd freed\n",
+                        scmd_id(cmd), lun);
        } else if (cmd->tag >= MAX_TAGS) {
-               printk(KERN_NOTICE "scsi%d: trying to free bad tag %d!\n",
-                      H_NO(cmd), cmd->tag);
+               shost_printk(KERN_NOTICE, instance,
+                            "trying to free bad tag %d!\n", cmd->tag);
        } else {
                struct tag_alloc *ta = &hostdata->TagAlloc[scmd_id(cmd)][lun];
                clear_bit(cmd->tag, ta->allocated);
                ta->nr_allocated--;
-               dprintk(NDEBUG_TAGS, "scsi%d: freed tag %d for target %d lun %d\n",
-                          H_NO(cmd), cmd->tag, cmd->device->id, lun);
+               dsprintk(NDEBUG_TAGS, instance, "freed tag %d for target %d lun %d\n",
+                        cmd->tag, scmd_id(cmd), lun);
        }
 }
 
@@ -401,17 +304,15 @@ static void free_all_tags(struct NCR5380_hostdata *hostdata)
 
 #endif /* SUPPORT_TAGS */
 
-
-/*
- * Function: void merge_contiguous_buffers( struct scsi_cmnd *cmd )
- *
- * Purpose: Try to merge several scatter-gather requests into one DMA
- *    transfer. This is possible if the scatter buffers lie on
- *    physical contiguous addresses.
- *
- * Parameters: struct scsi_cmnd *cmd
- *    The command to work on. The first scatter buffer's data are
- *    assumed to be already transferred into ptr/this_residual.
+/**
+ * merge_contiguous_buffers - coalesce scatter-gather list entries
+ * @cmd: command requesting IO
+ *
+ * Try to merge several scatter-gather buffers into one DMA transfer.
+ * This is possible if the scatter buffers lie on physically
+ * contiguous addresses. The first scatter-gather buffer's data are
+ * assumed to be already transferred into cmd->SCp.this_residual.
+ * Every buffer merged avoids an interrupt and a DMA setup operation.
  */
 
 static void merge_contiguous_buffers(struct scsi_cmnd *cmd)
@@ -463,9 +364,7 @@ static inline void initialize_SCp(struct scsi_cmnd *cmd)
                cmd->SCp.buffers_residual = scsi_sg_count(cmd) - 1;
                cmd->SCp.ptr = sg_virt(cmd->SCp.buffer);
                cmd->SCp.this_residual = cmd->SCp.buffer->length;
-               /* ++roman: Try to merge some scatter-buffers if they are at
-                * contiguous physical addresses.
-                */
+
                merge_contiguous_buffers(cmd);
        } else {
                cmd->SCp.buffer = NULL;
@@ -473,31 +372,110 @@ static inline void initialize_SCp(struct scsi_cmnd *cmd)
                cmd->SCp.ptr = NULL;
                cmd->SCp.this_residual = 0;
        }
+
+       cmd->SCp.Status = 0;
+       cmd->SCp.Message = 0;
+}
+
+/**
+ * NCR5380_poll_politely2 - wait for two chip register values
+ * @instance: controller to poll
+ * @reg1: 5380 register to poll
+ * @bit1: Bitmask to check
+ * @val1: Expected value
+ * @reg2: Second 5380 register to poll
+ * @bit2: Second bitmask to check
+ * @val2: Second expected value
+ * @wait: Time-out in jiffies
+ *
+ * Polls the chip in a reasonably efficient manner waiting for an
+ * event to occur. After a short quick poll we begin to yield the CPU
+ * (if possible). In irq contexts the time-out is arbitrarily limited.
+ * Callers may hold locks as long as they are held in irq mode.
+ *
+ * Returns 0 if either or both event(s) occurred otherwise -ETIMEDOUT.
+ */
+
+static int NCR5380_poll_politely2(struct Scsi_Host *instance,
+                                  int reg1, int bit1, int val1,
+                                  int reg2, int bit2, int val2, int wait)
+{
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+       unsigned long deadline = jiffies + wait;
+       unsigned long n;
+
+       /* Busy-wait for up to 10 ms */
+       n = min(10000U, jiffies_to_usecs(wait));
+       n *= hostdata->accesses_per_ms;
+       n /= 2000;
+       do {
+               if ((NCR5380_read(reg1) & bit1) == val1)
+                       return 0;
+               if ((NCR5380_read(reg2) & bit2) == val2)
+                       return 0;
+               cpu_relax();
+       } while (n--);
+
+       if (irqs_disabled() || in_interrupt())
+               return -ETIMEDOUT;
+
+       /* Repeatedly sleep for 1 ms until deadline */
+       while (time_is_after_jiffies(deadline)) {
+               schedule_timeout_uninterruptible(1);
+               if ((NCR5380_read(reg1) & bit1) == val1)
+                       return 0;
+               if ((NCR5380_read(reg2) & bit2) == val2)
+                       return 0;
+       }
+
+       return -ETIMEDOUT;
 }
 
-#include <linux/delay.h>
+static inline int NCR5380_poll_politely(struct Scsi_Host *instance,
+                                        int reg, int bit, int val, int wait)
+{
+       return NCR5380_poll_politely2(instance, reg, bit, val,
+                                               reg, bit, val, wait);
+}
 
 #if NDEBUG
 static struct {
        unsigned char mask;
        const char *name;
 } signals[] = {
-       { SR_DBP, "PARITY"}, { SR_RST, "RST" }, { SR_BSY, "BSY" },
-       { SR_REQ, "REQ" }, { SR_MSG, "MSG" }, { SR_CD,  "CD" }, { SR_IO, "IO" },
-       { SR_SEL, "SEL" }, {0, NULL}
-}, basrs[] = {
-       {BASR_ATN, "ATN"}, {BASR_ACK, "ACK"}, {0, NULL}
-}, icrs[] = {
-       {ICR_ASSERT_RST, "ASSERT RST"},{ICR_ASSERT_ACK, "ASSERT ACK"},
-       {ICR_ASSERT_BSY, "ASSERT BSY"}, {ICR_ASSERT_SEL, "ASSERT SEL"},
-       {ICR_ASSERT_ATN, "ASSERT ATN"}, {ICR_ASSERT_DATA, "ASSERT DATA"},
+       {SR_DBP, "PARITY"},
+       {SR_RST, "RST"},
+       {SR_BSY, "BSY"},
+       {SR_REQ, "REQ"},
+       {SR_MSG, "MSG"},
+       {SR_CD, "CD"},
+       {SR_IO, "IO"},
+       {SR_SEL, "SEL"},
        {0, NULL}
-}, mrs[] = {
-       {MR_BLOCK_DMA_MODE, "MODE BLOCK DMA"}, {MR_TARGET, "MODE TARGET"},
-       {MR_ENABLE_PAR_CHECK, "MODE PARITY CHECK"}, {MR_ENABLE_PAR_INTR,
-       "MODE PARITY INTR"}, {MR_ENABLE_EOP_INTR,"MODE EOP INTR"},
+},
+basrs[] = {
+       {BASR_ATN, "ATN"},
+       {BASR_ACK, "ACK"},
+       {0, NULL}
+},
+icrs[] = {
+       {ICR_ASSERT_RST, "ASSERT RST"},
+       {ICR_ASSERT_ACK, "ASSERT ACK"},
+       {ICR_ASSERT_BSY, "ASSERT BSY"},
+       {ICR_ASSERT_SEL, "ASSERT SEL"},
+       {ICR_ASSERT_ATN, "ASSERT ATN"},
+       {ICR_ASSERT_DATA, "ASSERT DATA"},
+       {0, NULL}
+},
+mrs[] = {
+       {MR_BLOCK_DMA_MODE, "MODE BLOCK DMA"},
+       {MR_TARGET, "MODE TARGET"},
+       {MR_ENABLE_PAR_CHECK, "MODE PARITY CHECK"},
+       {MR_ENABLE_PAR_INTR, "MODE PARITY INTR"},
+       {MR_ENABLE_EOP_INTR, "MODE EOP INTR"},
        {MR_MONITOR_BSY, "MODE MONITOR BSY"},
-       {MR_DMA_MODE, "MODE DMA"}, {MR_ARBITRATE, "MODE ARBITRATION"},
+       {MR_DMA_MODE, "MODE DMA"},
+       {MR_ARBITRATE, "MODE ARBITRATION"},
        {0, NULL}
 };
 
@@ -511,15 +489,13 @@ static struct {
 static void NCR5380_print(struct Scsi_Host *instance)
 {
        unsigned char status, data, basr, mr, icr, i;
-       unsigned long flags;
 
-       local_irq_save(flags);
        data = NCR5380_read(CURRENT_SCSI_DATA_REG);
        status = NCR5380_read(STATUS_REG);
        mr = NCR5380_read(MODE_REG);
        icr = NCR5380_read(INITIATOR_COMMAND_REG);
        basr = NCR5380_read(BUS_AND_STATUS_REG);
-       local_irq_restore(flags);
+
        printk("STATUS_REG: %02x ", status);
        for (i = 0; signals[i].mask; ++i)
                if (status & signals[i].mask)
@@ -543,8 +519,12 @@ static struct {
        unsigned char value;
        const char *name;
 } phases[] = {
-       {PHASE_DATAOUT, "DATAOUT"}, {PHASE_DATAIN, "DATAIN"}, {PHASE_CMDOUT, "CMDOUT"},
-       {PHASE_STATIN, "STATIN"}, {PHASE_MSGOUT, "MSGOUT"}, {PHASE_MSGIN, "MSGIN"},
+       {PHASE_DATAOUT, "DATAOUT"},
+       {PHASE_DATAIN, "DATAIN"},
+       {PHASE_CMDOUT, "CMDOUT"},
+       {PHASE_STATIN, "STATIN"},
+       {PHASE_MSGOUT, "MSGOUT"},
+       {PHASE_MSGIN, "MSGIN"},
        {PHASE_UNKNOWN, "UNKNOWN"}
 };
 
@@ -553,8 +533,6 @@ static struct {
  * @instance: adapter to dump
  *
  * Print the current SCSI phase for debugging purposes
- *
- * Locks: none
  */
 
 static void NCR5380_print_phase(struct Scsi_Host *instance)
@@ -564,54 +542,21 @@ static void NCR5380_print_phase(struct Scsi_Host *instance)
 
        status = NCR5380_read(STATUS_REG);
        if (!(status & SR_REQ))
-               printk(KERN_DEBUG "scsi%d: REQ not asserted, phase unknown.\n", HOSTNO);
+               shost_printk(KERN_DEBUG, instance, "REQ not asserted, phase unknown.\n");
        else {
                for (i = 0; (phases[i].value != PHASE_UNKNOWN) &&
                     (phases[i].value != (status & PHASE_MASK)); ++i)
                        ;
-               printk(KERN_DEBUG "scsi%d: phase %s\n", HOSTNO, phases[i].name);
+               shost_printk(KERN_DEBUG, instance, "phase %s\n", phases[i].name);
        }
 }
-
 #endif
 
-/*
- * ++roman: New scheme of calling NCR5380_main()
- *
- * If we're not in an interrupt, we can call our main directly, it cannot be
- * already running. Else, we queue it on a task queue, if not 'main_running'
- * tells us that a lower level is already executing it. This way,
- * 'main_running' needs not be protected in a special way.
- *
- * queue_main() is a utility function for putting our main onto the task
- * queue, if main_running is false. It should be called only from a
- * interrupt or bottom half.
- */
-
-#include <linux/gfp.h>
-#include <linux/workqueue.h>
-#include <linux/interrupt.h>
-
-static inline void queue_main(struct NCR5380_hostdata *hostdata)
-{
-       if (!hostdata->main_running) {
-               /* If in interrupt and NCR5380_main() not already running,
-                  queue it on the 'immediate' task queue, to be processed
-                  immediately after the current interrupt processing has
-                  finished. */
-               schedule_work(&hostdata->main_task);
-       }
-       /* else: nothing to do: the running NCR5380_main() will pick up
-          any newly queued command. */
-}
-
 /**
  * NCR58380_info - report driver and host information
  * @instance: relevant scsi host instance
  *
  * For use as the host template info() handler.
- *
- * Locks: none
  */
 
 static const char *NCR5380_info(struct Scsi_Host *instance)
@@ -630,13 +575,14 @@ static void prepare_info(struct Scsi_Host *instance)
                 "base 0x%lx, irq %d, "
                 "can_queue %d, cmd_per_lun %d, "
                 "sg_tablesize %d, this_id %d, "
-                "flags { %s}, "
+                "flags { %s%s}, "
                 "options { %s} ",
                 instance->hostt->name, instance->io_port, instance->n_io_port,
                 instance->base, instance->irq,
                 instance->can_queue, instance->cmd_per_lun,
                 instance->sg_tablesize, instance->this_id,
                 hostdata->flags & FLAG_TAGGED_QUEUING ? "TAGGED_QUEUING " : "",
+                hostdata->flags & FLAG_TOSHIBA_DELAY  ? "TOSHIBA_DELAY "  : "",
 #ifdef DIFFERENTIAL
                 "DIFFERENTIAL "
 #endif
@@ -652,102 +598,6 @@ static void prepare_info(struct Scsi_Host *instance)
                 "");
 }
 
-/**
- * NCR5380_print_status - dump controller info
- * @instance: controller to dump
- *
- * Print commands in the various queues, called from NCR5380_abort
- * to aid debugging.
- */
-
-static void lprint_Scsi_Cmnd(struct scsi_cmnd *cmd)
-{
-       int i, s;
-       unsigned char *command;
-       printk("scsi%d: destination target %d, lun %llu\n",
-               H_NO(cmd), cmd->device->id, cmd->device->lun);
-       printk(KERN_CONT "        command = ");
-       command = cmd->cmnd;
-       printk(KERN_CONT "%2d (0x%02x)", command[0], command[0]);
-       for (i = 1, s = COMMAND_SIZE(command[0]); i < s; ++i)
-               printk(KERN_CONT " %02x", command[i]);
-       printk("\n");
-}
-
-static void NCR5380_print_status(struct Scsi_Host *instance)
-{
-       struct NCR5380_hostdata *hostdata;
-       struct scsi_cmnd *ptr;
-       unsigned long flags;
-
-       NCR5380_dprint(NDEBUG_ANY, instance);
-       NCR5380_dprint_phase(NDEBUG_ANY, instance);
-
-       hostdata = (struct NCR5380_hostdata *)instance->hostdata;
-
-       local_irq_save(flags);
-       printk("NCR5380: coroutine is%s running.\n",
-               hostdata->main_running ? "" : "n't");
-       if (!hostdata->connected)
-               printk("scsi%d: no currently connected command\n", HOSTNO);
-       else
-               lprint_Scsi_Cmnd((struct scsi_cmnd *) hostdata->connected);
-       printk("scsi%d: issue_queue\n", HOSTNO);
-       for (ptr = (struct scsi_cmnd *)hostdata->issue_queue; ptr; ptr = NEXT(ptr))
-               lprint_Scsi_Cmnd(ptr);
-
-       printk("scsi%d: disconnected_queue\n", HOSTNO);
-       for (ptr = (struct scsi_cmnd *) hostdata->disconnected_queue; ptr;
-            ptr = NEXT(ptr))
-               lprint_Scsi_Cmnd(ptr);
-
-       local_irq_restore(flags);
-       printk("\n");
-}
-
-static void show_Scsi_Cmnd(struct scsi_cmnd *cmd, struct seq_file *m)
-{
-       int i, s;
-       unsigned char *command;
-       seq_printf(m, "scsi%d: destination target %d, lun %llu\n",
-               H_NO(cmd), cmd->device->id, cmd->device->lun);
-       seq_puts(m, "        command = ");
-       command = cmd->cmnd;
-       seq_printf(m, "%2d (0x%02x)", command[0], command[0]);
-       for (i = 1, s = COMMAND_SIZE(command[0]); i < s; ++i)
-               seq_printf(m, " %02x", command[i]);
-       seq_putc(m, '\n');
-}
-
-static int __maybe_unused NCR5380_show_info(struct seq_file *m,
-                                            struct Scsi_Host *instance)
-{
-       struct NCR5380_hostdata *hostdata;
-       struct scsi_cmnd *ptr;
-       unsigned long flags;
-
-       hostdata = (struct NCR5380_hostdata *)instance->hostdata;
-
-       local_irq_save(flags);
-       seq_printf(m, "NCR5380: coroutine is%s running.\n",
-               hostdata->main_running ? "" : "n't");
-       if (!hostdata->connected)
-               seq_printf(m, "scsi%d: no currently connected command\n", HOSTNO);
-       else
-               show_Scsi_Cmnd((struct scsi_cmnd *) hostdata->connected, m);
-       seq_printf(m, "scsi%d: issue_queue\n", HOSTNO);
-       for (ptr = (struct scsi_cmnd *)hostdata->issue_queue; ptr; ptr = NEXT(ptr))
-               show_Scsi_Cmnd(ptr, m);
-
-       seq_printf(m, "scsi%d: disconnected_queue\n", HOSTNO);
-       for (ptr = (struct scsi_cmnd *) hostdata->disconnected_queue; ptr;
-            ptr = NEXT(ptr))
-               show_Scsi_Cmnd(ptr, m);
-
-       local_irq_restore(flags);
-       return 0;
-}
-
 /**
  * NCR5380_init - initialise an NCR5380
  * @instance: adapter to configure
@@ -764,11 +614,11 @@ static int __maybe_unused NCR5380_show_info(struct seq_file *m,
 
 static int __init NCR5380_init(struct Scsi_Host *instance, int flags)
 {
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
        int i;
-       SETUP_HOSTDATA(instance);
+       unsigned long deadline;
 
        hostdata->host = instance;
-       hostdata->aborted = 0;
        hostdata->id_mask = 1 << instance->this_id;
        hostdata->id_higher_mask = 0;
        for (i = hostdata->id_mask; i <= 0x80; i <<= 1)
@@ -782,13 +632,21 @@ static int __init NCR5380_init(struct Scsi_Host *instance, int flags)
 #if defined (REAL_DMA)
        hostdata->dma_len = 0;
 #endif
-       hostdata->targets_present = 0;
+       spin_lock_init(&hostdata->lock);
        hostdata->connected = NULL;
-       hostdata->issue_queue = NULL;
-       hostdata->disconnected_queue = NULL;
+       hostdata->sensing = NULL;
+       INIT_LIST_HEAD(&hostdata->autosense);
+       INIT_LIST_HEAD(&hostdata->unissued);
+       INIT_LIST_HEAD(&hostdata->disconnected);
+
        hostdata->flags = flags;
 
        INIT_WORK(&hostdata->main_task, NCR5380_main);
+       hostdata->work_q = alloc_workqueue("ncr5380_%d",
+                               WQ_UNBOUND | WQ_MEM_RECLAIM,
+                               1, instance->host_no);
+       if (!hostdata->work_q)
+               return -ENOMEM;
 
        prepare_info(instance);
 
@@ -797,6 +655,72 @@ static int __init NCR5380_init(struct Scsi_Host *instance, int flags)
        NCR5380_write(TARGET_COMMAND_REG, 0);
        NCR5380_write(SELECT_ENABLE_REG, 0);
 
+       /* Calibrate register polling loop */
+       i = 0;
+       deadline = jiffies + 1;
+       do {
+               cpu_relax();
+       } while (time_is_after_jiffies(deadline));
+       deadline += msecs_to_jiffies(256);
+       do {
+               NCR5380_read(STATUS_REG);
+               ++i;
+               cpu_relax();
+       } while (time_is_after_jiffies(deadline));
+       hostdata->accesses_per_ms = i / 256;
+
+       return 0;
+}
+
+/**
+ * NCR5380_maybe_reset_bus - Detect and correct bus wedge problems.
+ * @instance: adapter to check
+ *
+ * If the system crashed, it may have crashed with a connected target and
+ * the SCSI bus busy. Check for BUS FREE phase. If not, try to abort the
+ * currently established nexus, which we know nothing about. Failing that
+ * do a bus reset.
+ *
+ * Note that a bus reset will cause the chip to assert IRQ.
+ *
+ * Returns 0 if successful, otherwise -ENXIO.
+ */
+
+static int NCR5380_maybe_reset_bus(struct Scsi_Host *instance)
+{
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+       int pass;
+
+       for (pass = 1; (NCR5380_read(STATUS_REG) & SR_BSY) && pass <= 6; ++pass) {
+               switch (pass) {
+               case 1:
+               case 3:
+               case 5:
+                       shost_printk(KERN_ERR, instance, "SCSI bus busy, waiting up to five seconds\n");
+                       NCR5380_poll_politely(instance,
+                                             STATUS_REG, SR_BSY, 0, 5 * HZ);
+                       break;
+               case 2:
+                       shost_printk(KERN_ERR, instance, "bus busy, attempting abort\n");
+                       do_abort(instance);
+                       break;
+               case 4:
+                       shost_printk(KERN_ERR, instance, "bus busy, attempting reset\n");
+                       do_reset(instance);
+                       /* Wait after a reset; the SCSI standard calls for
+                        * 250ms, we wait 500ms to be on the safe side.
+                        * But some Toshiba CD-ROMs need ten times that.
+                        */
+                       if (hostdata->flags & FLAG_TOSHIBA_DELAY)
+                               msleep(2500);
+                       else
+                               msleep(500);
+                       break;
+               case 6:
+                       shost_printk(KERN_ERR, instance, "bus locked solid\n");
+                       return -ENXIO;
+               }
+       }
        return 0;
 }
 
@@ -812,6 +736,38 @@ static void NCR5380_exit(struct Scsi_Host *instance)
        struct NCR5380_hostdata *hostdata = shost_priv(instance);
 
        cancel_work_sync(&hostdata->main_task);
+       destroy_workqueue(hostdata->work_q);
+}
+
+/**
+ * complete_cmd - finish processing a command and return it to the SCSI ML
+ * @instance: the host instance
+ * @cmd: command to complete
+ */
+
+static void complete_cmd(struct Scsi_Host *instance,
+                         struct scsi_cmnd *cmd)
+{
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+
+       dsprintk(NDEBUG_QUEUES, instance, "complete_cmd: cmd %p\n", cmd);
+
+       if (hostdata->sensing == cmd) {
+               /* Autosense processing ends here */
+               if ((cmd->result & 0xff) != SAM_STAT_GOOD) {
+                       scsi_eh_restore_cmnd(cmd, &hostdata->ses);
+                       set_host_byte(cmd, DID_ERROR);
+               } else
+                       scsi_eh_restore_cmnd(cmd, &hostdata->ses);
+               hostdata->sensing = NULL;
+       }
+
+#ifdef SUPPORT_TAGS
+       cmd_free_tag(cmd);
+#else
+       hostdata->busy[scmd_id(cmd)] &= ~(1 << cmd->device->lun);
+#endif
+       cmd->scsi_done(cmd);
 }
 
 /**
@@ -819,7 +775,7 @@ static void NCR5380_exit(struct Scsi_Host *instance)
  * @instance: the relevant SCSI adapter
  * @cmd: SCSI command
  *
- * cmd is added to the per instance issue_queue, with minor
+ * cmd is added to the per-instance issue queue, with minor
  * twiddling done to the host specific fields of cmd.  If the
  * main coroutine is not running, it is restarted.
  */
@@ -828,44 +784,23 @@ static int NCR5380_queue_command(struct Scsi_Host *instance,
                                  struct scsi_cmnd *cmd)
 {
        struct NCR5380_hostdata *hostdata = shost_priv(instance);
-       struct scsi_cmnd *tmp;
+       struct NCR5380_cmd *ncmd = scsi_cmd_priv(cmd);
        unsigned long flags;
 
 #if (NDEBUG & NDEBUG_NO_WRITE)
        switch (cmd->cmnd[0]) {
        case WRITE_6:
        case WRITE_10:
-               printk(KERN_NOTICE "scsi%d: WRITE attempted with NO_WRITE debugging flag set\n",
-                      H_NO(cmd));
+               shost_printk(KERN_DEBUG, instance, "WRITE attempted with NDEBUG_NO_WRITE set\n");
                cmd->result = (DID_ERROR << 16);
                cmd->scsi_done(cmd);
                return 0;
        }
 #endif /* (NDEBUG & NDEBUG_NO_WRITE) */
 
-       /*
-        * We use the host_scribble field as a pointer to the next command
-        * in a queue
-        */
-
-       SET_NEXT(cmd, NULL);
        cmd->result = 0;
 
        /*
-        * Insert the cmd into the issue queue. Note that REQUEST SENSE
-        * commands are added to the head of the queue since any command will
-        * clear the contingent allegiance condition that exists and the
-        * sense data is only guaranteed to be valid while the condition exists.
-        */
-
-       /* ++guenther: now that the issue queue is being set up, we can lock ST-DMA.
-        * Otherwise a running NCR5380_main may steal the lock.
-        * Lock before actually inserting due to fairness reasons explained in
-        * atari_scsi.c. If we insert first, then it's impossible for this driver
-        * to release the lock.
-        * Stop timer for this command while waiting for the lock, or timeouts
-        * may happen (and they really do), and it's no good if the command doesn't
-        * appear in any of the queues.
         * ++roman: Just disabling the NCR interrupt isn't sufficient here,
         * because also a timer int can trigger an abort or reset, which would
         * alter queues and touch the lock.
@@ -873,7 +808,7 @@ static int NCR5380_queue_command(struct Scsi_Host *instance,
        if (!NCR5380_acquire_dma_irq(instance))
                return SCSI_MLQUEUE_HOST_BUSY;
 
-       local_irq_save(flags);
+       spin_lock_irqsave(&hostdata->lock, flags);
 
        /*
         * Insert the cmd into the issue queue. Note that REQUEST SENSE
@@ -882,33 +817,18 @@ static int NCR5380_queue_command(struct Scsi_Host *instance,
         * sense data is only guaranteed to be valid while the condition exists.
         */
 
-       if (!(hostdata->issue_queue) || (cmd->cmnd[0] == REQUEST_SENSE)) {
-               LIST(cmd, hostdata->issue_queue);
-               SET_NEXT(cmd, hostdata->issue_queue);
-               hostdata->issue_queue = cmd;
-       } else {
-               for (tmp = (struct scsi_cmnd *)hostdata->issue_queue;
-                    NEXT(tmp); tmp = NEXT(tmp))
-                       ;
-               LIST(cmd, tmp);
-               SET_NEXT(tmp, cmd);
-       }
-       local_irq_restore(flags);
+       if (cmd->cmnd[0] == REQUEST_SENSE)
+               list_add(&ncmd->list, &hostdata->unissued);
+       else
+               list_add_tail(&ncmd->list, &hostdata->unissued);
 
-       dprintk(NDEBUG_QUEUES, "scsi%d: command added to %s of queue\n", H_NO(cmd),
-                 (cmd->cmnd[0] == REQUEST_SENSE) ? "head" : "tail");
+       spin_unlock_irqrestore(&hostdata->lock, flags);
 
-       /* If queue_command() is called from an interrupt (real one or bottom
-        * half), we let queue_main() do the job of taking care about main. If it
-        * is already running, this is a no-op, else main will be queued.
-        *
-        * If we're not in an interrupt, we can call NCR5380_main()
-        * unconditionally, because it cannot be already running.
-        */
-       if (in_interrupt() || irqs_disabled())
-               queue_main(hostdata);
-       else
-               NCR5380_main(&hostdata->main_task);
+       dsprintk(NDEBUG_QUEUES, instance, "command %p added to %s of queue\n",
+                cmd, (cmd->cmnd[0] == REQUEST_SENSE) ? "head" : "tail");
+
+       /* Kick off command processing */
+       queue_work(hostdata->work_q, &hostdata->main_task);
        return 0;
 }
 
@@ -917,13 +837,78 @@ static inline void maybe_release_dma_irq(struct Scsi_Host *instance)
        struct NCR5380_hostdata *hostdata = shost_priv(instance);
 
        /* Caller does the locking needed to set & test these data atomically */
-       if (!hostdata->disconnected_queue &&
-           !hostdata->issue_queue &&
+       if (list_empty(&hostdata->disconnected) &&
+           list_empty(&hostdata->unissued) &&
+           list_empty(&hostdata->autosense) &&
            !hostdata->connected &&
-           !hostdata->retain_dma_intr)
+           !hostdata->selecting)
                NCR5380_release_dma_irq(instance);
 }
 
+/**
+ * dequeue_next_cmd - dequeue a command for processing
+ * @instance: the scsi host instance
+ *
+ * Priority is given to commands on the autosense queue. These commands
+ * need autosense because of a CHECK CONDITION result.
+ *
+ * Returns a command pointer if a command is found for a target that is
+ * not already busy. Otherwise returns NULL.
+ */
+
+static struct scsi_cmnd *dequeue_next_cmd(struct Scsi_Host *instance)
+{
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+       struct NCR5380_cmd *ncmd;
+       struct scsi_cmnd *cmd;
+
+       if (list_empty(&hostdata->autosense)) {
+               list_for_each_entry(ncmd, &hostdata->unissued, list) {
+                       cmd = NCR5380_to_scmd(ncmd);
+                       dsprintk(NDEBUG_QUEUES, instance, "dequeue: cmd=%p target=%d busy=0x%02x lun=%llu\n",
+                                cmd, scmd_id(cmd), hostdata->busy[scmd_id(cmd)], cmd->device->lun);
+
+                       if (
+#ifdef SUPPORT_TAGS
+                           !is_lun_busy(cmd, 1)
+#else
+                           !(hostdata->busy[scmd_id(cmd)] & (1 << cmd->device->lun))
+#endif
+                       ) {
+                               list_del(&ncmd->list);
+                               dsprintk(NDEBUG_QUEUES, instance,
+                                        "dequeue: removed %p from issue queue\n", cmd);
+                               return cmd;
+                       }
+               }
+       } else {
+               /* Autosense processing begins here */
+               ncmd = list_first_entry(&hostdata->autosense,
+                                       struct NCR5380_cmd, list);
+               list_del(&ncmd->list);
+               cmd = NCR5380_to_scmd(ncmd);
+               dsprintk(NDEBUG_QUEUES, instance,
+                        "dequeue: removed %p from autosense queue\n", cmd);
+               scsi_eh_prep_cmnd(cmd, &hostdata->ses, NULL, 0, ~0);
+               hostdata->sensing = cmd;
+               return cmd;
+       }
+       return NULL;
+}
+
+static void requeue_cmd(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
+{
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+       struct NCR5380_cmd *ncmd = scsi_cmd_priv(cmd);
+
+       if (hostdata->sensing) {
+               scsi_eh_restore_cmnd(cmd, &hostdata->ses);
+               list_add(&ncmd->list, &hostdata->autosense);
+               hostdata->sensing = NULL;
+       } else
+               list_add(&ncmd->list, &hostdata->unissued);
+}
+
 /**
  * NCR5380_main - NCR state machines
  *
@@ -931,8 +916,6 @@ static inline void maybe_release_dma_irq(struct Scsi_Host *instance)
  * be done on the NCR5380 host adapters in a system.  Both
  * NCR5380_queue_command() and NCR5380_intr() will try to start it
  * in case it is not running.
- *
- * Locks: called as its own thread with no locks held.
  */
 
 static void NCR5380_main(struct work_struct *work)
@@ -940,154 +923,69 @@ static void NCR5380_main(struct work_struct *work)
        struct NCR5380_hostdata *hostdata =
                container_of(work, struct NCR5380_hostdata, main_task);
        struct Scsi_Host *instance = hostdata->host;
-       struct scsi_cmnd *tmp, *prev;
+       struct scsi_cmnd *cmd;
        int done;
-       unsigned long flags;
 
        /*
-        * We run (with interrupts disabled) until we're sure that none of
-        * the host adapters have anything that can be done, at which point
-        * we set main_running to 0 and exit.
-        *
-        * Interrupts are enabled before doing various other internal
-        * instructions, after we've decided that we need to run through
-        * the loop again.
-        *
-        * this should prevent any race conditions.
-        *
         * ++roman: Just disabling the NCR interrupt isn't sufficient here,
         * because also a timer int can trigger an abort or reset, which can
         * alter queues and touch the Falcon lock.
         */
 
-       /* Tell int handlers main() is now already executing.  Note that
-          no races are possible here. If an int comes in before
-          'main_running' is set here, and queues/executes main via the
-          task queue, it doesn't do any harm, just this instance of main
-          won't find any work left to do. */
-       if (hostdata->main_running)
-               return;
-       hostdata->main_running = 1;
-
-       local_save_flags(flags);
        do {
-               local_irq_disable();    /* Freeze request queues */
                done = 1;
 
-               if (!hostdata->connected) {
-                       dprintk(NDEBUG_MAIN, "scsi%d: not connected\n", HOSTNO);
-                       /*
-                        * Search through the issue_queue for a command destined
-                        * for a target that's not busy.
-                        */
-#if (NDEBUG & NDEBUG_LISTS)
-                       for (tmp = (struct scsi_cmnd *) hostdata->issue_queue, prev = NULL;
-                            tmp && (tmp != prev); prev = tmp, tmp = NEXT(tmp))
-                               ;
-                       /*printk("%p  ", tmp);*/
-                       if ((tmp == prev) && tmp)
-                               printk(" LOOP\n");
-                       /* else printk("\n"); */
-#endif
-                       for (tmp = (struct scsi_cmnd *) hostdata->issue_queue,
-                            prev = NULL; tmp; prev = tmp, tmp = NEXT(tmp)) {
-                               u8 lun = tmp->device->lun;
-
-                               dprintk(NDEBUG_LISTS,
-                                       "MAIN tmp=%p target=%d busy=%d lun=%d\n",
-                                       tmp, scmd_id(tmp), hostdata->busy[scmd_id(tmp)],
-                                       lun);
-                               /*  When we find one, remove it from the issue queue. */
-                               /* ++guenther: possible race with Falcon locking */
-                               if (
-#ifdef SUPPORT_TAGS
-                                   !is_lun_busy( tmp, tmp->cmnd[0] != REQUEST_SENSE)
-#else
-                                   !(hostdata->busy[tmp->device->id] & (1 << lun))
-#endif
-                                   ) {
-                                       /* ++guenther: just to be sure, this must be atomic */
-                                       local_irq_disable();
-                                       if (prev) {
-                                               REMOVE(prev, NEXT(prev), tmp, NEXT(tmp));
-                                               SET_NEXT(prev, NEXT(tmp));
-                                       } else {
-                                               REMOVE(-1, hostdata->issue_queue, tmp, NEXT(tmp));
-                                               hostdata->issue_queue = NEXT(tmp);
-                                       }
-                                       SET_NEXT(tmp, NULL);
-                                       hostdata->retain_dma_intr++;
+               spin_lock_irq(&hostdata->lock);
+               while (!hostdata->connected &&
+                      (cmd = dequeue_next_cmd(instance))) {
 
-                                       /* reenable interrupts after finding one */
-                                       local_irq_restore(flags);
+                       dsprintk(NDEBUG_MAIN, instance, "main: dequeued %p\n", cmd);
 
-                                       /*
-                                        * Attempt to establish an I_T_L nexus here.
-                                        * On success, instance->hostdata->connected is set.
-                                        * On failure, we must add the command back to the
-                                        *   issue queue so we can keep trying.
-                                        */
-                                       dprintk(NDEBUG_MAIN, "scsi%d: main(): command for target %d "
-                                                   "lun %d removed from issue_queue\n",
-                                                   HOSTNO, tmp->device->id, lun);
-                                       /*
-                                        * REQUEST SENSE commands are issued without tagged
-                                        * queueing, even on SCSI-II devices because the
-                                        * contingent allegiance condition exists for the
-                                        * entire unit.
-                                        */
-                                       /* ++roman: ...and the standard also requires that
-                                        * REQUEST SENSE command are untagged.
-                                        */
+                       /*
+                        * Attempt to establish an I_T_L nexus here.
+                        * On success, instance->hostdata->connected is set.
+                        * On failure, we must add the command back to the
+                        * issue queue so we can keep trying.
+                        */
+                       /*
+                        * REQUEST SENSE commands are issued without tagged
+                        * queueing, even on SCSI-II devices because the
+                        * contingent allegiance condition exists for the
+                        * entire unit.
+                        */
+                       /* ++roman: ...and the standard also requires that
+                        * REQUEST SENSE command are untagged.
+                        */
 
 #ifdef SUPPORT_TAGS
-                                       cmd_get_tag(tmp, tmp->cmnd[0] != REQUEST_SENSE);
+                       cmd_get_tag(cmd, cmd->cmnd[0] != REQUEST_SENSE);
 #endif
-                                       if (!NCR5380_select(instance, tmp)) {
-                                               local_irq_disable();
-                                               hostdata->retain_dma_intr--;
-                                               /* release if target did not response! */
-                                               maybe_release_dma_irq(instance);
-                                               local_irq_restore(flags);
-                                               break;
-                                       } else {
-                                               local_irq_disable();
-                                               LIST(tmp, hostdata->issue_queue);
-                                               SET_NEXT(tmp, hostdata->issue_queue);
-                                               hostdata->issue_queue = tmp;
+                       cmd = NCR5380_select(instance, cmd);
+                       if (!cmd) {
+                               dsprintk(NDEBUG_MAIN, instance, "main: select complete\n");
+                               maybe_release_dma_irq(instance);
+                       } else {
+                               dsprintk(NDEBUG_MAIN | NDEBUG_QUEUES, instance,
+                                        "main: select failed, returning %p to queue\n", cmd);
+                               requeue_cmd(instance, cmd);
 #ifdef SUPPORT_TAGS
-                                               cmd_free_tag(tmp);
+                               cmd_free_tag(cmd);
 #endif
-                                               hostdata->retain_dma_intr--;
-                                               local_irq_restore(flags);
-                                               dprintk(NDEBUG_MAIN, "scsi%d: main(): select() failed, "
-                                                           "returned to issue_queue\n", HOSTNO);
-                                               if (hostdata->connected)
-                                                       break;
-                                       }
-                               } /* if target/lun/target queue is not busy */
-                       } /* for issue_queue */
-               } /* if (!hostdata->connected) */
-
+                       }
+               }
                if (hostdata->connected
 #ifdef REAL_DMA
                    && !hostdata->dma_len
 #endif
                    ) {
-                       local_irq_restore(flags);
-                       dprintk(NDEBUG_MAIN, "scsi%d: main: performing information transfer\n",
-                                   HOSTNO);
+                       dsprintk(NDEBUG_MAIN, instance, "main: performing information transfer\n");
                        NCR5380_information_transfer(instance);
-                       dprintk(NDEBUG_MAIN, "scsi%d: main: done set false\n", HOSTNO);
                        done = 0;
                }
+               spin_unlock_irq(&hostdata->lock);
+               if (!done)
+                       cond_resched();
        } while (!done);
-
-       /* Better allow ints _after_ 'main_running' has been cleared, else
-          an interrupt could believe we'll pick up the work it left for
-          us, but we won't see it anymore here... */
-       hostdata->main_running = 0;
-       local_irq_restore(flags);
 }
 
 
@@ -1096,27 +994,20 @@ static void NCR5380_main(struct work_struct *work)
  * Function : void NCR5380_dma_complete (struct Scsi_Host *instance)
  *
  * Purpose : Called by interrupt handler when DMA finishes or a phase
- *     mismatch occurs (which would finish the DMA transfer).
+ * mismatch occurs (which would finish the DMA transfer).
  *
  * Inputs : instance - this instance of the NCR5380.
- *
  */
 
 static void NCR5380_dma_complete(struct Scsi_Host *instance)
 {
-       SETUP_HOSTDATA(instance);
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
        int transferred;
        unsigned char **data;
-       volatile int *count;
+       int *count;
        int saved_data = 0, overrun = 0;
        unsigned char p;
 
-       if (!hostdata->connected) {
-               printk(KERN_WARNING "scsi%d: received end of DMA interrupt with "
-                      "no connected cmd\n", HOSTNO);
-               return;
-       }
-
        if (hostdata->read_overruns) {
                p = hostdata->connected->SCp.phase;
                if (p & SR_IO) {
@@ -1126,15 +1017,11 @@ static void NCR5380_dma_complete(struct Scsi_Host *instance)
                            (BASR_PHASE_MATCH|BASR_ACK)) {
                                saved_data = NCR5380_read(INPUT_DATA_REG);
                                overrun = 1;
-                               dprintk(NDEBUG_DMA, "scsi%d: read overrun handled\n", HOSTNO);
+                               dsprintk(NDEBUG_DMA, instance, "read overrun handled\n");
                        }
                }
        }
 
-       dprintk(NDEBUG_DMA, "scsi%d: real DMA transfer complete, basr 0x%X, sr 0x%X\n",
-                  HOSTNO, NCR5380_read(BUS_AND_STATUS_REG),
-                  NCR5380_read(STATUS_REG));
-
 #if defined(CONFIG_SUN3)
        if ((sun3scsi_dma_finish(rq_data_dir(hostdata->connected->request)))) {
                pr_err("scsi%d: overrun in UDC counter -- not prepared to deal with this!\n",
@@ -1153,9 +1040,9 @@ static void NCR5380_dma_complete(struct Scsi_Host *instance)
        }
 #endif
 
-       (void)NCR5380_read(RESET_PARITY_INTERRUPT_REG);
        NCR5380_write(MODE_REG, MR_BASE);
        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+       NCR5380_read(RESET_PARITY_INTERRUPT_REG);
 
        transferred = hostdata->dma_len - NCR5380_dma_residual(instance);
        hostdata->dma_len = 0;
@@ -1194,140 +1081,160 @@ static void NCR5380_dma_complete(struct Scsi_Host *instance)
  * Handle interrupts, reestablishing I_T_L or I_T_L_Q nexuses
  * from the disconnected queue, and restarting NCR5380_main()
  * as required.
+ *
+ * The chip can assert IRQ in any of six different conditions. The IRQ flag
+ * is then cleared by reading the Reset Parity/Interrupt Register (RPIR).
+ * Three of these six conditions are latched in the Bus and Status Register:
+ * - End of DMA (cleared by ending DMA Mode)
+ * - Parity error (cleared by reading RPIR)
+ * - Loss of BSY (cleared by reading RPIR)
+ * Two conditions have flag bits that are not latched:
+ * - Bus phase mismatch (non-maskable in DMA Mode, cleared by ending DMA Mode)
+ * - Bus reset (non-maskable)
+ * The remaining condition has no flag bit at all:
+ * - Selection/reselection
+ *
+ * Hence, establishing the cause(s) of any interrupt is partly guesswork.
+ * In "The DP8490 and DP5380 Comparison Guide", National Semiconductor
+ * claimed that "the design of the [DP8490] interrupt logic ensures
+ * interrupts will not be lost (they can be on the DP5380)."
+ * The L5380/53C80 datasheet from LOGIC Devices has more details.
+ *
+ * Checking for bus reset by reading RST is futile because of interrupt
+ * latency, but a bus reset will reset chip logic. Checking for parity error
+ * is unnecessary because that interrupt is never enabled. A Loss of BSY
+ * condition will clear DMA Mode. We can tell when this occurs because the
+ * the Busy Monitor interrupt is enabled together with DMA Mode.
  */
 
 static irqreturn_t NCR5380_intr(int irq, void *dev_id)
 {
        struct Scsi_Host *instance = dev_id;
-       int done = 1, handled = 0;
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+       int handled = 0;
        unsigned char basr;
+       unsigned long flags;
 
-       dprintk(NDEBUG_INTR, "scsi%d: NCR5380 irq triggered\n", HOSTNO);
+       spin_lock_irqsave(&hostdata->lock, flags);
 
-       /* Look for pending interrupts */
        basr = NCR5380_read(BUS_AND_STATUS_REG);
-       dprintk(NDEBUG_INTR, "scsi%d: BASR=%02x\n", HOSTNO, basr);
-       /* dispatch to appropriate routine if found and done=0 */
        if (basr & BASR_IRQ) {
-               NCR5380_dprint(NDEBUG_INTR, instance);
-               if ((NCR5380_read(STATUS_REG) & (SR_SEL|SR_IO)) == (SR_SEL|SR_IO)) {
-                       done = 0;
-                       dprintk(NDEBUG_INTR, "scsi%d: SEL interrupt\n", HOSTNO);
-                       NCR5380_reselect(instance);
-                       (void)NCR5380_read(RESET_PARITY_INTERRUPT_REG);
-               } else if (basr & BASR_PARITY_ERROR) {
-                       dprintk(NDEBUG_INTR, "scsi%d: PARITY interrupt\n", HOSTNO);
-                       (void)NCR5380_read(RESET_PARITY_INTERRUPT_REG);
-               } else if ((NCR5380_read(STATUS_REG) & SR_RST) == SR_RST) {
-                       dprintk(NDEBUG_INTR, "scsi%d: RESET interrupt\n", HOSTNO);
-                       (void)NCR5380_read(RESET_PARITY_INTERRUPT_REG);
-               } else {
-                       /*
-                        * The rest of the interrupt conditions can occur only during a
-                        * DMA transfer
-                        */
+               unsigned char mr = NCR5380_read(MODE_REG);
+               unsigned char sr = NCR5380_read(STATUS_REG);
+
+               dsprintk(NDEBUG_INTR, instance, "IRQ %d, BASR 0x%02x, SR 0x%02x, MR 0x%02x\n",
+                        irq, basr, sr, mr);
 
 #if defined(REAL_DMA)
-                       /*
-                        * We should only get PHASE MISMATCH and EOP interrupts if we have
-                        * DMA enabled, so do a sanity check based on the current setting
-                        * of the MODE register.
+               if ((mr & MR_DMA_MODE) || (mr & MR_MONITOR_BSY)) {
+                       /* Probably End of DMA, Phase Mismatch or Loss of BSY.
+                        * We ack IRQ after clearing Mode Register. Workarounds
+                        * for End of DMA errata need to happen in DMA Mode.
                         */
 
-                       if ((NCR5380_read(MODE_REG) & MR_DMA_MODE) &&
-                           ((basr & BASR_END_DMA_TRANSFER) ||
-                            !(basr & BASR_PHASE_MATCH))) {
+                       dsprintk(NDEBUG_INTR, instance, "interrupt in DMA mode\n");
 
-                               dprintk(NDEBUG_INTR, "scsi%d: PHASE MISM or EOP interrupt\n", HOSTNO);
-                               NCR5380_dma_complete( instance );
-                               done = 0;
-                       } else
+                       if (hostdata->connected) {
+                               NCR5380_dma_complete(instance);
+                               queue_work(hostdata->work_q, &hostdata->main_task);
+                       } else {
+                               NCR5380_write(MODE_REG, MR_BASE);
+                               NCR5380_read(RESET_PARITY_INTERRUPT_REG);
+                       }
+               } else
 #endif /* REAL_DMA */
-                       {
-/* MS: Ignore unknown phase mismatch interrupts (caused by EOP interrupt) */
-                               if (basr & BASR_PHASE_MATCH)
-                                       dprintk(NDEBUG_INTR, "scsi%d: unknown interrupt, "
-                                              "BASR 0x%x, MR 0x%x, SR 0x%x\n",
-                                              HOSTNO, basr, NCR5380_read(MODE_REG),
-                                              NCR5380_read(STATUS_REG));
-                               (void)NCR5380_read(RESET_PARITY_INTERRUPT_REG);
+               if ((NCR5380_read(CURRENT_SCSI_DATA_REG) & hostdata->id_mask) &&
+                   (sr & (SR_SEL | SR_IO | SR_BSY | SR_RST)) == (SR_SEL | SR_IO)) {
+                       /* Probably reselected */
+                       NCR5380_write(SELECT_ENABLE_REG, 0);
+                       NCR5380_read(RESET_PARITY_INTERRUPT_REG);
+
+                       dsprintk(NDEBUG_INTR, instance, "interrupt with SEL and IO\n");
+
+                       if (!hostdata->connected) {
+                               NCR5380_reselect(instance);
+                               queue_work(hostdata->work_q, &hostdata->main_task);
+                       }
+                       if (!hostdata->connected)
+                               NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
+               } else {
+                       /* Probably Bus Reset */
+                       NCR5380_read(RESET_PARITY_INTERRUPT_REG);
+
+                       dsprintk(NDEBUG_INTR, instance, "unknown interrupt\n");
 #ifdef SUN3_SCSI_VME
-                               dregs->csr |= CSR_DMA_ENABLE;
+                       dregs->csr |= CSR_DMA_ENABLE;
 #endif
-                       }
-               } /* if !(SELECTION || PARITY) */
+               }
                handled = 1;
-       } /* BASR & IRQ */ else {
-               printk(KERN_NOTICE "scsi%d: interrupt without IRQ bit set in BASR, "
-                      "BASR 0x%X, MR 0x%X, SR 0x%x\n", HOSTNO, basr,
-                      NCR5380_read(MODE_REG), NCR5380_read(STATUS_REG));
-               (void)NCR5380_read(RESET_PARITY_INTERRUPT_REG);
+       } else {
+               shost_printk(KERN_NOTICE, instance, "interrupt without IRQ bit\n");
 #ifdef SUN3_SCSI_VME
                dregs->csr |= CSR_DMA_ENABLE;
 #endif
        }
 
-       if (!done) {
-               dprintk(NDEBUG_INTR, "scsi%d: in int routine, calling main\n", HOSTNO);
-               /* Put a call to NCR5380_main() on the queue... */
-               queue_main(shost_priv(instance));
-       }
+       spin_unlock_irqrestore(&hostdata->lock, flags);
+
        return IRQ_RETVAL(handled);
 }
 
 /*
  * Function : int NCR5380_select(struct Scsi_Host *instance,
- *                               struct scsi_cmnd *cmd)
+ * struct scsi_cmnd *cmd)
  *
  * Purpose : establishes I_T_L or I_T_L_Q nexus for new or existing command,
- *     including ARBITRATION, SELECTION, and initial message out for
- *     IDENTIFY and queue messages.
+ * including ARBITRATION, SELECTION, and initial message out for
+ * IDENTIFY and queue messages.
  *
  * Inputs : instance - instantiation of the 5380 driver on which this
- *     target lives, cmd - SCSI command to execute.
+ * target lives, cmd - SCSI command to execute.
  *
- * Returns : -1 if selection could not execute for some reason,
- *     0 if selection succeeded or failed because the target
- *     did not respond.
+ * Returns cmd if selection failed but should be retried,
+ * NULL if selection failed and should not be retried, or
+ * NULL if selection succeeded (hostdata->connected == cmd).
  *
  * Side effects :
- *     If bus busy, arbitration failed, etc, NCR5380_select() will exit
- *             with registers as they should have been on entry - ie
- *             SELECT_ENABLE will be set appropriately, the NCR5380
- *             will cease to drive any SCSI bus signals.
+ * If bus busy, arbitration failed, etc, NCR5380_select() will exit
+ * with registers as they should have been on entry - ie
+ * SELECT_ENABLE will be set appropriately, the NCR5380
+ * will cease to drive any SCSI bus signals.
  *
- *     If successful : I_T_L or I_T_L_Q nexus will be established,
- *             instance->connected will be set to cmd.
- *             SELECT interrupt will be disabled.
+ * If successful : I_T_L or I_T_L_Q nexus will be established,
+ * instance->connected will be set to cmd.
+ * SELECT interrupt will be disabled.
  *
- *     If failed (no target) : cmd->scsi_done() will be called, and the
- *             cmd->result host byte set to DID_BAD_TARGET.
+ * If failed (no target) : cmd->scsi_done() will be called, and the
+ * cmd->result host byte set to DID_BAD_TARGET.
  */
 
-static int NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
+static struct scsi_cmnd *NCR5380_select(struct Scsi_Host *instance,
+                                        struct scsi_cmnd *cmd)
 {
-       SETUP_HOSTDATA(instance);
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
        unsigned char tmp[3], phase;
        unsigned char *data;
        int len;
-       unsigned long timeout;
-       unsigned long flags;
+       int err;
 
-       hostdata->restart_select = 0;
        NCR5380_dprint(NDEBUG_ARBITRATION, instance);
-       dprintk(NDEBUG_ARBITRATION, "scsi%d: starting arbitration, id = %d\n", HOSTNO,
-                  instance->this_id);
+       dsprintk(NDEBUG_ARBITRATION, instance, "starting arbitration, id = %d\n",
+                instance->this_id);
+
+       /*
+        * Arbitration and selection phases are slow and involve dropping the
+        * lock, so we have to watch out for EH. An exception handler may
+        * change 'selecting' to NULL. This function will then return NULL
+        * so that the caller will forget about 'cmd'. (During information
+        * transfer phases, EH may change 'connected' to NULL.)
+        */
+       hostdata->selecting = cmd;
 
        /*
         * Set the phase bits to 0, otherwise the NCR5380 won't drive the
         * data bus during SELECTION.
         */
 
-       local_irq_save(flags);
-       if (hostdata->connected) {
-               local_irq_restore(flags);
-               return -1;
-       }
        NCR5380_write(TARGET_COMMAND_REG, 0);
 
        /*
@@ -1337,96 +1244,77 @@ static int NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
        NCR5380_write(OUTPUT_DATA_REG, hostdata->id_mask);
        NCR5380_write(MODE_REG, MR_ARBITRATE);
 
-       local_irq_restore(flags);
-
-       /* Wait for arbitration logic to complete */
-#if defined(NCR_TIMEOUT)
-       {
-               unsigned long timeout = jiffies + 2*NCR_TIMEOUT;
+       /* The chip now waits for BUS FREE phase. Then after the 800 ns
+        * Bus Free Delay, arbitration will begin.
+        */
 
-               while (!(NCR5380_read(INITIATOR_COMMAND_REG) & ICR_ARBITRATION_PROGRESS) &&
-                      time_before(jiffies, timeout) && !hostdata->connected)
-                       ;
-               if (time_after_eq(jiffies, timeout)) {
-                       printk("scsi : arbitration timeout at %d\n", __LINE__);
-                       NCR5380_write(MODE_REG, MR_BASE);
-                       NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
-                       return -1;
-               }
+       spin_unlock_irq(&hostdata->lock);
+       err = NCR5380_poll_politely2(instance, MODE_REG, MR_ARBITRATE, 0,
+                       INITIATOR_COMMAND_REG, ICR_ARBITRATION_PROGRESS,
+                                              ICR_ARBITRATION_PROGRESS, HZ);
+       spin_lock_irq(&hostdata->lock);
+       if (!(NCR5380_read(MODE_REG) & MR_ARBITRATE)) {
+               /* Reselection interrupt */
+               goto out;
        }
-#else /* NCR_TIMEOUT */
-       while (!(NCR5380_read(INITIATOR_COMMAND_REG) & ICR_ARBITRATION_PROGRESS) &&
-              !hostdata->connected)
-               ;
-#endif
-
-       dprintk(NDEBUG_ARBITRATION, "scsi%d: arbitration complete\n", HOSTNO);
-
-       if (hostdata->connected) {
+       if (err < 0) {
                NCR5380_write(MODE_REG, MR_BASE);
-               return -1;
+               shost_printk(KERN_ERR, instance,
+                            "select: arbitration timeout\n");
+               goto out;
        }
-       /*
-        * The arbitration delay is 2.2us, but this is a minimum and there is
-        * no maximum so we can safely sleep for ceil(2.2) usecs to accommodate
-        * the integral nature of udelay().
-        *
-        */
+       spin_unlock_irq(&hostdata->lock);
 
+       /* The SCSI-2 arbitration delay is 2.4 us */
        udelay(3);
 
        /* Check for lost arbitration */
        if ((NCR5380_read(INITIATOR_COMMAND_REG) & ICR_ARBITRATION_LOST) ||
            (NCR5380_read(CURRENT_SCSI_DATA_REG) & hostdata->id_higher_mask) ||
-           (NCR5380_read(INITIATOR_COMMAND_REG) & ICR_ARBITRATION_LOST) ||
-           hostdata->connected) {
+           (NCR5380_read(INITIATOR_COMMAND_REG) & ICR_ARBITRATION_LOST)) {
                NCR5380_write(MODE_REG, MR_BASE);
-               dprintk(NDEBUG_ARBITRATION, "scsi%d: lost arbitration, deasserting MR_ARBITRATE\n",
-                          HOSTNO);
-               return -1;
+               dsprintk(NDEBUG_ARBITRATION, instance, "lost arbitration, deasserting MR_ARBITRATE\n");
+               spin_lock_irq(&hostdata->lock);
+               goto out;
        }
 
-       /* after/during arbitration, BSY should be asserted.
-          IBM DPES-31080 Version S31Q works now */
-       /* Tnx to Thomas_Roesch@m2.maus.de for finding this! (Roman) */
+       /* After/during arbitration, BSY should be asserted.
+        * IBM DPES-31080 Version S31Q works now
+        * Tnx to Thomas_Roesch@m2.maus.de for finding this! (Roman)
+        */
        NCR5380_write(INITIATOR_COMMAND_REG,
                      ICR_BASE | ICR_ASSERT_SEL | ICR_ASSERT_BSY);
 
-       if ((NCR5380_read(INITIATOR_COMMAND_REG) & ICR_ARBITRATION_LOST) ||
-           hostdata->connected) {
-               NCR5380_write(MODE_REG, MR_BASE);
-               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-               dprintk(NDEBUG_ARBITRATION, "scsi%d: lost arbitration, deasserting ICR_ASSERT_SEL\n",
-                          HOSTNO);
-               return -1;
-       }
-
        /*
         * Again, bus clear + bus settle time is 1.2us, however, this is
         * a minimum so we'll udelay ceil(1.2)
         */
 
-#ifdef CONFIG_ATARI_SCSI_TOSHIBA_DELAY
-       /* ++roman: But some targets (see above :-) seem to need a bit more... */
-       udelay(15);
-#else
-       udelay(2);
-#endif
+       if (hostdata->flags & FLAG_TOSHIBA_DELAY)
+               udelay(15);
+       else
+               udelay(2);
 
-       if (hostdata->connected) {
+       spin_lock_irq(&hostdata->lock);
+
+       /* NCR5380_reselect() clears MODE_REG after a reselection interrupt */
+       if (!(NCR5380_read(MODE_REG) & MR_ARBITRATE))
+               goto out;
+
+       if (!hostdata->selecting) {
                NCR5380_write(MODE_REG, MR_BASE);
                NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-               return -1;
+               goto out;
        }
 
-       dprintk(NDEBUG_ARBITRATION, "scsi%d: won arbitration\n", HOSTNO);
+       dsprintk(NDEBUG_ARBITRATION, instance, "won arbitration\n");
 
        /*
         * Now that we have won arbitration, start Selection process, asserting
         * the host and target ID's on the SCSI bus.
         */
 
-       NCR5380_write(OUTPUT_DATA_REG, (hostdata->id_mask | (1 << cmd->device->id)));
+       NCR5380_write(OUTPUT_DATA_REG, hostdata->id_mask | (1 << scmd_id(cmd)));
 
        /*
         * Raise ATN while SEL is true before BSY goes false from arbitration,
@@ -1434,22 +1322,18 @@ static int NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
         * phase immediately after selection.
         */
 
-       NCR5380_write(INITIATOR_COMMAND_REG, (ICR_BASE | ICR_ASSERT_BSY |
-                     ICR_ASSERT_DATA | ICR_ASSERT_ATN | ICR_ASSERT_SEL ));
+       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_BSY |
+                     ICR_ASSERT_DATA | ICR_ASSERT_ATN | ICR_ASSERT_SEL);
        NCR5380_write(MODE_REG, MR_BASE);
 
        /*
         * Reselect interrupts must be turned off prior to the dropping of BSY,
         * otherwise we will trigger an interrupt.
         */
-
-       if (hostdata->connected) {
-               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-               return -1;
-       }
-
        NCR5380_write(SELECT_ENABLE_REG, 0);
 
+       spin_unlock_irq(&hostdata->lock);
+
        /*
         * The initiator shall then wait at least two deskew delays and release
         * the BSY signal.
@@ -1457,8 +1341,8 @@ static int NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
        udelay(1);        /* wingel -- wait two bus deskew delay >2*45ns */
 
        /* Reset BSY */
-       NCR5380_write(INITIATOR_COMMAND_REG, (ICR_BASE | ICR_ASSERT_DATA |
-                     ICR_ASSERT_ATN | ICR_ASSERT_SEL));
+       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_DATA |
+                     ICR_ASSERT_ATN | ICR_ASSERT_SEL);
 
        /*
         * Something weird happens when we cease to drive BSY - looks
@@ -1479,45 +1363,39 @@ static int NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
 
        udelay(1);
 
-       dprintk(NDEBUG_SELECTION, "scsi%d: selecting target %d\n", HOSTNO, cmd->device->id);
+       dsprintk(NDEBUG_SELECTION, instance, "selecting target %d\n", scmd_id(cmd));
 
        /*
         * The SCSI specification calls for a 250 ms timeout for the actual
         * selection.
         */
 
-       timeout = jiffies + msecs_to_jiffies(250);
-
-       /*
-        * XXX very interesting - we're seeing a bounce where the BSY we
-        * asserted is being reflected / still asserted (propagation delay?)
-        * and it's detecting as true.  Sigh.
-        */
-
-#if 0
-       /* ++roman: If a target conformed to the SCSI standard, it wouldn't assert
-        * IO while SEL is true. But again, there are some disks out the in the
-        * world that do that nevertheless. (Somebody claimed that this announces
-        * reselection capability of the target.) So we better skip that test and
-        * only wait for BSY... (Famous german words: Der Klügere gibt nach :-)
-        */
-
-       while (time_before(jiffies, timeout) &&
-              !(NCR5380_read(STATUS_REG) & (SR_BSY | SR_IO)))
-               ;
+       err = NCR5380_poll_politely(instance, STATUS_REG, SR_BSY, SR_BSY,
+                                   msecs_to_jiffies(250));
 
        if ((NCR5380_read(STATUS_REG) & (SR_SEL | SR_IO)) == (SR_SEL | SR_IO)) {
+               spin_lock_irq(&hostdata->lock);
                NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
                NCR5380_reselect(instance);
-               printk(KERN_ERR "scsi%d: reselection after won arbitration?\n",
-                      HOSTNO);
+               if (!hostdata->connected)
+                       NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
+               shost_printk(KERN_ERR, instance, "reselection after won arbitration?\n");
+               goto out;
+       }
+
+       if (err < 0) {
+               spin_lock_irq(&hostdata->lock);
+               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
                NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
-               return -1;
+               /* Can't touch cmd if it has been reclaimed by the scsi ML */
+               if (hostdata->selecting) {
+                       cmd->result = DID_BAD_TARGET << 16;
+                       complete_cmd(instance, cmd);
+                       dsprintk(NDEBUG_SELECTION, instance, "target did not respond within 250ms\n");
+                       cmd = NULL;
+               }
+               goto out;
        }
-#else
-       while (time_before(jiffies, timeout) && !(NCR5380_read(STATUS_REG) & SR_BSY))
-               ;
-#endif
 
        /*
         * No less than two deskew delays after the initiator detects the
@@ -1525,32 +1403,9 @@ static int NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
         * change the DATA BUS.                                     -wingel
         */
 
-       udelay(1);
-
-       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
-
-       if (!(NCR5380_read(STATUS_REG) & SR_BSY)) {
-               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-               if (hostdata->targets_present & (1 << cmd->device->id)) {
-                       printk(KERN_ERR "scsi%d: weirdness\n", HOSTNO);
-                       if (hostdata->restart_select)
-                               printk(KERN_NOTICE "\trestart select\n");
-                       NCR5380_dprint(NDEBUG_ANY, instance);
-                       NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
-                       return -1;
-               }
-               cmd->result = DID_BAD_TARGET << 16;
-#ifdef SUPPORT_TAGS
-               cmd_free_tag(cmd);
-#endif
-               cmd->scsi_done(cmd);
-               NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
-               dprintk(NDEBUG_SELECTION, "scsi%d: target did not respond within 250ms\n", HOSTNO);
-               NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
-               return 0;
-       }
-
-       hostdata->targets_present |= (1 << cmd->device->id);
+       udelay(1);
+
+       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
 
        /*
         * Since we followed the SCSI spec, and raised ATN while SEL
@@ -1563,16 +1418,27 @@ static int NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
         * until it wraps back to 0.
         *
         * XXX - it turns out that there are some broken SCSI-II devices,
-        *           which claim to support tagged queuing but fail when more than
-        *           some number of commands are issued at once.
+        * which claim to support tagged queuing but fail when more than
+        * some number of commands are issued at once.
         */
 
        /* Wait for start of REQ/ACK handshake */
-       while (!(NCR5380_read(STATUS_REG) & SR_REQ))
-               ;
 
-       dprintk(NDEBUG_SELECTION, "scsi%d: target %d selected, going into MESSAGE OUT phase.\n",
-                  HOSTNO, cmd->device->id);
+       err = NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, SR_REQ, HZ);
+       spin_lock_irq(&hostdata->lock);
+       if (err < 0) {
+               shost_printk(KERN_ERR, instance, "select: REQ timeout\n");
+               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+               NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
+               goto out;
+       }
+       if (!hostdata->selecting) {
+               do_abort(instance);
+               goto out;
+       }
+
+       dsprintk(NDEBUG_SELECTION, instance, "target %d selected, going into MESSAGE OUT phase.\n",
+                scmd_id(cmd));
        tmp[0] = IDENTIFY(1, cmd->device->lun);
 
 #ifdef SUPPORT_TAGS
@@ -1591,11 +1457,12 @@ static int NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
        data = tmp;
        phase = PHASE_MSGOUT;
        NCR5380_transfer_pio(instance, &phase, &len, &data);
-       dprintk(NDEBUG_SELECTION, "scsi%d: nexus established.\n", HOSTNO);
+       dsprintk(NDEBUG_SELECTION, instance, "nexus established.\n");
        /* XXX need to handle errors here */
+
        hostdata->connected = cmd;
 #ifndef SUPPORT_TAGS
-       hostdata->busy[cmd->device->id] |= (1 << cmd->device->lun);
+       hostdata->busy[cmd->device->id] |= 1 << cmd->device->lun;
 #endif
 #ifdef SUN3_SCSI_VME
        dregs->csr |= CSR_INTR;
@@ -1603,24 +1470,30 @@ static int NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
 
        initialize_SCp(cmd);
 
-       return 0;
+       cmd = NULL;
+
+out:
+       if (!hostdata->selecting)
+               return NULL;
+       hostdata->selecting = NULL;
+       return cmd;
 }
 
 /*
  * Function : int NCR5380_transfer_pio (struct Scsi_Host *instance,
- *      unsigned char *phase, int *count, unsigned char **data)
+ * unsigned char *phase, int *count, unsigned char **data)
  *
  * Purpose : transfers data in given phase using polled I/O
  *
  * Inputs : instance - instance of driver, *phase - pointer to
- *     what phase is expected, *count - pointer to number of
- *     bytes to transfer, **data - pointer to data pointer.
+ * what phase is expected, *count - pointer to number of
+ * bytes to transfer, **data - pointer to data pointer.
  *
  * Returns : -1 when different phase is entered without transferring
- *     maximum number of bytes, 0 if all bytes are transferred or exit
- *     is in same phase.
+ * maximum number of bytes, 0 if all bytes are transferred or exit
+ * is in same phase.
  *
- *     Also, *phase, *count, *data are modified in place.
+ * Also, *phase, *count, *data are modified in place.
  *
  * XXX Note : handling for bus free may be useful.
  */
@@ -1635,9 +1508,9 @@ static int NCR5380_transfer_pio(struct Scsi_Host *instance,
                                unsigned char *phase, int *count,
                                unsigned char **data)
 {
-       register unsigned char p = *phase, tmp;
-       register int c = *count;
-       register unsigned char *d = *data;
+       unsigned char p = *phase, tmp;
+       int c = *count;
+       unsigned char *d = *data;
 
        /*
         * The NCR5380 chip will only drive the SCSI bus when the
@@ -1652,14 +1525,15 @@ static int NCR5380_transfer_pio(struct Scsi_Host *instance,
                 * Wait for assertion of REQ, after which the phase bits will be
                 * valid
                 */
-               while (!((tmp = NCR5380_read(STATUS_REG)) & SR_REQ))
-                       ;
 
-               dprintk(NDEBUG_HANDSHAKE, "scsi%d: REQ detected\n", HOSTNO);
+               if (NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, SR_REQ, HZ) < 0)
+                       break;
+
+               dsprintk(NDEBUG_HANDSHAKE, instance, "REQ asserted\n");
 
                /* Check for phase mismatch */
-               if ((tmp & PHASE_MASK) != p) {
-                       dprintk(NDEBUG_PIO, "scsi%d: phase mismatch\n", HOSTNO);
+               if ((NCR5380_read(STATUS_REG) & PHASE_MASK) != p) {
+                       dsprintk(NDEBUG_PIO, instance, "phase mismatch\n");
                        NCR5380_dprint_phase(NDEBUG_PIO, instance);
                        break;
                }
@@ -1684,35 +1558,36 @@ static int NCR5380_transfer_pio(struct Scsi_Host *instance,
                                NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_DATA);
                                NCR5380_dprint(NDEBUG_PIO, instance);
                                NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE |
-                                             ICR_ASSERT_DATA | ICR_ASSERT_ACK);
+                                             ICR_ASSERT_DATA | ICR_ASSERT_ACK);
                        } else {
                                NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE |
-                                             ICR_ASSERT_DATA | ICR_ASSERT_ATN);
+                                             ICR_ASSERT_DATA | ICR_ASSERT_ATN);
                                NCR5380_dprint(NDEBUG_PIO, instance);
                                NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE |
-                                             ICR_ASSERT_DATA | ICR_ASSERT_ATN | ICR_ASSERT_ACK);
+                                             ICR_ASSERT_DATA | ICR_ASSERT_ATN | ICR_ASSERT_ACK);
                        }
                } else {
                        NCR5380_dprint(NDEBUG_PIO, instance);
                        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ACK);
                }
 
-               while (NCR5380_read(STATUS_REG) & SR_REQ)
-                       ;
+               if (NCR5380_poll_politely(instance,
+                                         STATUS_REG, SR_REQ, 0, 5 * HZ) < 0)
+                       break;
 
-               dprintk(NDEBUG_HANDSHAKE, "scsi%d: req false, handshake complete\n", HOSTNO);
+               dsprintk(NDEBUG_HANDSHAKE, instance, "REQ negated, handshake complete\n");
 
-               /*
               * We have several special cases to consider during REQ/ACK handshaking :
               * 1.  We were in MSGOUT phase, and we are on the last byte of the
               *      message.  ATN must be dropped as ACK is dropped.
               *
               * 2.  We are in a MSGIN phase, and we are on the last byte of the
               *      message.  We must exit with ACK asserted, so that the calling
               *      code may raise ATN before dropping ACK to reject the message.
               *
               * 3.  ACK and ATN are clear and the target may proceed as normal.
               */
+/*
+ * We have several special cases to consider during REQ/ACK handshaking :
+ * 1.  We were in MSGOUT phase, and we are on the last byte of the
* message.  ATN must be dropped as ACK is dropped.
+ *
+ * 2.  We are in a MSGIN phase, and we are on the last byte of the
* message.  We must exit with ACK asserted, so that the calling
* code may raise ATN before dropping ACK to reject the message.
+ *
+ * 3.  ACK and ATN are clear and the target may proceed as normal.
+ */
                if (!(p == PHASE_MSGIN && c == 1)) {
                        if (p == PHASE_MSGOUT && c > 1)
                                NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
@@ -1721,16 +1596,16 @@ static int NCR5380_transfer_pio(struct Scsi_Host *instance,
                }
        } while (--c);
 
-       dprintk(NDEBUG_PIO, "scsi%d: residual %d\n", HOSTNO, c);
+       dsprintk(NDEBUG_PIO, instance, "residual %d\n", c);
 
        *count = c;
        *data = d;
        tmp = NCR5380_read(STATUS_REG);
        /* The phase read from the bus is valid if either REQ is (already)
-        * asserted or if ACK hasn't been released yet. The latter is the case if
-        * we're in MSGIN and all wanted bytes have been received.
+        * asserted or if ACK hasn't been released yet. The latter applies if
+        * we're in MSG IN, DATA IN or STATUS and all bytes have been received.
         */
-       if ((tmp & SR_REQ) || (p == PHASE_MSGIN && c == 0))
+       if ((tmp & SR_REQ) || ((tmp & SR_IO) && c == 0))
                *phase = tmp & PHASE_MASK;
        else
                *phase = PHASE_UNKNOWN;
@@ -1741,19 +1616,45 @@ static int NCR5380_transfer_pio(struct Scsi_Host *instance,
                return -1;
 }
 
-/*
- * Function : do_abort (Scsi_Host *host)
+/**
+ * do_reset - issue a reset command
+ * @instance: adapter to reset
+ *
+ * Issue a reset sequence to the NCR5380 and try and get the bus
+ * back into sane shape.
  *
- * Purpose : abort the currently established nexus.  Should only be
- *     called from a routine which can drop into a
+ * This clears the reset interrupt flag because there may be no handler for
+ * it. When the driver is initialized, the NCR5380_intr() handler has not yet
+ * been installed. And when in EH we may have released the ST DMA interrupt.
+ */
+
+static void do_reset(struct Scsi_Host *instance)
+{
+       unsigned long flags;
+
+       local_irq_save(flags);
+       NCR5380_write(TARGET_COMMAND_REG,
+                     PHASE_SR_TO_TCR(NCR5380_read(STATUS_REG) & PHASE_MASK));
+       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_RST);
+       udelay(50);
+       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+       (void)NCR5380_read(RESET_PARITY_INTERRUPT_REG);
+       local_irq_restore(flags);
+}
+
+/**
+ * do_abort - abort the currently established nexus by going to
+ * MESSAGE OUT phase and sending an ABORT message.
+ * @instance: relevant scsi host instance
  *
- * Returns 0 on success, -1 on failure.
+ * Returns 0 on success, -1 on failure.
  */
 
 static int do_abort(struct Scsi_Host *instance)
 {
-       unsigned char tmp, *msgptr, phase;
+       unsigned char *msgptr, phase, tmp;
        int len;
+       int rc;
 
        /* Request message out phase */
        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
@@ -1768,16 +1669,20 @@ static int do_abort(struct Scsi_Host *instance)
         * the target sees, so we just handshake.
         */
 
-       while (!((tmp = NCR5380_read(STATUS_REG)) & SR_REQ))
-               ;
+       rc = NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, SR_REQ, 10 * HZ);
+       if (rc < 0)
+               goto timeout;
+
+       tmp = NCR5380_read(STATUS_REG) & PHASE_MASK;
 
        NCR5380_write(TARGET_COMMAND_REG, PHASE_SR_TO_TCR(tmp));
 
-       if ((tmp & PHASE_MASK) != PHASE_MSGOUT) {
-               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN |
-                             ICR_ASSERT_ACK);
-               while (NCR5380_read(STATUS_REG) & SR_REQ)
-                       ;
+       if (tmp != PHASE_MSGOUT) {
+               NCR5380_write(INITIATOR_COMMAND_REG,
+                             ICR_BASE | ICR_ASSERT_ATN | ICR_ASSERT_ACK);
+               rc = NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, 0, 3 * HZ);
+               if (rc < 0)
+                       goto timeout;
                NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
        }
 
@@ -1793,26 +1698,29 @@ static int do_abort(struct Scsi_Host *instance)
         */
 
        return len ? -1 : 0;
+
+timeout:
+       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+       return -1;
 }
 
 #if defined(REAL_DMA)
 /*
  * Function : int NCR5380_transfer_dma (struct Scsi_Host *instance,
- *      unsigned char *phase, int *count, unsigned char **data)
+ * unsigned char *phase, int *count, unsigned char **data)
  *
  * Purpose : transfers data in given phase using either real
- *     or pseudo DMA.
+ * or pseudo DMA.
  *
  * Inputs : instance - instance of driver, *phase - pointer to
- *     what phase is expected, *count - pointer to number of
- *     bytes to transfer, **data - pointer to data pointer.
+ * what phase is expected, *count - pointer to number of
+ * bytes to transfer, **data - pointer to data pointer.
  *
  * Returns : -1 when different phase is entered without transferring
- *     maximum number of bytes, 0 if all bytes or transferred or exit
- *     is in same phase.
- *
- *     Also, *phase, *count, *data are modified in place.
+ * maximum number of bytes, 0 if all bytes or transferred or exit
+ * is in same phase.
  *
+ * Also, *phase, *count, *data are modified in place.
  */
 
 
@@ -1820,10 +1728,9 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance,
                                unsigned char *phase, int *count,
                                unsigned char **data)
 {
-       SETUP_HOSTDATA(instance);
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
        register int c = *count;
        register unsigned char p = *phase;
-       unsigned long flags;
 
 #if defined(CONFIG_SUN3)
        /* sanity check */
@@ -1834,29 +1741,22 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance,
        }
        hostdata->dma_len = c;
 
-       dprintk(NDEBUG_DMA, "scsi%d: initializing DMA for %s, %d bytes %s %p\n",
-               instance->host_no, (p & SR_IO) ? "reading" : "writing",
-               c, (p & SR_IO) ? "to" : "from", *data);
+       dsprintk(NDEBUG_DMA, instance, "initializing DMA %s: length %d, address %p\n",
+                (p & SR_IO) ? "receive" : "send", c, *data);
 
        /* netbsd turns off ints here, why not be safe and do it too */
-       local_irq_save(flags);
 
        /* send start chain */
        sun3scsi_dma_start(c, *data);
 
+       NCR5380_write(TARGET_COMMAND_REG, PHASE_SR_TO_TCR(p));
+       NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE | MR_MONITOR_BSY |
+                               MR_ENABLE_EOP_INTR);
        if (p & SR_IO) {
-               NCR5380_write(TARGET_COMMAND_REG, 1);
-               NCR5380_read(RESET_PARITY_INTERRUPT_REG);
                NCR5380_write(INITIATOR_COMMAND_REG, 0);
-               NCR5380_write(MODE_REG,
-                             (NCR5380_read(MODE_REG) | MR_DMA_MODE | MR_ENABLE_EOP_INTR));
                NCR5380_write(START_DMA_INITIATOR_RECEIVE_REG, 0);
        } else {
-               NCR5380_write(TARGET_COMMAND_REG, 0);
-               NCR5380_read(RESET_PARITY_INTERRUPT_REG);
                NCR5380_write(INITIATOR_COMMAND_REG, ICR_ASSERT_DATA);
-               NCR5380_write(MODE_REG,
-                             (NCR5380_read(MODE_REG) | MR_DMA_MODE | MR_ENABLE_EOP_INTR));
                NCR5380_write(START_DMA_SEND_REG, 0);
        }
 
@@ -1864,8 +1764,6 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance,
        dregs->csr |= CSR_DMA_ENABLE;
 #endif
 
-       local_irq_restore(flags);
-
        sun3_dma_active = 1;
 
 #else /* !defined(CONFIG_SUN3) */
@@ -1880,25 +1778,20 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance,
        if (hostdata->read_overruns && (p & SR_IO))
                c -= hostdata->read_overruns;
 
-       dprintk(NDEBUG_DMA, "scsi%d: initializing DMA for %s, %d bytes %s %p\n",
-                  HOSTNO, (p & SR_IO) ? "reading" : "writing",
-                  c, (p & SR_IO) ? "to" : "from", d);
+       dsprintk(NDEBUG_DMA, instance, "initializing DMA %s: length %d, address %p\n",
+                (p & SR_IO) ? "receive" : "send", c, d);
 
        NCR5380_write(TARGET_COMMAND_REG, PHASE_SR_TO_TCR(p));
-
-#ifdef REAL_DMA
-       NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE | MR_ENABLE_EOP_INTR | MR_MONITOR_BSY);
-#endif /* def REAL_DMA  */
+       NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE | MR_MONITOR_BSY |
+                               MR_ENABLE_EOP_INTR);
 
        if (!(hostdata->flags & FLAG_LATE_DMA_SETUP)) {
                /* On the Medusa, it is a must to initialize the DMA before
                 * starting the NCR. This is also the cleaner way for the TT.
                 */
-               local_irq_save(flags);
                hostdata->dma_len = (p & SR_IO) ?
                        NCR5380_dma_read_setup(instance, d, c) :
                        NCR5380_dma_write_setup(instance, d, c);
-               local_irq_restore(flags);
        }
 
        if (p & SR_IO)
@@ -1912,11 +1805,9 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance,
                /* On the Falcon, the DMA setup must be done after the last */
                /* NCR access, else the DMA setup gets trashed!
                 */
-               local_irq_save(flags);
                hostdata->dma_len = (p & SR_IO) ?
                        NCR5380_dma_read_setup(instance, d, c) :
                        NCR5380_dma_write_setup(instance, d, c);
-               local_irq_restore(flags);
        }
 #endif /* !defined(CONFIG_SUN3) */
 
@@ -1928,23 +1819,22 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance,
  * Function : NCR5380_information_transfer (struct Scsi_Host *instance)
  *
  * Purpose : run through the various SCSI phases and do as the target
- *     directs us to.  Operates on the currently connected command,
- *     instance->connected.
+ * directs us to.  Operates on the currently connected command,
+ * instance->connected.
  *
  * Inputs : instance, instance for which we are doing commands
  *
  * Side effects : SCSI things happen, the disconnected queue will be
- *     modified if a command disconnects, *instance->connected will
- *     change.
+ * modified if a command disconnects, *instance->connected will
+ * change.
  *
  * XXX Note : we need to watch for bus free or a reset condition here
- *     to recover from an unexpected bus free condition.
+ * to recover from an unexpected bus free condition.
  */
 
 static void NCR5380_information_transfer(struct Scsi_Host *instance)
 {
-       SETUP_HOSTDATA(instance);
-       unsigned long flags;
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
        unsigned char msgout = NOP;
        int sink = 0;
        int len;
@@ -1953,13 +1843,15 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
 #endif
        unsigned char *data;
        unsigned char phase, tmp, extended_msg[10], old_phase = 0xff;
-       struct scsi_cmnd *cmd = (struct scsi_cmnd *) hostdata->connected;
+       struct scsi_cmnd *cmd;
 
 #ifdef SUN3_SCSI_VME
        dregs->csr |= CSR_INTR;
 #endif
 
-       while (1) {
+       while ((cmd = hostdata->connected)) {
+               struct NCR5380_cmd *ncmd = scsi_cmd_priv(cmd);
+
                tmp = NCR5380_read(STATUS_REG);
                /* We only have a valid SCSI phase when REQ is asserted */
                if (tmp & SR_REQ) {
@@ -1984,7 +1876,7 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
                                /* this command setup for dma yet? */
                                if ((count >= DMA_MIN_SIZE) && (sun3_dma_setup_done != cmd)) {
                                        if (cmd->request->cmd_type == REQ_TYPE_FS) {
-                                               sun3scsi_dma_setup(d, count,
+                                               sun3scsi_dma_setup(instance, d, count,
                                                                   rq_data_dir(cmd->request));
                                                sun3_dma_setup_done = cmd;
                                        }
@@ -2000,11 +1892,11 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
                                NCR5380_write(TARGET_COMMAND_REG, PHASE_SR_TO_TCR(tmp));
 
                                NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN |
-                                             ICR_ASSERT_ACK);
+                                             ICR_ASSERT_ACK);
                                while (NCR5380_read(STATUS_REG) & SR_REQ)
                                        ;
                                NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE |
-                                             ICR_ASSERT_ATN);
+                                             ICR_ASSERT_ATN);
                                sink = 0;
                                continue;
                        }
@@ -2012,12 +1904,11 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
                        switch (phase) {
                        case PHASE_DATAOUT:
 #if (NDEBUG & NDEBUG_NO_DATAOUT)
-                               printk("scsi%d: NDEBUG_NO_DATAOUT set, attempted DATAOUT "
-                                      "aborted\n", HOSTNO);
+                               shost_printk(KERN_DEBUG, instance, "NDEBUG_NO_DATAOUT set, attempted DATAOUT aborted\n");
                                sink = 1;
                                do_abort(instance);
                                cmd->result = DID_ERROR << 16;
-                               cmd->scsi_done(cmd);
+                               complete_cmd(instance, cmd);
                                return;
 #endif
                        case PHASE_DATAIN:
@@ -2031,13 +1922,10 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
                                        --cmd->SCp.buffers_residual;
                                        cmd->SCp.this_residual = cmd->SCp.buffer->length;
                                        cmd->SCp.ptr = sg_virt(cmd->SCp.buffer);
-                                       /* ++roman: Try to merge some scatter-buffers if
-                                        * they are at contiguous physical addresses.
-                                        */
                                        merge_contiguous_buffers(cmd);
-                                       dprintk(NDEBUG_INFORMATION, "scsi%d: %d bytes and %d buffers left\n",
-                                                  HOSTNO, cmd->SCp.this_residual,
-                                                  cmd->SCp.buffers_residual);
+                                       dsprintk(NDEBUG_INFORMATION, instance, "%d bytes and %d buffers left\n",
+                                                cmd->SCp.this_residual,
+                                                cmd->SCp.buffers_residual);
                                }
 
                                /*
@@ -2051,16 +1939,18 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
                                 */
 
                                /* ++roman: I suggest, this should be
-                                *   #if def(REAL_DMA)
+                                * #if def(REAL_DMA)
                                 * instead of leaving REAL_DMA out.
                                 */
 
 #if defined(REAL_DMA)
-                               if (
 #if !defined(CONFIG_SUN3)
-                                   !cmd->device->borken &&
+                               transfersize = 0;
+                               if (!cmd->device->borken)
 #endif
-                                   (transfersize = NCR5380_dma_xfer_len(instance, cmd, phase)) >= DMA_MIN_SIZE) {
+                                       transfersize = NCR5380_dma_xfer_len(instance, cmd, phase);
+
+                               if (transfersize >= DMA_MIN_SIZE) {
                                        len = transfersize;
                                        cmd->SCp.phase = phase;
                                        if (NCR5380_transfer_dma(instance, &phase,
@@ -2068,16 +1958,15 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
                                                /*
                                                 * If the watchdog timer fires, all future
                                                 * accesses to this device will use the
-                                                * polled-IO. */
+                                                * polled-IO.
+                                                */
                                                scmd_printk(KERN_INFO, cmd,
                                                        "switching to slow handshake\n");
                                                cmd->device->borken = 1;
-                                               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE |
-                                                       ICR_ASSERT_ATN);
                                                sink = 1;
                                                do_abort(instance);
                                                cmd->result = DID_ERROR << 16;
-                                               cmd->scsi_done(cmd);
+                                               complete_cmd(instance, cmd);
                                                /* XXX - need to source or sink data here, as appropriate */
                                        } else {
 #ifdef REAL_DMA
@@ -2093,9 +1982,13 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
                                        }
                                } else
 #endif /* defined(REAL_DMA) */
+                               {
+                                       spin_unlock_irq(&hostdata->lock);
                                        NCR5380_transfer_pio(instance, &phase,
-                                                            (int *)&cmd->SCp.this_residual,
-                                                            (unsigned char **)&cmd->SCp.ptr);
+                                                            (int *)&cmd->SCp.this_residual,
+                                                            (unsigned char **)&cmd->SCp.ptr);
+                                       spin_lock_irq(&hostdata->lock);
+                               }
 #if defined(CONFIG_SUN3) && defined(REAL_DMA)
                                /* if we had intended to dma that command clear it */
                                if (sun3_dma_setup_done == cmd)
@@ -2105,162 +1998,64 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
                        case PHASE_MSGIN:
                                len = 1;
                                data = &tmp;
-                               NCR5380_write(SELECT_ENABLE_REG, 0);    /* disable reselects */
                                NCR5380_transfer_pio(instance, &phase, &len, &data);
                                cmd->SCp.Message = tmp;
 
                                switch (tmp) {
-                               /*
-                                * Linking lets us reduce the time required to get the
-                                * next command out to the device, hopefully this will
-                                * mean we don't waste another revolution due to the delays
-                                * required by ARBITRATION and another SELECTION.
-                                *
-                                * In the current implementation proposal, low level drivers
-                                * merely have to start the next command, pointed to by
-                                * next_link, done() is called as with unlinked commands.
-                                */
-#ifdef LINKED
-                               case LINKED_CMD_COMPLETE:
-                               case LINKED_FLG_CMD_COMPLETE:
-                                       /* Accept message by clearing ACK */
-                                       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-
-                                       dprintk(NDEBUG_LINKED, "scsi%d: target %d lun %llu linked command "
-                                                  "complete.\n", HOSTNO, cmd->device->id, cmd->device->lun);
-
-                                       /* Enable reselect interrupts */
-                                       NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
-                                       /*
-                                        * Sanity check : A linked command should only terminate
-                                        * with one of these messages if there are more linked
-                                        * commands available.
-                                        */
-
-                                       if (!cmd->next_link) {
-                                                printk(KERN_NOTICE "scsi%d: target %d lun %llu "
-                                                       "linked command complete, no next_link\n",
-                                                       HOSTNO, cmd->device->id, cmd->device->lun);
-                                               sink = 1;
-                                               do_abort(instance);
-                                               return;
-                                       }
-
-                                       initialize_SCp(cmd->next_link);
-                                       /* The next command is still part of this process; copy it
-                                        * and don't free it! */
-                                       cmd->next_link->tag = cmd->tag;
-                                       cmd->result = cmd->SCp.Status | (cmd->SCp.Message << 8);
-                                       dprintk(NDEBUG_LINKED, "scsi%d: target %d lun %llu linked request "
-                                                  "done, calling scsi_done().\n",
-                                                  HOSTNO, cmd->device->id, cmd->device->lun);
-                                       cmd->scsi_done(cmd);
-                                       cmd = hostdata->connected;
-                                       break;
-#endif /* def LINKED */
                                case ABORT:
                                case COMMAND_COMPLETE:
                                        /* Accept message by clearing ACK */
                                        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-                                       dprintk(NDEBUG_QUEUES, "scsi%d: command for target %d, lun %llu "
-                                                 "completed\n", HOSTNO, cmd->device->id, cmd->device->lun);
+                                       dsprintk(NDEBUG_QUEUES, instance,
+                                                "COMMAND COMPLETE %p target %d lun %llu\n",
+                                                cmd, scmd_id(cmd), cmd->device->lun);
 
-                                       local_irq_save(flags);
-                                       hostdata->retain_dma_intr++;
                                        hostdata->connected = NULL;
 #ifdef SUPPORT_TAGS
                                        cmd_free_tag(cmd);
                                        if (status_byte(cmd->SCp.Status) == QUEUE_FULL) {
-                                               /* Turn a QUEUE FULL status into BUSY, I think the
-                                                * mid level cannot handle QUEUE FULL :-( (The
-                                                * command is retried after BUSY). Also update our
-                                                * queue size to the number of currently issued
-                                                * commands now.
-                                                */
-                                               /* ++Andreas: the mid level code knows about
-                                                  QUEUE_FULL now. */
-                                               struct tag_alloc *ta = &hostdata->TagAlloc[scmd_id(cmd)][cmd->device->lun];
-                                               dprintk(NDEBUG_TAGS, "scsi%d: target %d lun %llu returned "
-                                                          "QUEUE_FULL after %d commands\n",
-                                                          HOSTNO, cmd->device->id, cmd->device->lun,
-                                                          ta->nr_allocated);
+                                               u8 lun = cmd->device->lun;
+                                               struct tag_alloc *ta = &hostdata->TagAlloc[scmd_id(cmd)][lun];
+
+                                               dsprintk(NDEBUG_TAGS, instance,
+                                                        "QUEUE_FULL %p target %d lun %d nr_allocated %d\n",
+                                                        cmd, scmd_id(cmd), lun, ta->nr_allocated);
                                                if (ta->queue_size > ta->nr_allocated)
-                                                       ta->nr_allocated = ta->queue_size;
+                                                       ta->queue_size = ta->nr_allocated;
                                        }
-#else
-                                       hostdata->busy[cmd->device->id] &= ~(1 << cmd->device->lun);
 #endif
-                                       /* Enable reselect interrupts */
-                                       NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
-
-                                       /*
-                                        * I'm not sure what the correct thing to do here is :
-                                        *
-                                        * If the command that just executed is NOT a request
-                                        * sense, the obvious thing to do is to set the result
-                                        * code to the values of the stored parameters.
-                                        *
-                                        * If it was a REQUEST SENSE command, we need some way to
-                                        * differentiate between the failure code of the original
-                                        * and the failure code of the REQUEST sense - the obvious
-                                        * case is success, where we fall through and leave the
-                                        * result code unchanged.
-                                        *
-                                        * The non-obvious place is where the REQUEST SENSE failed
-                                        */
-
-                                       if (cmd->cmnd[0] != REQUEST_SENSE)
-                                               cmd->result = cmd->SCp.Status | (cmd->SCp.Message << 8);
-                                       else if (status_byte(cmd->SCp.Status) != GOOD)
-                                               cmd->result = (cmd->result & 0x00ffff) | (DID_ERROR << 16);
-
-                                       if ((cmd->cmnd[0] == REQUEST_SENSE) &&
-                                               hostdata->ses.cmd_len) {
-                                               scsi_eh_restore_cmnd(cmd, &hostdata->ses);
-                                               hostdata->ses.cmd_len = 0 ;
-                                       }
-
-                                       if ((cmd->cmnd[0] != REQUEST_SENSE) &&
-                                           (status_byte(cmd->SCp.Status) == CHECK_CONDITION)) {
-                                               scsi_eh_prep_cmnd(cmd, &hostdata->ses, NULL, 0, ~0);
 
-                                               dprintk(NDEBUG_AUTOSENSE, "scsi%d: performing request sense\n", HOSTNO);
-
-                                               LIST(cmd,hostdata->issue_queue);
-                                               SET_NEXT(cmd, hostdata->issue_queue);
-                                               hostdata->issue_queue = (struct scsi_cmnd *) cmd;
-                                               dprintk(NDEBUG_QUEUES, "scsi%d: REQUEST SENSE added to head of "
-                                                         "issue queue\n", H_NO(cmd));
-                                       } else {
-                                               cmd->scsi_done(cmd);
+                                       cmd->result &= ~0xffff;
+                                       cmd->result |= cmd->SCp.Status;
+                                       cmd->result |= cmd->SCp.Message << 8;
+
+                                       if (cmd->cmnd[0] == REQUEST_SENSE)
+                                               complete_cmd(instance, cmd);
+                                       else {
+                                               if (cmd->SCp.Status == SAM_STAT_CHECK_CONDITION ||
+                                                   cmd->SCp.Status == SAM_STAT_COMMAND_TERMINATED) {
+                                                       dsprintk(NDEBUG_QUEUES, instance, "autosense: adding cmd %p to tail of autosense queue\n",
+                                                                cmd);
+                                                       list_add_tail(&ncmd->list,
+                                                                     &hostdata->autosense);
+                                               } else
+                                                       complete_cmd(instance, cmd);
                                        }
 
-                                       local_irq_restore(flags);
-
-                                       NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
                                        /*
                                         * Restore phase bits to 0 so an interrupted selection,
                                         * arbitration can resume.
                                         */
                                        NCR5380_write(TARGET_COMMAND_REG, 0);
 
-                                       while ((NCR5380_read(STATUS_REG) & SR_BSY) && !hostdata->connected)
-                                               barrier();
+                                       /* Enable reselect interrupts */
+                                       NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
 
-                                       local_irq_save(flags);
-                                       hostdata->retain_dma_intr--;
-                                       /* ++roman: For Falcon SCSI, release the lock on the
-                                        * ST-DMA here if no other commands are waiting on the
-                                        * disconnected queue.
-                                        */
                                        maybe_release_dma_irq(instance);
-                                       local_irq_restore(flags);
                                        return;
                                case MESSAGE_REJECT:
                                        /* Accept message by clearing ACK */
                                        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-                                       /* Enable reselect interrupts */
-                                       NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
                                        switch (hostdata->last_message) {
                                        case HEAD_OF_QUEUE_TAG:
                                        case ORDERED_QUEUE_TAG:
@@ -2274,27 +2069,20 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
                                                cmd->device->tagged_supported = 0;
                                                hostdata->busy[cmd->device->id] |= (1 << cmd->device->lun);
                                                cmd->tag = TAG_NONE;
-                                               dprintk(NDEBUG_TAGS, "scsi%d: target %d lun %llu rejected "
-                                                          "QUEUE_TAG message; tagged queuing "
-                                                          "disabled\n",
-                                                          HOSTNO, cmd->device->id, cmd->device->lun);
+                                               dsprintk(NDEBUG_TAGS, instance, "target %d lun %llu rejected QUEUE_TAG message; tagged queuing disabled\n",
+                                                        scmd_id(cmd), cmd->device->lun);
                                                break;
                                        }
                                        break;
                                case DISCONNECT:
                                        /* Accept message by clearing ACK */
                                        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-                                       local_irq_save(flags);
-                                       cmd->device->disconnect = 1;
-                                       LIST(cmd,hostdata->disconnected_queue);
-                                       SET_NEXT(cmd, hostdata->disconnected_queue);
                                        hostdata->connected = NULL;
-                                       hostdata->disconnected_queue = cmd;
-                                       local_irq_restore(flags);
-                                       dprintk(NDEBUG_QUEUES, "scsi%d: command for target %d lun %llu was "
-                                                 "moved from connected to the "
-                                                 "disconnected_queue\n", HOSTNO,
-                                                 cmd->device->id, cmd->device->lun);
+                                       list_add(&ncmd->list, &hostdata->disconnected);
+                                       dsprintk(NDEBUG_INFORMATION | NDEBUG_QUEUES,
+                                                instance, "connected command %p for target %d lun %llu moved to disconnected queue\n",
+                                                cmd, scmd_id(cmd), cmd->device->lun);
+
                                        /*
                                         * Restore phase bits to 0 so an interrupted selection,
                                         * arbitration can resume.
@@ -2303,9 +2091,6 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
 
                                        /* Enable reselect interrupts */
                                        NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
-                                       /* Wait for bus free to avoid nasty timeouts */
-                                       while ((NCR5380_read(STATUS_REG) & SR_BSY) && !hostdata->connected)
-                                               barrier();
 #ifdef SUN3_SCSI_VME
                                        dregs->csr |= CSR_DMA_ENABLE;
 #endif
@@ -2324,37 +2109,30 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
                                case RESTORE_POINTERS:
                                        /* Accept message by clearing ACK */
                                        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-                                       /* Enable reselect interrupts */
-                                       NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
                                        break;
                                case EXTENDED_MESSAGE:
                                        /*
-                                        * Extended messages are sent in the following format :
-                                        * Byte
-                                        * 0            EXTENDED_MESSAGE == 1
-                                        * 1            length (includes one byte for code, doesn't
-                                        *              include first two bytes)
-                                        * 2            code
-                                        * 3..length+1  arguments
-                                        *
-                                        * Start the extended message buffer with the EXTENDED_MESSAGE
+                                        * Start the message buffer with the EXTENDED_MESSAGE
                                         * byte, since spi_print_msg() wants the whole thing.
                                         */
                                        extended_msg[0] = EXTENDED_MESSAGE;
                                        /* Accept first byte by clearing ACK */
                                        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
 
-                                       dprintk(NDEBUG_EXTENDED, "scsi%d: receiving extended message\n", HOSTNO);
+                                       spin_unlock_irq(&hostdata->lock);
+
+                                       dsprintk(NDEBUG_EXTENDED, instance, "receiving extended message\n");
 
                                        len = 2;
                                        data = extended_msg + 1;
                                        phase = PHASE_MSGIN;
                                        NCR5380_transfer_pio(instance, &phase, &len, &data);
-                                       dprintk(NDEBUG_EXTENDED, "scsi%d: length=%d, code=0x%02x\n", HOSTNO,
-                                                  (int)extended_msg[1], (int)extended_msg[2]);
+                                       dsprintk(NDEBUG_EXTENDED, instance, "length %d, code 0x%02x\n",
+                                                (int)extended_msg[1],
+                                                (int)extended_msg[2]);
 
-                                       if (!len && extended_msg[1] <=
-                                           (sizeof(extended_msg) - 1)) {
+                                       if (!len && extended_msg[1] > 0 &&
+                                           extended_msg[1] <= sizeof(extended_msg) - 2) {
                                                /* Accept third byte by clearing ACK */
                                                NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
                                                len = extended_msg[1] - 1;
@@ -2362,8 +2140,8 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
                                                phase = PHASE_MSGIN;
 
                                                NCR5380_transfer_pio(instance, &phase, &len, &data);
-                                               dprintk(NDEBUG_EXTENDED, "scsi%d: message received, residual %d\n",
-                                                          HOSTNO, len);
+                                               dsprintk(NDEBUG_EXTENDED, instance, "message received, residual %d\n",
+                                                        len);
 
                                                switch (extended_msg[2]) {
                                                case EXTENDED_SDTR:
@@ -2373,15 +2151,18 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
                                                        tmp = 0;
                                                }
                                        } else if (len) {
-                                               printk(KERN_NOTICE "scsi%d: error receiving "
-                                                      "extended message\n", HOSTNO);
+                                               shost_printk(KERN_ERR, instance, "error receiving extended message\n");
                                                tmp = 0;
                                        } else {
-                                               printk(KERN_NOTICE "scsi%d: extended message "
-                                                          "code %02x length %d is too long\n",
-                                                          HOSTNO, extended_msg[2], extended_msg[1]);
+                                               shost_printk(KERN_NOTICE, instance, "extended message code %02x length %d is too long\n",
+                                                            extended_msg[2], extended_msg[1]);
                                                tmp = 0;
                                        }
+
+                                       spin_lock_irq(&hostdata->lock);
+                                       if (!hostdata->connected)
+                                               return;
+
                                        /* Fall through to reject message */
 
                                        /*
@@ -2390,8 +2171,7 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
                                         */
                                default:
                                        if (!tmp) {
-                                               printk(KERN_INFO "scsi%d: rejecting message ",
-                                                      instance->host_no);
+                                               shost_printk(KERN_ERR, instance, "rejecting message ");
                                                spi_print_msg(extended_msg);
                                                printk("\n");
                                        } else if (tmp != EXTENDED_MESSAGE)
@@ -2414,18 +2194,11 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
                                hostdata->last_message = msgout;
                                NCR5380_transfer_pio(instance, &phase, &len, &data);
                                if (msgout == ABORT) {
-                                       local_irq_save(flags);
-#ifdef SUPPORT_TAGS
-                                       cmd_free_tag(cmd);
-#else
-                                       hostdata->busy[cmd->device->id] &= ~(1 << cmd->device->lun);
-#endif
                                        hostdata->connected = NULL;
                                        cmd->result = DID_ERROR << 16;
-                                       NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
+                                       complete_cmd(instance, cmd);
                                        maybe_release_dma_irq(instance);
-                                       local_irq_restore(flags);
-                                       cmd->scsi_done(cmd);
+                                       NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
                                        return;
                                }
                                msgout = NOP;
@@ -2447,22 +2220,25 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
                                cmd->SCp.Status = tmp;
                                break;
                        default:
-                               printk("scsi%d: unknown phase\n", HOSTNO);
+                               shost_printk(KERN_ERR, instance, "unknown phase\n");
                                NCR5380_dprint(NDEBUG_ANY, instance);
                        } /* switch(phase) */
-               } /* if (tmp * SR_REQ) */
-       } /* while (1) */
+               } else {
+                       spin_unlock_irq(&hostdata->lock);
+                       NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, SR_REQ, HZ);
+                       spin_lock_irq(&hostdata->lock);
+               }
+       }
 }
 
 /*
  * Function : void NCR5380_reselect (struct Scsi_Host *instance)
  *
  * Purpose : does reselection, initializing the instance->connected
- *     field to point to the scsi_cmnd for which the I_T_L or I_T_L_Q
- *     nexus has been reestablished,
+ * field to point to the scsi_cmnd for which the I_T_L or I_T_L_Q
+ * nexus has been reestablished,
  *
  * Inputs : instance - this instance of the NCR5380.
- *
  */
 
 
@@ -2471,7 +2247,7 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
 
 static void NCR5380_reselect(struct Scsi_Host *instance)
 {
-       SETUP_HOSTDATA(instance);
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
        unsigned char target_mask;
        unsigned char lun;
 #ifdef SUPPORT_TAGS
@@ -2480,7 +2256,8 @@ static void NCR5380_reselect(struct Scsi_Host *instance)
        unsigned char msg[3];
        int __maybe_unused len;
        unsigned char __maybe_unused *data, __maybe_unused phase;
-       struct scsi_cmnd *tmp = NULL, *prev;
+       struct NCR5380_cmd *ncmd;
+       struct scsi_cmnd *tmp;
 
        /*
         * Disable arbitration, etc. since the host adapter obviously
@@ -2488,11 +2265,10 @@ static void NCR5380_reselect(struct Scsi_Host *instance)
         */
 
        NCR5380_write(MODE_REG, MR_BASE);
-       hostdata->restart_select = 1;
 
        target_mask = NCR5380_read(CURRENT_SCSI_DATA_REG) & ~(hostdata->id_mask);
 
-       dprintk(NDEBUG_RESELECTION, "scsi%d: reselect\n", HOSTNO);
+       dsprintk(NDEBUG_RESELECTION, instance, "reselect\n");
 
        /*
         * At this point, we have detected that our SCSI ID is on the bus,
@@ -2504,17 +2280,22 @@ static void NCR5380_reselect(struct Scsi_Host *instance)
         */
 
        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_BSY);
-
-       while (NCR5380_read(STATUS_REG) & SR_SEL)
-               ;
+       if (NCR5380_poll_politely(instance,
+                                 STATUS_REG, SR_SEL, 0, 2 * HZ) < 0) {
+               NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+               return;
+       }
        NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
 
        /*
         * Wait for target to go into MSGIN.
         */
 
-       while (!(NCR5380_read(STATUS_REG) & SR_REQ))
-               ;
+       if (NCR5380_poll_politely(instance,
+                                 STATUS_REG, SR_REQ, SR_REQ, 2 * HZ) < 0) {
+               do_abort(instance);
+               return;
+       }
 
 #if defined(CONFIG_SUN3) && defined(REAL_DMA)
        /* acknowledge toggle to MSGIN */
@@ -2527,15 +2308,21 @@ static void NCR5380_reselect(struct Scsi_Host *instance)
        data = msg;
        phase = PHASE_MSGIN;
        NCR5380_transfer_pio(instance, &phase, &len, &data);
+
+       if (len) {
+               do_abort(instance);
+               return;
+       }
 #endif
 
        if (!(msg[0] & 0x80)) {
-               printk(KERN_DEBUG "scsi%d: expecting IDENTIFY message, got ", HOSTNO);
+               shost_printk(KERN_ERR, instance, "expecting IDENTIFY message, got ");
                spi_print_msg(msg);
+               printk("\n");
                do_abort(instance);
                return;
        }
-       lun = (msg[0] & 0x07);
+       lun = msg[0] & 0x07;
 
 #if defined(SUPPORT_TAGS) && !defined(CONFIG_SUN3)
        /* If the phase is still MSGIN, the target wants to send some more
@@ -2551,8 +2338,8 @@ static void NCR5380_reselect(struct Scsi_Host *instance)
                if (!NCR5380_transfer_pio(instance, &phase, &len, &data) &&
                    msg[1] == SIMPLE_QUEUE_TAG)
                        tag = msg[2];
-               dprintk(NDEBUG_TAGS, "scsi%d: target mask %02x, lun %d sent tag %d at "
-                          "reselection\n", HOSTNO, target_mask, lun, tag);
+               dsprintk(NDEBUG_TAGS, instance, "reselect: target mask %02x, lun %d sent tag %d\n",
+                        target_mask, lun, tag);
        }
 #endif
 
@@ -2561,36 +2348,34 @@ static void NCR5380_reselect(struct Scsi_Host *instance)
         * just reestablished, and remove it from the disconnected queue.
         */
 
-       for (tmp = (struct scsi_cmnd *) hostdata->disconnected_queue, prev = NULL;
-            tmp; prev = tmp, tmp = NEXT(tmp)) {
-               if ((target_mask == (1 << tmp->device->id)) && (lun == tmp->device->lun)
+       tmp = NULL;
+       list_for_each_entry(ncmd, &hostdata->disconnected, list) {
+               struct scsi_cmnd *cmd = NCR5380_to_scmd(ncmd);
+
+               if (target_mask == (1 << scmd_id(cmd)) &&
+                   lun == (u8)cmd->device->lun
 #ifdef SUPPORT_TAGS
-                   && (tag == tmp->tag)
+                   && (tag == cmd->tag)
 #endif
                    ) {
-                       if (prev) {
-                               REMOVE(prev, NEXT(prev), tmp, NEXT(tmp));
-                               SET_NEXT(prev, NEXT(tmp));
-                       } else {
-                               REMOVE(-1, hostdata->disconnected_queue, tmp, NEXT(tmp));
-                               hostdata->disconnected_queue = NEXT(tmp);
-                       }
-                       SET_NEXT(tmp, NULL);
+                       list_del(&ncmd->list);
+                       tmp = cmd;
                        break;
                }
        }
 
-       if (!tmp) {
-               printk(KERN_WARNING "scsi%d: warning: target bitmask %02x lun %d "
-#ifdef SUPPORT_TAGS
-                      "tag %d "
-#endif
-                      "not in disconnected_queue.\n",
-                      HOSTNO, target_mask, lun
+       if (tmp) {
+               dsprintk(NDEBUG_RESELECTION | NDEBUG_QUEUES, instance,
+                        "reselect: removed %p from disconnected queue\n", tmp);
+       } else {
+
 #ifdef SUPPORT_TAGS
-                      , tag
+               shost_printk(KERN_ERR, instance, "target bitmask 0x%02x lun %d tag %d not in disconnected queue.\n",
+                            target_mask, lun, tag);
+#else
+               shost_printk(KERN_ERR, instance, "target bitmask 0x%02x lun %d not in disconnected queue.\n",
+                            target_mask, lun);
 #endif
-                       );
                /*
                 * Since we have an established nexus that we can't do anything
                 * with, we must abort it.
@@ -2614,7 +2399,8 @@ static void NCR5380_reselect(struct Scsi_Host *instance)
                }
                /* setup this command for dma if not already */
                if ((count >= DMA_MIN_SIZE) && (sun3_dma_setup_done != tmp)) {
-                       sun3scsi_dma_setup(d, count, rq_data_dir(tmp->request));
+                       sun3scsi_dma_setup(instance, d, count,
+                                          rq_data_dir(tmp->request));
                        sun3_dma_setup_done = tmp;
                }
        }
@@ -2639,235 +2425,196 @@ static void NCR5380_reselect(struct Scsi_Host *instance)
                if (!NCR5380_transfer_pio(instance, &phase, &len, &data) &&
                    msg[1] == SIMPLE_QUEUE_TAG)
                        tag = msg[2];
-               dprintk(NDEBUG_TAGS, "scsi%d: target mask %02x, lun %d sent tag %d at reselection\n"
-                       HOSTNO, target_mask, lun, tag);
+               dsprintk(NDEBUG_TAGS, instance, "reselect: target mask %02x, lun %d sent tag %d\n"
+                        target_mask, lun, tag);
        }
 #endif
 
        hostdata->connected = tmp;
-       dprintk(NDEBUG_RESELECTION, "scsi%d: nexus established, target = %d, lun = %llu, tag = %d\n",
-                  HOSTNO, tmp->device->id, tmp->device->lun, tmp->tag);
+       dsprintk(NDEBUG_RESELECTION, instance, "nexus established, target %d, lun %llu, tag %d\n",
+                scmd_id(tmp), tmp->device->lun, tmp->tag);
 }
 
 
-/*
- * Function : int NCR5380_abort (struct scsi_cmnd *cmd)
- *
- * Purpose : abort a command
- *
- * Inputs : cmd - the scsi_cmnd to abort, code - code to set the
- *     host byte of the result field to, if zero DID_ABORTED is
- *     used.
- *
- * Returns : SUCCESS - success, FAILED on failure.
- *
- * XXX - there is no way to abort the command that is currently
- *      connected, you have to wait for it to complete.  If this is
- *      a problem, we could implement longjmp() / setjmp(), setjmp()
- *      called where the loop started in NCR5380_main().
+/**
+ * list_find_cmd - test for presence of a command in a linked list
+ * @haystack: list of commands
+ * @needle: command to search for
  */
 
-static
-int NCR5380_abort(struct scsi_cmnd *cmd)
+static bool list_find_cmd(struct list_head *haystack,
+                          struct scsi_cmnd *needle)
 {
-       struct Scsi_Host *instance = cmd->device->host;
-       SETUP_HOSTDATA(instance);
-       struct scsi_cmnd *tmp, **prev;
-       unsigned long flags;
+       struct NCR5380_cmd *ncmd;
 
-       scmd_printk(KERN_NOTICE, cmd, "aborting command\n");
+       list_for_each_entry(ncmd, haystack, list)
+               if (NCR5380_to_scmd(ncmd) == needle)
+                       return true;
+       return false;
+}
 
-       NCR5380_print_status(instance);
+/**
+ * list_remove_cmd - remove a command from linked list
+ * @haystack: list of commands
+ * @needle: command to remove
+ */
 
-       local_irq_save(flags);
+static bool list_del_cmd(struct list_head *haystack,
+                         struct scsi_cmnd *needle)
+{
+       if (list_find_cmd(haystack, needle)) {
+               struct NCR5380_cmd *ncmd = scsi_cmd_priv(needle);
 
-       dprintk(NDEBUG_ABORT, "scsi%d: abort called basr 0x%02x, sr 0x%02x\n", HOSTNO,
-                   NCR5380_read(BUS_AND_STATUS_REG),
-                   NCR5380_read(STATUS_REG));
+               list_del(&ncmd->list);
+               return true;
+       }
+       return false;
+}
 
-#if 1
-       /*
-        * Case 1 : If the command is the currently executing command,
-        * we'll set the aborted flag and return control so that
-        * information transfer routine can exit cleanly.
-        */
+/**
+ * NCR5380_abort - scsi host eh_abort_handler() method
+ * @cmd: the command to be aborted
+ *
+ * Try to abort a given command by removing it from queues and/or sending
+ * the target an abort message. This may not succeed in causing a target
+ * to abort the command. Nonetheless, the low-level driver must forget about
+ * the command because the mid-layer reclaims it and it may be re-issued.
+ *
+ * The normal path taken by a command is as follows. For EH we trace this
+ * same path to locate and abort the command.
+ *
+ * unissued -> selecting -> [unissued -> selecting ->]... connected ->
+ * [disconnected -> connected ->]...
+ * [autosense -> connected ->] done
+ *
+ * If cmd is unissued then just remove it.
+ * If cmd is disconnected, try to select the target.
+ * If cmd is connected, try to send an abort message.
+ * If cmd is waiting for autosense, give it a chance to complete but check
+ * that it isn't left connected.
+ * If cmd was not found at all then presumably it has already been completed,
+ * in which case return SUCCESS to try to avoid further EH measures.
+ * If the command has not completed yet, we must not fail to find it.
+ */
 
-       if (hostdata->connected == cmd) {
+static int NCR5380_abort(struct scsi_cmnd *cmd)
+{
+       struct Scsi_Host *instance = cmd->device->host;
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+       unsigned long flags;
+       int result = SUCCESS;
 
-               dprintk(NDEBUG_ABORT, "scsi%d: aborting connected command\n", HOSTNO);
-               /*
-                * We should perform BSY checking, and make sure we haven't slipped
-                * into BUS FREE.
-                */
+       spin_lock_irqsave(&hostdata->lock, flags);
 
-               /*      NCR5380_write(INITIATOR_COMMAND_REG, ICR_ASSERT_ATN); */
-               /*
-                * Since we can't change phases until we've completed the current
-                * handshake, we have to source or sink a byte of data if the current
-                * phase is not MSGOUT.
-                */
+#if (NDEBUG & NDEBUG_ANY)
+       scmd_printk(KERN_INFO, cmd, __func__);
+#endif
+       NCR5380_dprint(NDEBUG_ANY, instance);
+       NCR5380_dprint_phase(NDEBUG_ANY, instance);
 
-               /*
-                * Return control to the executing NCR drive so we can clear the
-                * aborted flag and get back into our main loop.
-                */
+       if (list_del_cmd(&hostdata->unissued, cmd)) {
+               dsprintk(NDEBUG_ABORT, instance,
+                        "abort: removed %p from issue queue\n", cmd);
+               cmd->result = DID_ABORT << 16;
+               cmd->scsi_done(cmd); /* No tag or busy flag to worry about */
+       }
 
-               if (do_abort(instance) == 0) {
-                       hostdata->aborted = 1;
-                       hostdata->connected = NULL;
-                       cmd->result = DID_ABORT << 16;
-#ifdef SUPPORT_TAGS
-                       cmd_free_tag(cmd);
-#else
-                       hostdata->busy[cmd->device->id] &= ~(1 << cmd->device->lun);
-#endif
-                       maybe_release_dma_irq(instance);
-                       local_irq_restore(flags);
-                       cmd->scsi_done(cmd);
-                       return SUCCESS;
-               } else {
-                       local_irq_restore(flags);
-                       printk("scsi%d: abort of connected command failed!\n", HOSTNO);
-                       return FAILED;
-               }
+       if (hostdata->selecting == cmd) {
+               dsprintk(NDEBUG_ABORT, instance,
+                        "abort: cmd %p == selecting\n", cmd);
+               hostdata->selecting = NULL;
+               cmd->result = DID_ABORT << 16;
+               complete_cmd(instance, cmd);
+               goto out;
        }
-#endif
 
-       /*
-        * Case 2 : If the command hasn't been issued yet, we simply remove it
-        *          from the issue queue.
-        */
-       for (prev = (struct scsi_cmnd **)&(hostdata->issue_queue),
-            tmp = (struct scsi_cmnd *)hostdata->issue_queue;
-            tmp; prev = NEXTADDR(tmp), tmp = NEXT(tmp)) {
-               if (cmd == tmp) {
-                       REMOVE(5, *prev, tmp, NEXT(tmp));
-                       (*prev) = NEXT(tmp);
-                       SET_NEXT(tmp, NULL);
-                       tmp->result = DID_ABORT << 16;
-                       maybe_release_dma_irq(instance);
-                       local_irq_restore(flags);
-                       dprintk(NDEBUG_ABORT, "scsi%d: abort removed command from issue queue.\n",
-                                   HOSTNO);
-                       /* Tagged queuing note: no tag to free here, hasn't been assigned
-                        * yet... */
-                       tmp->scsi_done(tmp);
-                       return SUCCESS;
+       if (list_del_cmd(&hostdata->disconnected, cmd)) {
+               dsprintk(NDEBUG_ABORT, instance,
+                        "abort: removed %p from disconnected list\n", cmd);
+               cmd->result = DID_ERROR << 16;
+               if (!hostdata->connected)
+                       NCR5380_select(instance, cmd);
+               if (hostdata->connected != cmd) {
+                       complete_cmd(instance, cmd);
+                       result = FAILED;
+                       goto out;
                }
        }
 
-       /*
-        * Case 3 : If any commands are connected, we're going to fail the abort
-        *          and let the high level SCSI driver retry at a later time or
-        *          issue a reset.
-        *
-        *          Timeouts, and therefore aborted commands, will be highly unlikely
-        *          and handling them cleanly in this situation would make the common
-        *          case of noresets less efficient, and would pollute our code.  So,
-        *          we fail.
-        */
+       if (hostdata->connected == cmd) {
+               dsprintk(NDEBUG_ABORT, instance, "abort: cmd %p is connected\n", cmd);
+               hostdata->connected = NULL;
+               if (do_abort(instance)) {
+                       set_host_byte(cmd, DID_ERROR);
+                       complete_cmd(instance, cmd);
+                       result = FAILED;
+                       goto out;
+               }
+               set_host_byte(cmd, DID_ABORT);
+#ifdef REAL_DMA
+               hostdata->dma_len = 0;
+#endif
+               if (cmd->cmnd[0] == REQUEST_SENSE)
+                       complete_cmd(instance, cmd);
+               else {
+                       struct NCR5380_cmd *ncmd = scsi_cmd_priv(cmd);
 
-       if (hostdata->connected) {
-               local_irq_restore(flags);
-               dprintk(NDEBUG_ABORT, "scsi%d: abort failed, command connected.\n", HOSTNO);
-               return FAILED;
+                       /* Perform autosense for this command */
+                       list_add(&ncmd->list, &hostdata->autosense);
+               }
        }
 
-       /*
-        * Case 4: If the command is currently disconnected from the bus, and
-        *      there are no connected commands, we reconnect the I_T_L or
-        *      I_T_L_Q nexus associated with it, go into message out, and send
-        *      an abort message.
-        *
-        * This case is especially ugly. In order to reestablish the nexus, we
-        * need to call NCR5380_select().  The easiest way to implement this
-        * function was to abort if the bus was busy, and let the interrupt
-        * handler triggered on the SEL for reselect take care of lost arbitrations
-        * where necessary, meaning interrupts need to be enabled.
-        *
-        * When interrupts are enabled, the queues may change - so we
-        * can't remove it from the disconnected queue before selecting it
-        * because that could cause a failure in hashing the nexus if that
-        * device reselected.
-        *
-        * Since the queues may change, we can't use the pointers from when we
-        * first locate it.
-        *
-        * So, we must first locate the command, and if NCR5380_select()
-        * succeeds, then issue the abort, relocate the command and remove
-        * it from the disconnected queue.
-        */
-
-       for (tmp = (struct scsi_cmnd *) hostdata->disconnected_queue; tmp;
-            tmp = NEXT(tmp)) {
-               if (cmd == tmp) {
-                       local_irq_restore(flags);
-                       dprintk(NDEBUG_ABORT, "scsi%d: aborting disconnected command.\n", HOSTNO);
-
-                       if (NCR5380_select(instance, cmd))
-                               return FAILED;
-
-                       dprintk(NDEBUG_ABORT, "scsi%d: nexus reestablished.\n", HOSTNO);
-
-                       do_abort(instance);
-
-                       local_irq_save(flags);
-                       for (prev = (struct scsi_cmnd **)&(hostdata->disconnected_queue),
-                            tmp = (struct scsi_cmnd *)hostdata->disconnected_queue;
-                            tmp; prev = NEXTADDR(tmp), tmp = NEXT(tmp)) {
-                               if (cmd == tmp) {
-                                       REMOVE(5, *prev, tmp, NEXT(tmp));
-                                       *prev = NEXT(tmp);
-                                       SET_NEXT(tmp, NULL);
-                                       tmp->result = DID_ABORT << 16;
-                                       /* We must unlock the tag/LUN immediately here, since the
-                                        * target goes to BUS FREE and doesn't send us another
-                                        * message (COMMAND_COMPLETE or the like)
-                                        */
-#ifdef SUPPORT_TAGS
-                                       cmd_free_tag(tmp);
-#else
-                                       hostdata->busy[cmd->device->id] &= ~(1 << cmd->device->lun);
-#endif
-                                       maybe_release_dma_irq(instance);
-                                       local_irq_restore(flags);
-                                       tmp->scsi_done(tmp);
-                                       return SUCCESS;
-                               }
-                       }
+       if (list_find_cmd(&hostdata->autosense, cmd)) {
+               dsprintk(NDEBUG_ABORT, instance,
+                        "abort: found %p on sense queue\n", cmd);
+               spin_unlock_irqrestore(&hostdata->lock, flags);
+               queue_work(hostdata->work_q, &hostdata->main_task);
+               msleep(1000);
+               spin_lock_irqsave(&hostdata->lock, flags);
+               if (list_del_cmd(&hostdata->autosense, cmd)) {
+                       dsprintk(NDEBUG_ABORT, instance,
+                                "abort: removed %p from sense queue\n", cmd);
+                       set_host_byte(cmd, DID_ABORT);
+                       complete_cmd(instance, cmd);
+                       goto out;
                }
        }
 
-       /* Maybe it is sufficient just to release the ST-DMA lock... (if
-        * possible at all) At least, we should check if the lock could be
-        * released after the abort, in case it is kept due to some bug.
-        */
-       maybe_release_dma_irq(instance);
-       local_irq_restore(flags);
+       if (hostdata->connected == cmd) {
+               dsprintk(NDEBUG_ABORT, instance, "abort: cmd %p is connected\n", cmd);
+               hostdata->connected = NULL;
+               if (do_abort(instance)) {
+                       set_host_byte(cmd, DID_ERROR);
+                       complete_cmd(instance, cmd);
+                       result = FAILED;
+                       goto out;
+               }
+               set_host_byte(cmd, DID_ABORT);
+#ifdef REAL_DMA
+               hostdata->dma_len = 0;
+#endif
+               complete_cmd(instance, cmd);
+       }
 
-       /*
-        * Case 5 : If we reached this point, the command was not found in any of
-        *          the queues.
-        *
-        * We probably reached this point because of an unlikely race condition
-        * between the command completing successfully and the abortion code,
-        * so we won't panic, but we will notify the user in case something really
-        * broke.
-        */
+out:
+       if (result == FAILED)
+               dsprintk(NDEBUG_ABORT, instance, "abort: failed to abort %p\n", cmd);
+       else
+               dsprintk(NDEBUG_ABORT, instance, "abort: successfully aborted %p\n", cmd);
 
-       printk(KERN_INFO "scsi%d: warning : SCSI command probably completed successfully before abortion\n", HOSTNO);
+       queue_work(hostdata->work_q, &hostdata->main_task);
+       maybe_release_dma_irq(instance);
+       spin_unlock_irqrestore(&hostdata->lock, flags);
 
-       return FAILED;
+       return result;
 }
 
 
-/*
- * Function : int NCR5380_reset (struct scsi_cmnd *cmd)
- *
- * Purpose : reset the SCSI bus.
- *
- * Returns : SUCCESS or FAILURE
+/**
+ * NCR5380_bus_reset - reset the SCSI bus
+ * @cmd: SCSI command undergoing EH
  *
+ * Returns SUCCESS
  */
 
 static int NCR5380_bus_reset(struct scsi_cmnd *cmd)
@@ -2876,23 +2623,22 @@ static int NCR5380_bus_reset(struct scsi_cmnd *cmd)
        struct NCR5380_hostdata *hostdata = shost_priv(instance);
        int i;
        unsigned long flags;
+       struct NCR5380_cmd *ncmd;
 
-       NCR5380_print_status(instance);
+       spin_lock_irqsave(&hostdata->lock, flags);
+
+#if (NDEBUG & NDEBUG_ANY)
+       scmd_printk(KERN_INFO, cmd, __func__);
+#endif
+       NCR5380_dprint(NDEBUG_ANY, instance);
+       NCR5380_dprint_phase(NDEBUG_ANY, instance);
+
+       do_reset(instance);
 
-       /* get in phase */
-       NCR5380_write(TARGET_COMMAND_REG,
-                     PHASE_SR_TO_TCR(NCR5380_read(STATUS_REG)));
-       /* assert RST */
-       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_RST);
-       udelay(40);
        /* reset NCR registers */
-       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
        NCR5380_write(MODE_REG, MR_BASE);
        NCR5380_write(TARGET_COMMAND_REG, 0);
        NCR5380_write(SELECT_ENABLE_REG, 0);
-       /* ++roman: reset interrupt condition! otherwise no interrupts don't get
-        * through anymore ... */
-       (void)NCR5380_read(RESET_PARITY_INTERRUPT_REG);
 
        /* After the reset, there are no more connected or disconnected commands
         * and no busy units; so clear the low-level status here to avoid
@@ -2900,17 +2646,34 @@ static int NCR5380_bus_reset(struct scsi_cmnd *cmd)
         * commands!
         */
 
-       if (hostdata->issue_queue)
-               dprintk(NDEBUG_ABORT, "scsi%d: reset aborted issued command(s)\n", H_NO(cmd));
-       if (hostdata->connected)
-               dprintk(NDEBUG_ABORT, "scsi%d: reset aborted a connected command\n", H_NO(cmd));
-       if (hostdata->disconnected_queue)
-               dprintk(NDEBUG_ABORT, "scsi%d: reset aborted disconnected command(s)\n", H_NO(cmd));
+       hostdata->selecting = NULL;
+
+       list_for_each_entry(ncmd, &hostdata->disconnected, list) {
+               struct scsi_cmnd *cmd = NCR5380_to_scmd(ncmd);
+
+               set_host_byte(cmd, DID_RESET);
+               cmd->scsi_done(cmd);
+       }
+
+       list_for_each_entry(ncmd, &hostdata->autosense, list) {
+               struct scsi_cmnd *cmd = NCR5380_to_scmd(ncmd);
+
+               set_host_byte(cmd, DID_RESET);
+               cmd->scsi_done(cmd);
+       }
+
+       if (hostdata->connected) {
+               set_host_byte(hostdata->connected, DID_RESET);
+               complete_cmd(instance, hostdata->connected);
+               hostdata->connected = NULL;
+       }
+
+       if (hostdata->sensing) {
+               set_host_byte(hostdata->connected, DID_RESET);
+               complete_cmd(instance, hostdata->sensing);
+               hostdata->sensing = NULL;
+       }
 
-       local_irq_save(flags);
-       hostdata->issue_queue = NULL;
-       hostdata->connected = NULL;
-       hostdata->disconnected_queue = NULL;
 #ifdef SUPPORT_TAGS
        free_all_tags(hostdata);
 #endif
@@ -2920,8 +2683,9 @@ static int NCR5380_bus_reset(struct scsi_cmnd *cmd)
        hostdata->dma_len = 0;
 #endif
 
+       queue_work(hostdata->work_q, &hostdata->main_task);
        maybe_release_dma_irq(instance);
-       local_irq_restore(flags);
+       spin_unlock_irqrestore(&hostdata->lock, flags);
 
        return SUCCESS;
 }
index 5ede3daa93dc546c63a67809882f61009fb29582..78d1b2963f2c04b00ef0afbfb33be0560eef0361 100644 (file)
@@ -66,7 +66,6 @@
 
 #include <linux/module.h>
 #include <linux/types.h>
-#include <linux/delay.h>
 #include <linux/blkdev.h>
 #include <linux/interrupt.h>
 #include <linux/init.h>
@@ -98,7 +97,6 @@
 
 #define NCR5380_queue_command           atari_scsi_queue_command
 #define NCR5380_abort                   atari_scsi_abort
-#define NCR5380_show_info               atari_scsi_show_info
 #define NCR5380_info                    atari_scsi_info
 
 #define NCR5380_dma_read_setup(instance, data, count) \
@@ -161,23 +159,10 @@ static inline unsigned long SCSI_DMA_GETADR(void)
        return adr;
 }
 
-#define HOSTDATA_DMALEN                (((struct NCR5380_hostdata *) \
-                               (atari_scsi_host->hostdata))->dma_len)
-
-/* Time (in jiffies) to wait after a reset; the SCSI standard calls for 250ms,
- * we usually do 0.5s to be on the safe side. But Toshiba CD-ROMs once more
- * need ten times the standard value... */
-#ifndef CONFIG_ATARI_SCSI_TOSHIBA_DELAY
-#define        AFTER_RESET_DELAY       (HZ/2)
-#else
-#define        AFTER_RESET_DELAY       (5*HZ/2)
-#endif
-
 #ifdef REAL_DMA
 static void atari_scsi_fetch_restbytes(void);
 #endif
 
-static struct Scsi_Host *atari_scsi_host;
 static unsigned char (*atari_scsi_reg_read)(unsigned char reg);
 static void (*atari_scsi_reg_write)(unsigned char reg, unsigned char value);
 
@@ -208,12 +193,12 @@ static int setup_cmd_per_lun = -1;
 module_param(setup_cmd_per_lun, int, 0);
 static int setup_sg_tablesize = -1;
 module_param(setup_sg_tablesize, int, 0);
-#ifdef SUPPORT_TAGS
 static int setup_use_tagged_queuing = -1;
 module_param(setup_use_tagged_queuing, int, 0);
-#endif
 static int setup_hostid = -1;
 module_param(setup_hostid, int, 0);
+static int setup_toshiba_delay = -1;
+module_param(setup_toshiba_delay, int, 0);
 
 
 #if defined(REAL_DMA)
@@ -273,15 +258,17 @@ static void scsi_dma_buserr(int irq, void *dummy)
 #endif
 
 
-static irqreturn_t scsi_tt_intr(int irq, void *dummy)
+static irqreturn_t scsi_tt_intr(int irq, void *dev)
 {
 #ifdef REAL_DMA
+       struct Scsi_Host *instance = dev;
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
        int dma_stat;
 
        dma_stat = tt_scsi_dma.dma_ctrl;
 
-       dprintk(NDEBUG_INTR, "scsi%d: NCR5380 interrupt, DMA status = %02x\n",
-                  atari_scsi_host->host_no, dma_stat & 0xff);
+       dsprintk(NDEBUG_INTR, instance, "NCR5380 interrupt, DMA status = %02x\n",
+                dma_stat & 0xff);
 
        /* Look if it was the DMA that has interrupted: First possibility
         * is that a bus error occurred...
@@ -304,7 +291,8 @@ static irqreturn_t scsi_tt_intr(int irq, void *dummy)
         * data reg!
         */
        if ((dma_stat & 0x02) && !(dma_stat & 0x40)) {
-               atari_dma_residual = HOSTDATA_DMALEN - (SCSI_DMA_READ_P(dma_addr) - atari_dma_startaddr);
+               atari_dma_residual = hostdata->dma_len -
+                       (SCSI_DMA_READ_P(dma_addr) - atari_dma_startaddr);
 
                dprintk(NDEBUG_DMA, "SCSI DMA: There are %ld residual bytes.\n",
                           atari_dma_residual);
@@ -356,15 +344,17 @@ static irqreturn_t scsi_tt_intr(int irq, void *dummy)
 
 #endif /* REAL_DMA */
 
-       NCR5380_intr(irq, dummy);
+       NCR5380_intr(irq, dev);
 
        return IRQ_HANDLED;
 }
 
 
-static irqreturn_t scsi_falcon_intr(int irq, void *dummy)
+static irqreturn_t scsi_falcon_intr(int irq, void *dev)
 {
 #ifdef REAL_DMA
+       struct Scsi_Host *instance = dev;
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
        int dma_stat;
 
        /* Turn off DMA and select sector counter register before
@@ -399,7 +389,7 @@ static irqreturn_t scsi_falcon_intr(int irq, void *dummy)
                        printk(KERN_ERR "SCSI DMA error: %ld bytes lost in "
                               "ST-DMA fifo\n", transferred & 15);
 
-               atari_dma_residual = HOSTDATA_DMALEN - transferred;
+               atari_dma_residual = hostdata->dma_len - transferred;
                dprintk(NDEBUG_DMA, "SCSI DMA: There are %ld residual bytes.\n",
                           atari_dma_residual);
        } else
@@ -411,13 +401,14 @@ static irqreturn_t scsi_falcon_intr(int irq, void *dummy)
                 * data to the original destination address.
                 */
                memcpy(atari_dma_orig_addr, phys_to_virt(atari_dma_startaddr),
-                      HOSTDATA_DMALEN - atari_dma_residual);
+                      hostdata->dma_len - atari_dma_residual);
                atari_dma_orig_addr = NULL;
        }
 
 #endif /* REAL_DMA */
 
-       NCR5380_intr(irq, dummy);
+       NCR5380_intr(irq, dev);
+
        return IRQ_HANDLED;
 }
 
@@ -488,7 +479,7 @@ static int __init atari_scsi_setup(char *str)
         * Defaults depend on TT or Falcon, determined at run time.
         * Negative values mean don't change.
         */
-       int ints[6];
+       int ints[8];
 
        get_options(str, ARRAY_SIZE(ints), ints);
 
@@ -504,10 +495,11 @@ static int __init atari_scsi_setup(char *str)
                setup_sg_tablesize = ints[3];
        if (ints[0] >= 4)
                setup_hostid = ints[4];
-#ifdef SUPPORT_TAGS
        if (ints[0] >= 5)
                setup_use_tagged_queuing = ints[5];
-#endif
+       /* ints[6] (use_pdma) is ignored */
+       if (ints[0] >= 7)
+               setup_toshiba_delay = ints[7];
 
        return 1;
 }
@@ -516,38 +508,6 @@ __setup("atascsi=", atari_scsi_setup);
 #endif /* !MODULE */
 
 
-#ifdef CONFIG_ATARI_SCSI_RESET_BOOT
-static void __init atari_scsi_reset_boot(void)
-{
-       unsigned long end;
-
-       /*
-        * Do a SCSI reset to clean up the bus during initialization. No messing
-        * with the queues, interrupts, or locks necessary here.
-        */
-
-       printk("Atari SCSI: resetting the SCSI bus...");
-
-       /* get in phase */
-       NCR5380_write(TARGET_COMMAND_REG,
-                     PHASE_SR_TO_TCR(NCR5380_read(STATUS_REG)));
-
-       /* assert RST */
-       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_RST);
-       /* The min. reset hold time is 25us, so 40us should be enough */
-       udelay(50);
-       /* reset RST and interrupt */
-       NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-       NCR5380_read(RESET_PARITY_INTERRUPT_REG);
-
-       end = jiffies + AFTER_RESET_DELAY;
-       while (time_before(jiffies, end))
-               barrier();
-
-       printk(" done\n");
-}
-#endif
-
 #if defined(REAL_DMA)
 
 static unsigned long atari_scsi_dma_setup(struct Scsi_Host *instance,
@@ -815,14 +775,14 @@ static int atari_scsi_bus_reset(struct scsi_cmnd *cmd)
 static struct scsi_host_template atari_scsi_template = {
        .module                 = THIS_MODULE,
        .proc_name              = DRV_MODULE_NAME,
-       .show_info              = atari_scsi_show_info,
        .name                   = "Atari native SCSI",
        .info                   = atari_scsi_info,
        .queuecommand           = atari_scsi_queue_command,
        .eh_abort_handler       = atari_scsi_abort,
        .eh_bus_reset_handler   = atari_scsi_bus_reset,
        .this_id                = 7,
-       .use_clustering         = DISABLE_CLUSTERING
+       .use_clustering         = DISABLE_CLUSTERING,
+       .cmd_size               = NCR5380_CMD_SIZE,
 };
 
 static int __init atari_scsi_probe(struct platform_device *pdev)
@@ -880,7 +840,7 @@ static int __init atari_scsi_probe(struct platform_device *pdev)
        } else {
                /* Test if a host id is set in the NVRam */
                if (ATARIHW_PRESENT(TT_CLK) && nvram_check_checksum()) {
-                       unsigned char b = nvram_read_byte(14);
+                       unsigned char b = nvram_read_byte(16);
 
                        /* Arbitration enabled? (for TOS)
                         * If yes, use configured host ID
@@ -915,21 +875,18 @@ static int __init atari_scsi_probe(struct platform_device *pdev)
                error = -ENOMEM;
                goto fail_alloc;
        }
-       atari_scsi_host = instance;
-
-#ifdef CONFIG_ATARI_SCSI_RESET_BOOT
-       atari_scsi_reset_boot();
-#endif
 
        instance->irq = irq->start;
 
        host_flags |= IS_A_TT() ? 0 : FLAG_LATE_DMA_SETUP;
-
 #ifdef SUPPORT_TAGS
        host_flags |= setup_use_tagged_queuing > 0 ? FLAG_TAGGED_QUEUING : 0;
 #endif
+       host_flags |= setup_toshiba_delay > 0 ? FLAG_TOSHIBA_DELAY : 0;
 
-       NCR5380_init(instance, host_flags);
+       error = NCR5380_init(instance, host_flags);
+       if (error)
+               goto fail_init;
 
        if (IS_A_TT()) {
                error = request_irq(instance->irq, scsi_tt_intr, 0,
@@ -975,6 +932,8 @@ static int __init atari_scsi_probe(struct platform_device *pdev)
 #endif
        }
 
+       NCR5380_maybe_reset_bus(instance);
+
        error = scsi_add_host(instance, NULL);
        if (error)
                goto fail_host;
@@ -989,6 +948,7 @@ fail_host:
                free_irq(instance->irq, instance);
 fail_irq:
        NCR5380_exit(instance);
+fail_init:
        scsi_host_put(instance);
 fail_alloc:
        if (atari_dma_buffer)
index 4e7cad27246944f6e79cf21a56fb3f25f96e1234..bad5f32e1f670587ddedda3ec97104e1c2d4555a 100644 (file)
@@ -3,6 +3,7 @@ config BE2ISCSI
        depends on PCI && SCSI && NET
        select SCSI_ISCSI_ATTRS
        select ISCSI_BOOT_SYSFS
+       select IRQ_POLL
 
        help
        This driver implements the iSCSI functionality for Emulex
index 77f992e7472629eeeaedf3470ced43d02077d9ca..a41c6432f4446cf9082916a0859e08be3c0a70c7 100644 (file)
@@ -20,7 +20,7 @@
 
 #include <linux/pci.h>
 #include <linux/if_vlan.h>
-#include <linux/blk-iopoll.h>
+#include <linux/irq_poll.h>
 #define FW_VER_LEN     32
 #define MCC_Q_LEN      128
 #define MCC_CQ_LEN     256
@@ -101,7 +101,7 @@ struct be_eq_obj {
        struct beiscsi_hba *phba;
        struct be_queue_info *cq;
        struct work_struct work_cqs; /* Work Item */
-       struct blk_iopoll       iopoll;
+       struct irq_poll iopoll;
 };
 
 struct be_mcc_obj {
index b7087ba69d8df1c2daa9ac7b1b8a9ab314257a8b..022e87b62e401a37e1d6578e065f389ce560a8b4 100644 (file)
@@ -1292,9 +1292,9 @@ static void beiscsi_flush_cq(struct beiscsi_hba *phba)
 
        for (i = 0; i < phba->num_cpus; i++) {
                pbe_eq = &phwi_context->be_eq[i];
-               blk_iopoll_disable(&pbe_eq->iopoll);
+               irq_poll_disable(&pbe_eq->iopoll);
                beiscsi_process_cq(pbe_eq);
-               blk_iopoll_enable(&pbe_eq->iopoll);
+               irq_poll_enable(&pbe_eq->iopoll);
        }
 }
 
index fe0c5143f8e68a6cb3e9618c98258c9e4118e1ae..cb9072a841be19cbfde9ff655fec50c523edf8b3 100644 (file)
@@ -910,8 +910,7 @@ static irqreturn_t be_isr_msix(int irq, void *dev_id)
        num_eq_processed = 0;
        while (eqe->dw[offsetof(struct amap_eq_entry, valid) / 32]
                                & EQE_VALID_MASK) {
-               if (!blk_iopoll_sched_prep(&pbe_eq->iopoll))
-                       blk_iopoll_sched(&pbe_eq->iopoll);
+               irq_poll_sched(&pbe_eq->iopoll);
 
                AMAP_SET_BITS(struct amap_eq_entry, valid, eqe, 0);
                queue_tail_inc(eq);
@@ -972,8 +971,7 @@ static irqreturn_t be_isr(int irq, void *dev_id)
                        spin_unlock_irqrestore(&phba->isr_lock, flags);
                        num_mcceq_processed++;
                } else {
-                       if (!blk_iopoll_sched_prep(&pbe_eq->iopoll))
-                               blk_iopoll_sched(&pbe_eq->iopoll);
+                       irq_poll_sched(&pbe_eq->iopoll);
                        num_ioeq_processed++;
                }
                AMAP_SET_BITS(struct amap_eq_entry, valid, eqe, 0);
@@ -2295,7 +2293,7 @@ void beiscsi_process_all_cqs(struct work_struct *work)
        hwi_ring_eq_db(phba, pbe_eq->q.id, 0, 0, 1, 1);
 }
 
-static int be_iopoll(struct blk_iopoll *iop, int budget)
+static int be_iopoll(struct irq_poll *iop, int budget)
 {
        unsigned int ret;
        struct beiscsi_hba *phba;
@@ -2306,7 +2304,7 @@ static int be_iopoll(struct blk_iopoll *iop, int budget)
        pbe_eq->cq_count += ret;
        if (ret < budget) {
                phba = pbe_eq->phba;
-               blk_iopoll_complete(iop);
+               irq_poll_complete(iop);
                beiscsi_log(phba, KERN_INFO,
                            BEISCSI_LOG_CONFIG | BEISCSI_LOG_IO,
                            "BM_%d : rearm pbe_eq->q.id =%d\n",
@@ -5293,7 +5291,7 @@ static void beiscsi_quiesce(struct beiscsi_hba *phba,
 
        for (i = 0; i < phba->num_cpus; i++) {
                pbe_eq = &phwi_context->be_eq[i];
-               blk_iopoll_disable(&pbe_eq->iopoll);
+               irq_poll_disable(&pbe_eq->iopoll);
        }
 
        if (unload_state == BEISCSI_CLEAN_UNLOAD) {
@@ -5579,9 +5577,8 @@ static void beiscsi_eeh_resume(struct pci_dev *pdev)
 
        for (i = 0; i < phba->num_cpus; i++) {
                pbe_eq = &phwi_context->be_eq[i];
-               blk_iopoll_init(&pbe_eq->iopoll, be_iopoll_budget,
+               irq_poll_init(&pbe_eq->iopoll, be_iopoll_budget,
                                be_iopoll);
-               blk_iopoll_enable(&pbe_eq->iopoll);
        }
 
        i = (phba->msix_enabled) ? i : 0;
@@ -5752,9 +5749,8 @@ static int beiscsi_dev_probe(struct pci_dev *pcidev,
 
        for (i = 0; i < phba->num_cpus; i++) {
                pbe_eq = &phwi_context->be_eq[i];
-               blk_iopoll_init(&pbe_eq->iopoll, be_iopoll_budget,
+               irq_poll_init(&pbe_eq->iopoll, be_iopoll_budget,
                                be_iopoll);
-               blk_iopoll_enable(&pbe_eq->iopoll);
        }
 
        i = (phba->msix_enabled) ? i : 0;
@@ -5795,7 +5791,7 @@ free_blkenbld:
        destroy_workqueue(phba->wq);
        for (i = 0; i < phba->num_cpus; i++) {
                pbe_eq = &phwi_context->be_eq[i];
-               blk_iopoll_disable(&pbe_eq->iopoll);
+               irq_poll_disable(&pbe_eq->iopoll);
        }
 free_twq:
        beiscsi_clean_port(phba);
index 0e2bee937fe81b345abee5ca20beec65f098f175..e22a268fd311b9a859166c57a32b3cf499fd6c26 100644 (file)
@@ -57,7 +57,7 @@ MODULE_PARM_DESC(cxgb3i_snd_win, "TCP send window in bytes (default=128KB)");
 
 static int cxgb3i_rx_credit_thres = 10 * 1024;
 module_param(cxgb3i_rx_credit_thres, int, 0644);
-MODULE_PARM_DESC(rx_credit_thres,
+MODULE_PARM_DESC(cxgb3i_rx_credit_thres,
                 "RX credits return threshold in bytes (default=10KB)");
 
 static unsigned int cxgb3i_max_connect = 8 * 1024;
index 3e088125a8be6a339757dc1380f1c73418952bfc..6c14e68b9e1a80aab149ed3f7556ce3e22de7280 100644 (file)
 
 #define DONT_USE_INTR
 
-#define NCR5380_read(reg)              inb(port + reg)
-#define NCR5380_write(reg, value)      outb(value, port + reg)
+#define NCR5380_read(reg)              inb(instance->io_port + reg)
+#define NCR5380_write(reg, value)      outb(value, instance->io_port + reg)
 
 #define NCR5380_implementation_fields  /* none */
-#define NCR5380_local_declare()                unsigned int port
-#define NCR5380_setup(instance)                port = instance->io_port
-
-/*
- * Includes needed for NCR5380.[ch] (XXX: Move them to NCR5380.h)
- */
-#include <linux/delay.h>
 
 #include "NCR5380.h"
 #include "NCR5380.c"
@@ -56,6 +49,7 @@
 
 
 static struct scsi_host_template dmx3191d_driver_template = {
+       .module                 = THIS_MODULE,
        .proc_name              = DMX3191D_DRIVER_NAME,
        .name                   = "Domex DMX3191D",
        .info                   = NCR5380_info,
@@ -67,6 +61,8 @@ static struct scsi_host_template dmx3191d_driver_template = {
        .sg_tablesize           = SG_ALL,
        .cmd_per_lun            = 2,
        .use_clustering         = DISABLE_CLUSTERING,
+       .cmd_size               = NCR5380_CMD_SIZE,
+       .max_sectors            = 128,
 };
 
 static int dmx3191d_probe_one(struct pci_dev *pdev,
@@ -97,17 +93,25 @@ static int dmx3191d_probe_one(struct pci_dev *pdev,
         */
        shost->irq = NO_IRQ;
 
-       NCR5380_init(shost, FLAG_NO_PSEUDO_DMA | FLAG_DTC3181E);
+       error = NCR5380_init(shost, FLAG_NO_PSEUDO_DMA);
+       if (error)
+               goto out_host_put;
+
+       NCR5380_maybe_reset_bus(shost);
 
        pci_set_drvdata(pdev, shost);
 
        error = scsi_add_host(shost, &pdev->dev);
        if (error)
-               goto out_release_region;
+               goto out_exit;
 
        scsi_scan_host(shost);
        return 0;
 
+out_exit:
+       NCR5380_exit(shost);
+out_host_put:
+       scsi_host_put(shost);
  out_release_region:
        release_region(io, DMX3191D_REGION_LEN);
  out_disable_device:
@@ -119,15 +123,14 @@ static int dmx3191d_probe_one(struct pci_dev *pdev,
 static void dmx3191d_remove_one(struct pci_dev *pdev)
 {
        struct Scsi_Host *shost = pci_get_drvdata(pdev);
+       unsigned long io = shost->io_port;
 
        scsi_remove_host(shost);
 
        NCR5380_exit(shost);
-
-       release_region(shost->io_port, DMX3191D_REGION_LEN);
-       pci_disable_device(pdev);
-
        scsi_host_put(shost);
+       release_region(io, DMX3191D_REGION_LEN);
+       pci_disable_device(pdev);
 }
 
 static struct pci_device_id dmx3191d_pci_tbl[] = {
index 4c74c7ba2dff7cf6829684176c484ecbf356cea1..6c736b071cf4bcbaf8f5fb68b19192dc039dd293 100644 (file)
@@ -1,9 +1,5 @@
-
 #define PSEUDO_DMA
 #define DONT_USE_INTR
-#define UNSAFE                 /* Leave interrupts enabled during pseudo-dma I/O */
-#define DMA_WORKS_RIGHT
-
 
 /*
  * DTC 3180/3280 driver, by
 
 
 #include <linux/module.h>
-#include <linux/signal.h>
 #include <linux/blkdev.h>
-#include <linux/delay.h>
-#include <linux/stat.h>
 #include <linux/string.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <scsi/scsi_host.h>
+
 #include "dtc.h"
 #define AUTOPROBE_IRQ
 #include "NCR5380.h"
@@ -150,7 +144,7 @@ static const struct signature {
 
 static int __init dtc_setup(char *str)
 {
-       static int commandline_current = 0;
+       static int commandline_current;
        int i;
        int ints[10];
 
@@ -188,7 +182,7 @@ __setup("dtc=", dtc_setup);
 
 static int __init dtc_detect(struct scsi_host_template * tpnt)
 {
-       static int current_override = 0, current_base = 0;
+       static int current_override, current_base;
        struct Scsi_Host *instance;
        unsigned int addr;
        void __iomem *base;
@@ -205,9 +199,8 @@ static int __init dtc_detect(struct scsi_host_template * tpnt)
                                addr = 0;
                } else
                        for (; !addr && (current_base < NO_BASES); ++current_base) {
-#if (DTCDEBUG & DTCDEBUG_INIT)
-                               printk(KERN_DEBUG "scsi-dtc : probing address %08x\n", bases[current_base].address);
-#endif
+                               dprintk(NDEBUG_INIT, "dtc: probing address 0x%08x\n",
+                                       (unsigned int)bases[current_base].address);
                                if (bases[current_base].noauto)
                                        continue;
                                base = ioremap(bases[current_base].address, 0x2000);
@@ -216,18 +209,14 @@ static int __init dtc_detect(struct scsi_host_template * tpnt)
                                for (sig = 0; sig < NO_SIGNATURES; ++sig) {
                                        if (check_signature(base + signatures[sig].offset, signatures[sig].string, strlen(signatures[sig].string))) {
                                                addr = bases[current_base].address;
-#if (DTCDEBUG & DTCDEBUG_INIT)
-                                               printk(KERN_DEBUG "scsi-dtc : detected board.\n");
-#endif
+                                               dprintk(NDEBUG_INIT, "dtc: detected board\n");
                                                goto found;
                                        }
                                }
                                iounmap(base);
                        }
 
-#if defined(DTCDEBUG) && (DTCDEBUG & DTCDEBUG_INIT)
-               printk(KERN_DEBUG "scsi-dtc : base = %08x\n", addr);
-#endif
+               dprintk(NDEBUG_INIT, "dtc: addr = 0x%08x\n", addr);
 
                if (!addr)
                        break;
@@ -235,12 +224,15 @@ static int __init dtc_detect(struct scsi_host_template * tpnt)
 found:
                instance = scsi_register(tpnt, sizeof(struct NCR5380_hostdata));
                if (instance == NULL)
-                       break;
+                       goto out_unmap;
 
                instance->base = addr;
                ((struct NCR5380_hostdata *)(instance)->hostdata)->base = base;
 
-               NCR5380_init(instance, 0);
+               if (NCR5380_init(instance, FLAG_NO_DMA_FIXUP))
+                       goto out_unregister;
+
+               NCR5380_maybe_reset_bus(instance);
 
                NCR5380_write(DTC_CONTROL_REG, CSR_5380_INTR);  /* Enable int's */
                if (overrides[current_override].irq != IRQ_AUTO)
@@ -271,14 +263,19 @@ found:
                        printk(KERN_WARNING "scsi%d : interrupts not used. Might as well not jumper it.\n", instance->host_no);
                instance->irq = NO_IRQ;
 #endif
-#if defined(DTCDEBUG) && (DTCDEBUG & DTCDEBUG_INIT)
-               printk("scsi%d : irq = %d\n", instance->host_no, instance->irq);
-#endif
+               dprintk(NDEBUG_INIT, "scsi%d : irq = %d\n",
+                       instance->host_no, instance->irq);
 
                ++current_override;
                ++count;
        }
        return count;
+
+out_unregister:
+       scsi_unregister(instance);
+out_unmap:
+       iounmap(base);
+       return count;
 }
 
 /*
@@ -331,12 +328,8 @@ static inline int NCR5380_pread(struct Scsi_Host *instance, unsigned char *dst,
        unsigned char *d = dst;
        int i;                  /* For counting time spent in the poll-loop */
        struct NCR5380_hostdata *hostdata = shost_priv(instance);
-       NCR5380_local_declare();
-       NCR5380_setup(instance);
 
        i = 0;
-       NCR5380_read(RESET_PARITY_INTERRUPT_REG);
-       NCR5380_write(MODE_REG, MR_ENABLE_EOP_INTR | MR_DMA_MODE);
        if (instance->irq == NO_IRQ)
                NCR5380_write(DTC_CONTROL_REG, CSR_DIR_READ);
        else
@@ -348,7 +341,7 @@ static inline int NCR5380_pread(struct Scsi_Host *instance, unsigned char *dst,
                while (NCR5380_read(DTC_CONTROL_REG) & CSR_HOST_BUF_NOT_RDY)
                        ++i;
                rtrc(3);
-               memcpy_fromio(d, base + DTC_DATA_BUF, 128);
+               memcpy_fromio(d, hostdata->base + DTC_DATA_BUF, 128);
                d += 128;
                len -= 128;
                rtrc(7);
@@ -358,9 +351,7 @@ static inline int NCR5380_pread(struct Scsi_Host *instance, unsigned char *dst,
        rtrc(4);
        while (!(NCR5380_read(DTC_CONTROL_REG) & D_CR_ACCESS))
                ++i;
-       NCR5380_write(MODE_REG, 0);     /* Clear the operating mode */
        rtrc(0);
-       NCR5380_read(RESET_PARITY_INTERRUPT_REG);
        if (i > hostdata->spin_max_r)
                hostdata->spin_max_r = i;
        return (0);
@@ -383,12 +374,7 @@ static inline int NCR5380_pwrite(struct Scsi_Host *instance, unsigned char *src,
 {
        int i;
        struct NCR5380_hostdata *hostdata = shost_priv(instance);
-       NCR5380_local_declare();
-       NCR5380_setup(instance);
 
-       NCR5380_read(RESET_PARITY_INTERRUPT_REG);
-       NCR5380_write(MODE_REG, MR_ENABLE_EOP_INTR | MR_DMA_MODE);
-       /* set direction (write) */
        if (instance->irq == NO_IRQ)
                NCR5380_write(DTC_CONTROL_REG, 0);
        else
@@ -400,7 +386,7 @@ static inline int NCR5380_pwrite(struct Scsi_Host *instance, unsigned char *src,
                while (NCR5380_read(DTC_CONTROL_REG) & CSR_HOST_BUF_NOT_RDY)
                        ++i;
                rtrc(3);
-               memcpy_toio(base + DTC_DATA_BUF, src, 128);
+               memcpy_toio(hostdata->base + DTC_DATA_BUF, src, 128);
                src += 128;
                len -= 128;
        }
@@ -413,47 +399,60 @@ static inline int NCR5380_pwrite(struct Scsi_Host *instance, unsigned char *src,
                ++i;
        rtrc(7);
        /* Check for parity error here. fixme. */
-       NCR5380_write(MODE_REG, 0);     /* Clear the operating mode */
        rtrc(0);
        if (i > hostdata->spin_max_w)
                hostdata->spin_max_w = i;
        return (0);
 }
 
+static int dtc_dma_xfer_len(struct scsi_cmnd *cmd)
+{
+       int transfersize = cmd->transfersize;
+
+       /* Limit transfers to 32K, for xx400 & xx406
+        * pseudoDMA that transfers in 128 bytes blocks.
+        */
+       if (transfersize > 32 * 1024 && cmd->SCp.this_residual &&
+           !(cmd->SCp.this_residual % transfersize))
+               transfersize = 32 * 1024;
+
+       return transfersize;
+}
+
 MODULE_LICENSE("GPL");
 
 #include "NCR5380.c"
 
 static int dtc_release(struct Scsi_Host *shost)
 {
-       NCR5380_local_declare();
-       NCR5380_setup(shost);
+       struct NCR5380_hostdata *hostdata = shost_priv(shost);
+
        if (shost->irq != NO_IRQ)
                free_irq(shost->irq, shost);
        NCR5380_exit(shost);
-       if (shost->io_port && shost->n_io_port)
-               release_region(shost->io_port, shost->n_io_port);
        scsi_unregister(shost);
-       iounmap(base);
+       iounmap(hostdata->base);
        return 0;
 }
 
 static struct scsi_host_template driver_template = {
-       .name                           = "DTC 3180/3280 ",
-       .detect                         = dtc_detect,
-       .release                        = dtc_release,
-       .proc_name                      = "dtc3x80",
-       .show_info                      = dtc_show_info,
-       .write_info                     = dtc_write_info,
-       .info                           = dtc_info,
-       .queuecommand                   = dtc_queue_command,
-       .eh_abort_handler               = dtc_abort,
-       .eh_bus_reset_handler           = dtc_bus_reset,
-       .bios_param                     = dtc_biosparam,
-       .can_queue                      = CAN_QUEUE,
-       .this_id                        = 7,
-       .sg_tablesize                   = SG_ALL,
-       .cmd_per_lun                    = CMD_PER_LUN,
-       .use_clustering                 = DISABLE_CLUSTERING,
+       .name                   = "DTC 3180/3280",
+       .detect                 = dtc_detect,
+       .release                = dtc_release,
+       .proc_name              = "dtc3x80",
+       .show_info              = dtc_show_info,
+       .write_info             = dtc_write_info,
+       .info                   = dtc_info,
+       .queuecommand           = dtc_queue_command,
+       .eh_abort_handler       = dtc_abort,
+       .eh_bus_reset_handler   = dtc_bus_reset,
+       .bios_param             = dtc_biosparam,
+       .can_queue              = 32,
+       .this_id                = 7,
+       .sg_tablesize           = SG_ALL,
+       .cmd_per_lun            = 2,
+       .use_clustering         = DISABLE_CLUSTERING,
+       .cmd_size               = NCR5380_CMD_SIZE,
+       .max_sectors            = 128,
 };
 #include "scsi_module.c"
index 78a2332e9064ee6b58372943ec0c327d2a74b26f..56732cba8abad0110f04532239d62fa1b8163c91 100644 (file)
 #ifndef DTC3280_H
 #define DTC3280_H
 
-#define DTCDEBUG 0
-#define DTCDEBUG_INIT  0x1
-#define DTCDEBUG_TRANSFER 0x2
-
-#ifndef CMD_PER_LUN
-#define CMD_PER_LUN 2
-#endif
-
-#ifndef CAN_QUEUE
-#define CAN_QUEUE 32 
-#endif
-
 #define NCR5380_implementation_fields \
     void __iomem *base
 
-#define NCR5380_local_declare() \
-    void __iomem *base
-
-#define NCR5380_setup(instance) \
-    base = ((struct NCR5380_hostdata *)(instance)->hostdata)->base
+#define DTC_address(reg) \
+       (((struct NCR5380_hostdata *)shost_priv(instance))->base + DTC_5380_OFFSET + reg)
 
-#define DTC_address(reg) (base + DTC_5380_OFFSET + reg)
-
-#define dbNCR5380_read(reg)                                              \
-    (rval=readb(DTC_address(reg)), \
-     (((unsigned char) printk("DTC : read register %d at addr %p is: %02x\n"\
-    , (reg), DTC_address(reg), rval)), rval ) )
-
-#define dbNCR5380_write(reg, value) do {                                  \
-    printk("DTC : write %02x to register %d at address %p\n",         \
-            (value), (reg), DTC_address(reg));     \
-    writeb(value, DTC_address(reg));} while(0)
-
-
-#if !(DTCDEBUG & DTCDEBUG_TRANSFER) 
 #define NCR5380_read(reg) (readb(DTC_address(reg)))
 #define NCR5380_write(reg, value) (writeb(value, DTC_address(reg)))
-#else
-#define NCR5380_read(reg) (readb(DTC_address(reg)))
-#define xNCR5380_read(reg)                                             \
-    (((unsigned char) printk("DTC : read register %d at address %p\n"\
-    , (reg), DTC_address(reg))), readb(DTC_address(reg)))
 
-#define NCR5380_write(reg, value) do {                                 \
-    printk("DTC : write %02x to register %d at address %p\n",  \
-           (value), (reg), DTC_address(reg));  \
-    writeb(value, DTC_address(reg));} while(0)
-#endif
+#define NCR5380_dma_xfer_len(instance, cmd, phase) \
+        dtc_dma_xfer_len(cmd)
 
 #define NCR5380_intr                   dtc_intr
 #define NCR5380_queue_command          dtc_queue_command
index f8d2478b11cc7a53453899db7b256543c7f21a33..90091e69302025cbbaaf1d5836ebfe1952b74e6e 100644 (file)
  *     
  */
 
-/* settings for DTC3181E card with only Mustek scanner attached */
-#define USLEEP_POLL    msecs_to_jiffies(10)
-#define USLEEP_SLEEP   msecs_to_jiffies(200)
-#define USLEEP_WAITLONG        msecs_to_jiffies(5000)
-
 #define AUTOPROBE_IRQ
 
 #ifdef CONFIG_SCSI_GENERIC_NCR53C400
-#define NCR53C400_PSEUDO_DMA 1
 #define PSEUDO_DMA
-#define NCR53C400
 #endif
 
 #include <asm/io.h>
-#include <linux/signal.h>
 #include <linux/blkdev.h>
+#include <linux/module.h>
 #include <scsi/scsi_host.h>
 #include "g_NCR5380.h"
 #include "NCR5380.h"
-#include <linux/stat.h>
 #include <linux/init.h>
 #include <linux/ioport.h>
 #include <linux/isapnp.h>
-#include <linux/delay.h>
 #include <linux/interrupt.h>
 
-#define NCR_NOT_SET 0
-static int ncr_irq = NCR_NOT_SET;
-static int ncr_dma = NCR_NOT_SET;
-static int ncr_addr = NCR_NOT_SET;
-static int ncr_5380 = NCR_NOT_SET;
-static int ncr_53c400 = NCR_NOT_SET;
-static int ncr_53c400a = NCR_NOT_SET;
-static int dtc_3181e = NCR_NOT_SET;
+static int ncr_irq;
+static int ncr_dma;
+static int ncr_addr;
+static int ncr_5380;
+static int ncr_53c400;
+static int ncr_53c400a;
+static int dtc_3181e;
+static int hp_c2502;
 
 static struct override {
        NCR5380_map_type NCR5380_map_name;
@@ -121,7 +112,7 @@ static struct override {
 
 static void __init internal_setup(int board, char *str, int *ints)
 {
-       static int commandline_current = 0;
+       static int commandline_current;
        switch (board) {
        case BOARD_NCR5380:
                if (ints[0] != 2 && ints[0] != 3) {
@@ -235,6 +226,30 @@ static int __init do_DTC3181E_setup(char *str)
 
 #endif
 
+#ifndef SCSI_G_NCR5380_MEM
+/*
+ * Configure I/O address of 53C400A or DTC436 by writing magic numbers
+ * to ports 0x779 and 0x379.
+ */
+static void magic_configure(int idx, u8 irq, u8 magic[])
+{
+       u8 cfg = 0;
+
+       outb(magic[0], 0x779);
+       outb(magic[1], 0x379);
+       outb(magic[2], 0x379);
+       outb(magic[3], 0x379);
+       outb(magic[4], 0x379);
+
+       /* allowed IRQs for HP C2502 */
+       if (irq != 2 && irq != 3 && irq != 4 && irq != 5 && irq != 7)
+               irq = 0;
+       if (idx >= 0 && idx <= 7)
+               cfg = 0x80 | idx | (irq << 4);
+       outb(cfg, 0x379);
+}
+#endif
+
 /**
  *     generic_NCR5380_detect  -       look for NCR5380 controllers
  *     @tpnt: the scsi template
@@ -243,19 +258,18 @@ static int __init do_DTC3181E_setup(char *str)
  *     and DTC436(ISAPnP) controllers. If overrides have been set we use
  *     them.
  *
- *     The caller supplied NCR5380_init function is invoked from here, before
- *     the interrupt line is taken.
- *
  *     Locks: none
  */
 
 static int __init generic_NCR5380_detect(struct scsi_host_template *tpnt)
 {
-       static int current_override = 0;
+       static int current_override;
        int count;
        unsigned int *ports;
+       u8 *magic = NULL;
 #ifndef SCSI_G_NCR5380_MEM
        int i;
+       int port_idx = -1;
        unsigned long region_size = 16;
 #endif
        static unsigned int __initdata ncr_53c400a_ports[] = {
@@ -264,27 +278,36 @@ static int __init generic_NCR5380_detect(struct scsi_host_template *tpnt)
        static unsigned int __initdata dtc_3181e_ports[] = {
                0x220, 0x240, 0x280, 0x2a0, 0x2c0, 0x300, 0x320, 0x340, 0
        };
-       int flags = 0;
+       static u8 ncr_53c400a_magic[] __initdata = {    /* 53C400A & DTC436 */
+               0x59, 0xb9, 0xc5, 0xae, 0xa6
+       };
+       static u8 hp_c2502_magic[] __initdata = {       /* HP C2502 */
+               0x0f, 0x22, 0xf0, 0x20, 0x80
+       };
+       int flags;
        struct Scsi_Host *instance;
+       struct NCR5380_hostdata *hostdata;
 #ifdef SCSI_G_NCR5380_MEM
        unsigned long base;
        void __iomem *iomem;
 #endif
 
-       if (ncr_irq != NCR_NOT_SET)
+       if (ncr_irq)
                overrides[0].irq = ncr_irq;
-       if (ncr_dma != NCR_NOT_SET)
+       if (ncr_dma)
                overrides[0].dma = ncr_dma;
-       if (ncr_addr != NCR_NOT_SET)
+       if (ncr_addr)
                overrides[0].NCR5380_map_name = (NCR5380_map_type) ncr_addr;
-       if (ncr_5380 != NCR_NOT_SET)
+       if (ncr_5380)
                overrides[0].board = BOARD_NCR5380;
-       else if (ncr_53c400 != NCR_NOT_SET)
+       else if (ncr_53c400)
                overrides[0].board = BOARD_NCR53C400;
-       else if (ncr_53c400a != NCR_NOT_SET)
+       else if (ncr_53c400a)
                overrides[0].board = BOARD_NCR53C400A;
-       else if (dtc_3181e != NCR_NOT_SET)
+       else if (dtc_3181e)
                overrides[0].board = BOARD_DTC3181E;
+       else if (hp_c2502)
+               overrides[0].board = BOARD_HP_C2502;
 #ifndef SCSI_G_NCR5380_MEM
        if (!current_override && isapnp_present()) {
                struct pnp_dev *dev = NULL;
@@ -318,41 +341,45 @@ static int __init generic_NCR5380_detect(struct scsi_host_template *tpnt)
                }
        }
 #endif
-       tpnt->proc_name = "g_NCR5380";
 
        for (count = 0; current_override < NO_OVERRIDES; ++current_override) {
                if (!(overrides[current_override].NCR5380_map_name))
                        continue;
 
                ports = NULL;
+               flags = 0;
                switch (overrides[current_override].board) {
                case BOARD_NCR5380:
                        flags = FLAG_NO_PSEUDO_DMA;
                        break;
                case BOARD_NCR53C400:
-                       flags = FLAG_NCR53C400;
+#ifdef PSEUDO_DMA
+                       flags = FLAG_NO_DMA_FIXUP;
+#endif
                        break;
                case BOARD_NCR53C400A:
-                       flags = FLAG_NO_PSEUDO_DMA;
+                       flags = FLAG_NO_DMA_FIXUP;
+                       ports = ncr_53c400a_ports;
+                       magic = ncr_53c400a_magic;
+                       break;
+               case BOARD_HP_C2502:
+                       flags = FLAG_NO_DMA_FIXUP;
                        ports = ncr_53c400a_ports;
+                       magic = hp_c2502_magic;
                        break;
                case BOARD_DTC3181E:
-                       flags = FLAG_NO_PSEUDO_DMA | FLAG_DTC3181E;
+                       flags = FLAG_NO_DMA_FIXUP;
                        ports = dtc_3181e_ports;
+                       magic = ncr_53c400a_magic;
                        break;
                }
 
 #ifndef SCSI_G_NCR5380_MEM
-               if (ports) {
+               if (ports && magic) {
                        /* wakeup sequence for the NCR53C400A and DTC3181E */
 
                        /* Disable the adapter and look for a free io port */
-                       outb(0x59, 0x779);
-                       outb(0xb9, 0x379);
-                       outb(0xc5, 0x379);
-                       outb(0xae, 0x379);
-                       outb(0xa6, 0x379);
-                       outb(0x00, 0x379);
+                       magic_configure(-1, 0, magic);
 
                        if (overrides[current_override].NCR5380_map_name != PORT_AUTO)
                                for (i = 0; ports[i]; i++) {
@@ -371,17 +398,12 @@ static int __init generic_NCR5380_detect(struct scsi_host_template *tpnt)
                                }
                        if (ports[i]) {
                                /* At this point we have our region reserved */
-                               outb(0x59, 0x779);
-                               outb(0xb9, 0x379);
-                               outb(0xc5, 0x379);
-                               outb(0xae, 0x379);
-                               outb(0xa6, 0x379);
-                               outb(0x80 | i, 0x379);  /* set io port to be used */
+                               magic_configure(i, 0, magic); /* no IRQ yet */
                                outb(0xc0, ports[i] + 9);
                                if (inb(ports[i] + 9) != 0x80)
                                        continue;
-                               else
-                                       overrides[current_override].NCR5380_map_name = ports[i];
+                               overrides[current_override].NCR5380_map_name = ports[i];
+                               port_idx = i;
                        } else
                                continue;
                }
@@ -403,24 +425,65 @@ static int __init generic_NCR5380_detect(struct scsi_host_template *tpnt)
                }
 #endif
                instance = scsi_register(tpnt, sizeof(struct NCR5380_hostdata));
-               if (instance == NULL) {
-#ifndef SCSI_G_NCR5380_MEM
-                       release_region(overrides[current_override].NCR5380_map_name, region_size);
-#else
-                       iounmap(iomem);
-                       release_mem_region(base, NCR5380_region_size);
-#endif
-                       continue;
-               }
+               if (instance == NULL)
+                       goto out_release;
+               hostdata = shost_priv(instance);
 
-               instance->NCR5380_instance_name = overrides[current_override].NCR5380_map_name;
 #ifndef SCSI_G_NCR5380_MEM
+               instance->io_port = overrides[current_override].NCR5380_map_name;
                instance->n_io_port = region_size;
+               hostdata->io_width = 1; /* 8-bit PDMA by default */
+
+               /*
+                * On NCR53C400 boards, NCR5380 registers are mapped 8 past
+                * the base address.
+                */
+               switch (overrides[current_override].board) {
+               case BOARD_NCR53C400:
+                       instance->io_port += 8;
+                       hostdata->c400_ctl_status = 0;
+                       hostdata->c400_blk_cnt = 1;
+                       hostdata->c400_host_buf = 4;
+                       break;
+               case BOARD_DTC3181E:
+                       hostdata->io_width = 2; /* 16-bit PDMA */
+                       /* fall through */
+               case BOARD_NCR53C400A:
+               case BOARD_HP_C2502:
+                       hostdata->c400_ctl_status = 9;
+                       hostdata->c400_blk_cnt = 10;
+                       hostdata->c400_host_buf = 8;
+                       break;
+               }
 #else
-               ((struct NCR5380_hostdata *)instance->hostdata)->iomem = iomem;
+               instance->base = overrides[current_override].NCR5380_map_name;
+               hostdata->iomem = iomem;
+               switch (overrides[current_override].board) {
+               case BOARD_NCR53C400:
+                       hostdata->c400_ctl_status = 0x100;
+                       hostdata->c400_blk_cnt = 0x101;
+                       hostdata->c400_host_buf = 0x104;
+                       break;
+               case BOARD_DTC3181E:
+               case BOARD_NCR53C400A:
+               case BOARD_HP_C2502:
+                       pr_err(DRV_MODULE_NAME ": unknown register offsets\n");
+                       goto out_unregister;
+               }
 #endif
 
-               NCR5380_init(instance, flags);
+               if (NCR5380_init(instance, flags))
+                       goto out_unregister;
+
+               switch (overrides[current_override].board) {
+               case BOARD_NCR53C400:
+               case BOARD_DTC3181E:
+               case BOARD_NCR53C400A:
+               case BOARD_HP_C2502:
+                       NCR5380_write(hostdata->c400_ctl_status, CSR_BASE);
+               }
+
+               NCR5380_maybe_reset_bus(instance);
 
                if (overrides[current_override].irq != IRQ_AUTO)
                        instance->irq = overrides[current_override].irq;
@@ -431,12 +494,18 @@ static int __init generic_NCR5380_detect(struct scsi_host_template *tpnt)
                if (instance->irq == 255)
                        instance->irq = NO_IRQ;
 
-               if (instance->irq != NO_IRQ)
+               if (instance->irq != NO_IRQ) {
+#ifndef SCSI_G_NCR5380_MEM
+                       /* set IRQ for HP C2502 */
+                       if (overrides[current_override].board == BOARD_HP_C2502)
+                               magic_configure(port_idx, instance->irq, magic);
+#endif
                        if (request_irq(instance->irq, generic_NCR5380_intr,
                                        0, "NCR5380", instance)) {
                                printk(KERN_WARNING "scsi%d : IRQ%d not free, interrupts disabled\n", instance->host_no, instance->irq);
                                instance->irq = NO_IRQ;
                        }
+               }
 
                if (instance->irq == NO_IRQ) {
                        printk(KERN_INFO "scsi%d : interrupts not enabled. for better interactive performance,\n", instance->host_no);
@@ -447,6 +516,17 @@ static int __init generic_NCR5380_detect(struct scsi_host_template *tpnt)
                ++count;
        }
        return count;
+
+out_unregister:
+       scsi_unregister(instance);
+out_release:
+#ifndef SCSI_G_NCR5380_MEM
+       release_region(overrides[current_override].NCR5380_map_name, region_size);
+#else
+       iounmap(iomem);
+       release_mem_region(base, NCR5380_region_size);
+#endif
+       return count;
 }
 
 /**
@@ -460,21 +540,15 @@ static int __init generic_NCR5380_detect(struct scsi_host_template *tpnt)
  
 static int generic_NCR5380_release_resources(struct Scsi_Host *instance)
 {
-       NCR5380_local_declare();
-       NCR5380_setup(instance);
-       
        if (instance->irq != NO_IRQ)
                free_irq(instance->irq, instance);
        NCR5380_exit(instance);
-
 #ifndef SCSI_G_NCR5380_MEM
-       release_region(instance->NCR5380_instance_name, instance->n_io_port);
+       release_region(instance->io_port, instance->n_io_port);
 #else
        iounmap(((struct NCR5380_hostdata *)instance->hostdata)->iomem);
-       release_mem_region(instance->NCR5380_instance_name, NCR5380_region_size);
+       release_mem_region(instance->base, NCR5380_region_size);
 #endif
-
-
        return 0;
 }
 
@@ -507,7 +581,7 @@ generic_NCR5380_biosparam(struct scsi_device *sdev, struct block_device *bdev,
 }
 #endif
 
-#ifdef NCR53C400_PSEUDO_DMA
+#ifdef PSEUDO_DMA
 
 /**
  *     NCR5380_pread           -       pseudo DMA read
@@ -521,75 +595,68 @@ generic_NCR5380_biosparam(struct scsi_device *sdev, struct block_device *bdev,
  
 static inline int NCR5380_pread(struct Scsi_Host *instance, unsigned char *dst, int len)
 {
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
        int blocks = len / 128;
        int start = 0;
-       int bl;
-
-       NCR5380_local_declare();
-       NCR5380_setup(instance);
 
-       NCR5380_write(C400_CONTROL_STATUS_REG, CSR_BASE | CSR_TRANS_DIR);
-       NCR5380_write(C400_BLOCK_COUNTER_REG, blocks);
+       NCR5380_write(hostdata->c400_ctl_status, CSR_BASE | CSR_TRANS_DIR);
+       NCR5380_write(hostdata->c400_blk_cnt, blocks);
        while (1) {
-               if ((bl = NCR5380_read(C400_BLOCK_COUNTER_REG)) == 0) {
+               if (NCR5380_read(hostdata->c400_blk_cnt) == 0)
                        break;
-               }
-               if (NCR5380_read(C400_CONTROL_STATUS_REG) & CSR_GATED_53C80_IRQ) {
+               if (NCR5380_read(hostdata->c400_ctl_status) & CSR_GATED_53C80_IRQ) {
                        printk(KERN_ERR "53C400r: Got 53C80_IRQ start=%d, blocks=%d\n", start, blocks);
                        return -1;
                }
-               while (NCR5380_read(C400_CONTROL_STATUS_REG) & CSR_HOST_BUF_NOT_RDY);
+               while (NCR5380_read(hostdata->c400_ctl_status) & CSR_HOST_BUF_NOT_RDY)
+                       ; /* FIXME - no timeout */
 
 #ifndef SCSI_G_NCR5380_MEM
-               {
-                       int i;
-                       for (i = 0; i < 128; i++)
-                               dst[start + i] = NCR5380_read(C400_HOST_BUFFER);
-               }
+               if (hostdata->io_width == 2)
+                       insw(instance->io_port + hostdata->c400_host_buf,
+                                                       dst + start, 64);
+               else
+                       insb(instance->io_port + hostdata->c400_host_buf,
+                                                       dst + start, 128);
 #else
                /* implies SCSI_G_NCR5380_MEM */
-               memcpy_fromio(dst + start, iomem + NCR53C400_host_buffer, 128);
+               memcpy_fromio(dst + start,
+                             hostdata->iomem + NCR53C400_host_buffer, 128);
 #endif
                start += 128;
                blocks--;
        }
 
        if (blocks) {
-               while (NCR5380_read(C400_CONTROL_STATUS_REG) & CSR_HOST_BUF_NOT_RDY)
-               {
-                       // FIXME - no timeout
-               }
+               while (NCR5380_read(hostdata->c400_ctl_status) & CSR_HOST_BUF_NOT_RDY)
+                       ; /* FIXME - no timeout */
 
 #ifndef SCSI_G_NCR5380_MEM
-               {
-                       int i;  
-                       for (i = 0; i < 128; i++)
-                               dst[start + i] = NCR5380_read(C400_HOST_BUFFER);
-               }
+               if (hostdata->io_width == 2)
+                       insw(instance->io_port + hostdata->c400_host_buf,
+                                                       dst + start, 64);
+               else
+                       insb(instance->io_port + hostdata->c400_host_buf,
+                                                       dst + start, 128);
 #else
                /* implies SCSI_G_NCR5380_MEM */
-               memcpy_fromio(dst + start, iomem + NCR53C400_host_buffer, 128);
+               memcpy_fromio(dst + start,
+                             hostdata->iomem + NCR53C400_host_buffer, 128);
 #endif
                start += 128;
                blocks--;
        }
 
-       if (!(NCR5380_read(C400_CONTROL_STATUS_REG) & CSR_GATED_53C80_IRQ))
+       if (!(NCR5380_read(hostdata->c400_ctl_status) & CSR_GATED_53C80_IRQ))
                printk("53C400r: no 53C80 gated irq after transfer");
 
-#if 0
-       /*
-        *      DON'T DO THIS - THEY NEVER ARRIVE!
-        */
-       printk("53C400r: Waiting for 53C80 registers\n");
-       while (NCR5380_read(C400_CONTROL_STATUS_REG) & CSR_53C80_REG)
+       /* wait for 53C80 registers to be available */
+       while (!(NCR5380_read(hostdata->c400_ctl_status) & CSR_53C80_REG))
                ;
-#endif
+
        if (!(NCR5380_read(BUS_AND_STATUS_REG) & BASR_END_DMA_TRANSFER))
                printk(KERN_ERR "53C400r: no end dma signal\n");
                
-       NCR5380_write(MODE_REG, MR_BASE);
-       NCR5380_read(RESET_PARITY_INTERRUPT_REG);
        return 0;
 }
 
@@ -605,89 +672,91 @@ static inline int NCR5380_pread(struct Scsi_Host *instance, unsigned char *dst,
 
 static inline int NCR5380_pwrite(struct Scsi_Host *instance, unsigned char *src, int len)
 {
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
        int blocks = len / 128;
        int start = 0;
-       int bl;
-       int i;
 
-       NCR5380_local_declare();
-       NCR5380_setup(instance);
-
-       NCR5380_write(C400_CONTROL_STATUS_REG, CSR_BASE);
-       NCR5380_write(C400_BLOCK_COUNTER_REG, blocks);
+       NCR5380_write(hostdata->c400_ctl_status, CSR_BASE);
+       NCR5380_write(hostdata->c400_blk_cnt, blocks);
        while (1) {
-               if (NCR5380_read(C400_CONTROL_STATUS_REG) & CSR_GATED_53C80_IRQ) {
+               if (NCR5380_read(hostdata->c400_ctl_status) & CSR_GATED_53C80_IRQ) {
                        printk(KERN_ERR "53C400w: Got 53C80_IRQ start=%d, blocks=%d\n", start, blocks);
                        return -1;
                }
 
-               if ((bl = NCR5380_read(C400_BLOCK_COUNTER_REG)) == 0) {
+               if (NCR5380_read(hostdata->c400_blk_cnt) == 0)
                        break;
-               }
-               while (NCR5380_read(C400_CONTROL_STATUS_REG) & CSR_HOST_BUF_NOT_RDY)
+               while (NCR5380_read(hostdata->c400_ctl_status) & CSR_HOST_BUF_NOT_RDY)
                        ; // FIXME - timeout
 #ifndef SCSI_G_NCR5380_MEM
-               {
-                       for (i = 0; i < 128; i++)
-                               NCR5380_write(C400_HOST_BUFFER, src[start + i]);
-               }
+               if (hostdata->io_width == 2)
+                       outsw(instance->io_port + hostdata->c400_host_buf,
+                                                       src + start, 64);
+               else
+                       outsb(instance->io_port + hostdata->c400_host_buf,
+                                                       src + start, 128);
 #else
                /* implies SCSI_G_NCR5380_MEM */
-               memcpy_toio(iomem + NCR53C400_host_buffer, src + start, 128);
+               memcpy_toio(hostdata->iomem + NCR53C400_host_buffer,
+                           src + start, 128);
 #endif
                start += 128;
                blocks--;
        }
        if (blocks) {
-               while (NCR5380_read(C400_CONTROL_STATUS_REG) & CSR_HOST_BUF_NOT_RDY)
+               while (NCR5380_read(hostdata->c400_ctl_status) & CSR_HOST_BUF_NOT_RDY)
                        ; // FIXME - no timeout
 
 #ifndef SCSI_G_NCR5380_MEM
-               {
-                       for (i = 0; i < 128; i++)
-                               NCR5380_write(C400_HOST_BUFFER, src[start + i]);
-               }
+               if (hostdata->io_width == 2)
+                       outsw(instance->io_port + hostdata->c400_host_buf,
+                                                       src + start, 64);
+               else
+                       outsb(instance->io_port + hostdata->c400_host_buf,
+                                                       src + start, 128);
 #else
                /* implies SCSI_G_NCR5380_MEM */
-               memcpy_toio(iomem + NCR53C400_host_buffer, src + start, 128);
+               memcpy_toio(hostdata->iomem + NCR53C400_host_buffer,
+                           src + start, 128);
 #endif
                start += 128;
                blocks--;
        }
 
-#if 0
-       printk("53C400w: waiting for registers to be available\n");
-       THEY NEVER DO ! while (NCR5380_read(C400_CONTROL_STATUS_REG) & CSR_53C80_REG);
-       printk("53C400w: Got em\n");
-#endif
-
-       /* Let's wait for this instead - could be ugly */
-       /* All documentation says to check for this. Maybe my hardware is too
-        * fast. Waiting for it seems to work fine! KLL
-        */
-       while (!(i = NCR5380_read(C400_CONTROL_STATUS_REG) & CSR_GATED_53C80_IRQ))
-               ;       // FIXME - no timeout
-
-       /*
-        * I know. i is certainly != 0 here but the loop is new. See previous
-        * comment.
-        */
-       if (i) {
-               if (!((i = NCR5380_read(BUS_AND_STATUS_REG)) & BASR_END_DMA_TRANSFER))
-                       printk(KERN_ERR "53C400w: No END OF DMA bit - WHOOPS! BASR=%0x\n", i);
-       } else
-               printk(KERN_ERR "53C400w: no 53C80 gated irq after transfer (last block)\n");
+       /* wait for 53C80 registers to be available */
+       while (!(NCR5380_read(hostdata->c400_ctl_status) & CSR_53C80_REG)) {
+               udelay(4); /* DTC436 chip hangs without this */
+               /* FIXME - no timeout */
+       }
 
-#if 0
        if (!(NCR5380_read(BUS_AND_STATUS_REG) & BASR_END_DMA_TRANSFER)) {
                printk(KERN_ERR "53C400w: no end dma signal\n");
        }
-#endif
+
        while (!(NCR5380_read(TARGET_COMMAND_REG) & TCR_LAST_BYTE_SENT))
                ;       // TIMEOUT
        return 0;
 }
-#endif                         /* PSEUDO_DMA */
+
+static int generic_NCR5380_dma_xfer_len(struct scsi_cmnd *cmd)
+{
+       int transfersize = cmd->transfersize;
+
+       /* Limit transfers to 32K, for xx400 & xx406
+        * pseudoDMA that transfers in 128 bytes blocks.
+        */
+       if (transfersize > 32 * 1024 && cmd->SCp.this_residual &&
+           !(cmd->SCp.this_residual % transfersize))
+               transfersize = 32 * 1024;
+
+       /* 53C400 datasheet: non-modulo-128-byte transfers should use PIO */
+       if (transfersize % 128)
+               transfersize = 0;
+
+       return transfersize;
+}
+
+#endif /* PSEUDO_DMA */
 
 /*
  *     Include the NCR5380 core code that we build our driver around   
@@ -696,22 +765,24 @@ static inline int NCR5380_pwrite(struct Scsi_Host *instance, unsigned char *src,
 #include "NCR5380.c"
 
 static struct scsi_host_template driver_template = {
-       .show_info              = generic_NCR5380_show_info,
-       .name                   = "Generic NCR5380/NCR53C400 SCSI",
-       .detect                 = generic_NCR5380_detect,
-       .release                = generic_NCR5380_release_resources,
-       .info                   = generic_NCR5380_info,
-       .queuecommand           = generic_NCR5380_queue_command,
+       .proc_name              = DRV_MODULE_NAME,
+       .name                   = "Generic NCR5380/NCR53C400 SCSI",
+       .detect                 = generic_NCR5380_detect,
+       .release                = generic_NCR5380_release_resources,
+       .info                   = generic_NCR5380_info,
+       .queuecommand           = generic_NCR5380_queue_command,
        .eh_abort_handler       = generic_NCR5380_abort,
        .eh_bus_reset_handler   = generic_NCR5380_bus_reset,
-       .bios_param             = NCR5380_BIOSPARAM,
-       .can_queue              = CAN_QUEUE,
-        .this_id               = 7,
-        .sg_tablesize          = SG_ALL,
-       .cmd_per_lun            = CMD_PER_LUN,
-        .use_clustering                = DISABLE_CLUSTERING,
+       .bios_param             = NCR5380_BIOSPARAM,
+       .can_queue              = 16,
+       .this_id                = 7,
+       .sg_tablesize           = SG_ALL,
+       .cmd_per_lun            = 2,
+       .use_clustering         = DISABLE_CLUSTERING,
+       .cmd_size               = NCR5380_CMD_SIZE,
+       .max_sectors            = 128,
 };
-#include <linux/module.h>
+
 #include "scsi_module.c"
 
 module_param(ncr_irq, int, 0);
@@ -721,6 +792,7 @@ module_param(ncr_5380, int, 0);
 module_param(ncr_53c400, int, 0);
 module_param(ncr_53c400a, int, 0);
 module_param(dtc_3181e, int, 0);
+module_param(hp_c2502, int, 0);
 MODULE_LICENSE("GPL");
 
 #if !defined(SCSI_G_NCR5380_MEM) && defined(MODULE)
index bea1a3b9b862dfd4b944072af3fbab22344bd66c..6f3d2ac4f185c2ff64f6c6a41721f604f08edab7 100644 (file)
 #ifndef GENERIC_NCR5380_H
 #define GENERIC_NCR5380_H
 
-#ifdef NCR53C400
+#ifdef CONFIG_SCSI_GENERIC_NCR53C400
 #define BIOSPARAM
 #define NCR5380_BIOSPARAM generic_NCR5380_biosparam
 #else
 #define NCR5380_BIOSPARAM NULL
 #endif
 
-#ifndef ASM
-
-#ifndef CMD_PER_LUN
-#define CMD_PER_LUN 2
-#endif
-
-#ifndef CAN_QUEUE
-#define CAN_QUEUE 16
-#endif
-
 #define __STRVAL(x) #x
 #define STRVAL(x) __STRVAL(x)
 
 #ifndef SCSI_G_NCR5380_MEM
+#define DRV_MODULE_NAME "g_NCR5380"
 
-#define NCR5380_map_config port
 #define NCR5380_map_type int
 #define NCR5380_map_name port
-#define NCR5380_instance_name io_port
-#define NCR53C400_register_offset 0
-#define NCR53C400_address_adjust 8
 
-#ifdef NCR53C400
+#ifdef CONFIG_SCSI_GENERIC_NCR53C400
 #define NCR5380_region_size 16
 #else
 #define NCR5380_region_size 8
 #endif
 
-#define NCR5380_read(reg) (inb(NCR5380_map_name + (reg)))
-#define NCR5380_write(reg, value) (outb((value), (NCR5380_map_name + (reg))))
+#define NCR5380_read(reg) \
+       inb(instance->io_port + (reg))
+#define NCR5380_write(reg, value) \
+       outb(value, instance->io_port + (reg))
 
 #define NCR5380_implementation_fields \
-    NCR5380_map_type NCR5380_map_name
-
-#define NCR5380_local_declare() \
-    register NCR5380_implementation_fields
-
-#define NCR5380_setup(instance) \
-    NCR5380_map_name = (NCR5380_map_type)((instance)->NCR5380_instance_name)
+       int c400_ctl_status; \
+       int c400_blk_cnt; \
+       int c400_host_buf; \
+       int io_width;
 
 #else 
 /* therefore SCSI_G_NCR5380_MEM */
+#define DRV_MODULE_NAME "g_NCR5380_mmio"
 
-#define NCR5380_map_config memory
 #define NCR5380_map_type unsigned long
 #define NCR5380_map_name base
-#define NCR5380_instance_name base
-#define NCR53C400_register_offset 0x108
-#define NCR53C400_address_adjust 0
 #define NCR53C400_mem_base 0x3880
 #define NCR53C400_host_buffer 0x3900
 #define NCR5380_region_size 0x3a00
 
-#define NCR5380_read(reg) readb(iomem + NCR53C400_mem_base + (reg))
-#define NCR5380_write(reg, value) writeb(value, iomem + NCR53C400_mem_base + (reg))
+#define NCR5380_read(reg) \
+       readb(((struct NCR5380_hostdata *)shost_priv(instance))->iomem + \
+             NCR53C400_mem_base + (reg))
+#define NCR5380_write(reg, value) \
+       writeb(value, ((struct NCR5380_hostdata *)shost_priv(instance))->iomem + \
+              NCR53C400_mem_base + (reg))
 
 #define NCR5380_implementation_fields \
-    NCR5380_map_type NCR5380_map_name; \
-    void __iomem *iomem;
-
-#define NCR5380_local_declare() \
-    register void __iomem *iomem
-
-#define NCR5380_setup(instance) \
-    iomem = (((struct NCR5380_hostdata *)(instance)->hostdata)->iomem)
+       void __iomem *iomem; \
+       int c400_ctl_status; \
+       int c400_blk_cnt; \
+       int c400_host_buf;
 
 #endif
 
+#define NCR5380_dma_xfer_len(instance, cmd, phase) \
+        generic_NCR5380_dma_xfer_len(cmd)
+
 #define NCR5380_intr generic_NCR5380_intr
 #define NCR5380_queue_command generic_NCR5380_queue_command
 #define NCR5380_abort generic_NCR5380_abort
 #define BOARD_NCR53C400        1
 #define BOARD_NCR53C400A 2
 #define BOARD_DTC3181E 3
+#define BOARD_HP_C2502 4
 
-#endif /* ndef ASM */
 #endif /* GENERIC_NCR5380_H */
 
index 37a0c7156087905b8cb749103e71d92547091e2e..b67661836c9fa26fccd1e23a82508c1d567a5916 100644 (file)
@@ -1,5 +1,7 @@
 config SCSI_HISI_SAS
        tristate "HiSilicon SAS"
+       depends on HAS_DMA
+       depends on ARM64 || COMPILE_TEST
        select SCSI_SAS_LIBSAS
        select BLK_DEV_INTEGRITY
        help
index d54381149c0d513b16cb355b5ec632636073321c..057fdeb720acec997f4a4a5952318c58ab2f0b3e 100644 (file)
 /* ITCT header */
 /* qw0 */
 #define ITCT_HDR_DEV_TYPE_OFF          0
-#define ITCT_HDR_DEV_TYPE_MSK          (0x3 << ITCT_HDR_DEV_TYPE_OFF)
+#define ITCT_HDR_DEV_TYPE_MSK          (0x3ULL << ITCT_HDR_DEV_TYPE_OFF)
 #define ITCT_HDR_VALID_OFF             2
-#define ITCT_HDR_VALID_MSK             (0x1 << ITCT_HDR_VALID_OFF)
-#define ITCT_HDR_BREAK_REPLY_ENA_OFF   3
-#define ITCT_HDR_BREAK_REPLY_ENA_MSK   (0x1 << ITCT_HDR_BREAK_REPLY_ENA_OFF)
+#define ITCT_HDR_VALID_MSK             (0x1ULL << ITCT_HDR_VALID_OFF)
 #define ITCT_HDR_AWT_CONTROL_OFF       4
-#define ITCT_HDR_AWT_CONTROL_MSK       (0x1 << ITCT_HDR_AWT_CONTROL_OFF)
+#define ITCT_HDR_AWT_CONTROL_MSK       (0x1ULL << ITCT_HDR_AWT_CONTROL_OFF)
 #define ITCT_HDR_MAX_CONN_RATE_OFF     5
-#define ITCT_HDR_MAX_CONN_RATE_MSK     (0xf << ITCT_HDR_MAX_CONN_RATE_OFF)
+#define ITCT_HDR_MAX_CONN_RATE_MSK     (0xfULL << ITCT_HDR_MAX_CONN_RATE_OFF)
 #define ITCT_HDR_VALID_LINK_NUM_OFF    9
-#define ITCT_HDR_VALID_LINK_NUM_MSK    (0xf << ITCT_HDR_VALID_LINK_NUM_OFF)
+#define ITCT_HDR_VALID_LINK_NUM_MSK    (0xfULL << ITCT_HDR_VALID_LINK_NUM_OFF)
 #define ITCT_HDR_PORT_ID_OFF           13
-#define ITCT_HDR_PORT_ID_MSK           (0x7 << ITCT_HDR_PORT_ID_OFF)
+#define ITCT_HDR_PORT_ID_MSK           (0x7ULL << ITCT_HDR_PORT_ID_OFF)
 #define ITCT_HDR_SMP_TIMEOUT_OFF       16
-#define ITCT_HDR_SMP_TIMEOUT_MSK       (0xffff << ITCT_HDR_SMP_TIMEOUT_OFF)
-#define ITCT_HDR_MAX_BURST_BYTES_OFF   16
-#define ITCT_HDR_MAX_BURST_BYTES_MSK   (0xffffffff << \
-                                       ITCT_MAX_BURST_BYTES_OFF)
+#define ITCT_HDR_SMP_TIMEOUT_MSK       (0xffffULL << ITCT_HDR_SMP_TIMEOUT_OFF)
 /* qw1 */
 #define ITCT_HDR_MAX_SAS_ADDR_OFF      0
 #define ITCT_HDR_MAX_SAS_ADDR_MSK      (0xffffffffffffffff << \
                                        ITCT_HDR_MAX_SAS_ADDR_OFF)
 /* qw2 */
 #define ITCT_HDR_IT_NEXUS_LOSS_TL_OFF  0
-#define ITCT_HDR_IT_NEXUS_LOSS_TL_MSK  (0xffff << \
+#define ITCT_HDR_IT_NEXUS_LOSS_TL_MSK  (0xffffULL << \
                                        ITCT_HDR_IT_NEXUS_LOSS_TL_OFF)
 #define ITCT_HDR_BUS_INACTIVE_TL_OFF   16
-#define ITCT_HDR_BUS_INACTIVE_TL_MSK   (0xffff << \
+#define ITCT_HDR_BUS_INACTIVE_TL_MSK   (0xffffULL << \
                                        ITCT_HDR_BUS_INACTIVE_TL_OFF)
 #define ITCT_HDR_MAX_CONN_TL_OFF       32
-#define ITCT_HDR_MAX_CONN_TL_MSK       (0xffff << \
+#define ITCT_HDR_MAX_CONN_TL_MSK       (0xffffULL << \
                                        ITCT_HDR_MAX_CONN_TL_OFF)
 #define ITCT_HDR_REJ_OPEN_TL_OFF       48
-#define ITCT_HDR_REJ_OPEN_TL_MSK       (0xffff << \
-                                       ITCT_REJ_OPEN_TL_OFF)
+#define ITCT_HDR_REJ_OPEN_TL_MSK       (0xffffULL << \
+                                       ITCT_HDR_REJ_OPEN_TL_OFF)
 
 /* Err record header */
 #define ERR_HDR_DMA_TX_ERR_TYPE_OFF    0
@@ -533,10 +528,10 @@ static void setup_itct_v1_hw(struct hisi_hba *hisi_hba,
        itct->sas_addr = __swab64(itct->sas_addr);
 
        /* qw2 */
-       itct->qw2 = cpu_to_le64((500 < ITCT_HDR_IT_NEXUS_LOSS_TL_OFF) |
-                               (0xff00 < ITCT_HDR_BUS_INACTIVE_TL_OFF) |
-                               (0xff00 < ITCT_HDR_MAX_CONN_TL_OFF) |
-                               (0xff00 < ITCT_HDR_REJ_OPEN_TL_OFF));
+       itct->qw2 = cpu_to_le64((500ULL << ITCT_HDR_IT_NEXUS_LOSS_TL_OFF) |
+                               (0xff00ULL << ITCT_HDR_BUS_INACTIVE_TL_OFF) |
+                               (0xff00ULL << ITCT_HDR_MAX_CONN_TL_OFF) |
+                               (0xff00ULL << ITCT_HDR_REJ_OPEN_TL_OFF));
 }
 
 static void free_device_v1_hw(struct hisi_hba *hisi_hba,
@@ -544,7 +539,8 @@ static void free_device_v1_hw(struct hisi_hba *hisi_hba,
 {
        u64 dev_id = sas_dev->device_id;
        struct hisi_sas_itct *itct = &hisi_hba->itct[dev_id];
-       u32 qw0, reg_val = hisi_sas_read32(hisi_hba, CFG_AGING_TIME);
+       u64 qw0;
+       u32 reg_val = hisi_sas_read32(hisi_hba, CFG_AGING_TIME);
 
        reg_val |= CFG_AGING_TIME_ITCT_REL_MSK;
        hisi_sas_write32(hisi_hba, CFG_AGING_TIME, reg_val);
index 4e1a632ccf162ea52365b84e71aab19b44619280..f8b88fa78e6274f8b5956d6ea6a3f057881132f5 100644 (file)
@@ -43,6 +43,7 @@ typedef struct {
        unsigned dp:1;          /* Data phase present           */
        unsigned rd:1;          /* Read data in data phase      */
        unsigned wanted:1;      /* Parport sharing busy flag    */
+       unsigned int dev_no;    /* Device number                */
        wait_queue_head_t *waiting;
        struct Scsi_Host *host;
        struct list_head list;
@@ -1120,15 +1121,40 @@ static struct scsi_host_template imm_template = {
 
 static LIST_HEAD(imm_hosts);
 
+/*
+ * Finds the first available device number that can be alloted to the
+ * new imm device and returns the address of the previous node so that
+ * we can add to the tail and have a list in the ascending order.
+ */
+
+static inline imm_struct *find_parent(void)
+{
+       imm_struct *dev, *par = NULL;
+       unsigned int cnt = 0;
+
+       if (list_empty(&imm_hosts))
+               return NULL;
+
+       list_for_each_entry(dev, &imm_hosts, list) {
+               if (dev->dev_no != cnt)
+                       return par;
+               cnt++;
+               par = dev;
+       }
+
+       return par;
+}
+
 static int __imm_attach(struct parport *pb)
 {
        struct Scsi_Host *host;
-       imm_struct *dev;
+       imm_struct *dev, *temp;
        DECLARE_WAIT_QUEUE_HEAD_ONSTACK(waiting);
        DEFINE_WAIT(wait);
        int ports;
        int modes, ppb;
        int err = -ENOMEM;
+       struct pardev_cb imm_cb;
 
        init_waitqueue_head(&waiting);
 
@@ -1141,9 +1167,15 @@ static int __imm_attach(struct parport *pb)
        dev->mode = IMM_AUTODETECT;
        INIT_LIST_HEAD(&dev->list);
 
-       dev->dev = parport_register_device(pb, "imm", NULL, imm_wakeup,
-                                               NULL, 0, dev);
+       temp = find_parent();
+       if (temp)
+               dev->dev_no = temp->dev_no + 1;
+
+       memset(&imm_cb, 0, sizeof(imm_cb));
+       imm_cb.private = dev;
+       imm_cb.wakeup = imm_wakeup;
 
+       dev->dev = parport_register_dev_model(pb, "imm", &imm_cb, dev->dev_no);
        if (!dev->dev)
                goto out;
 
@@ -1207,7 +1239,10 @@ static int __imm_attach(struct parport *pb)
        host->unique_id = pb->number;
        *(imm_struct **)&host->hostdata = dev;
        dev->host = host;
-       list_add_tail(&dev->list, &imm_hosts);
+       if (!temp)
+               list_add_tail(&dev->list, &imm_hosts);
+       else
+               list_add_tail(&dev->list, &temp->list);
        err = scsi_add_host(host, NULL);
        if (err)
                goto out2;
@@ -1245,9 +1280,10 @@ static void imm_detach(struct parport *pb)
 }
 
 static struct parport_driver imm_driver = {
-       .name   = "imm",
-       .attach = imm_attach,
-       .detach = imm_detach,
+       .name           = "imm",
+       .match_port     = imm_attach,
+       .detach         = imm_detach,
+       .devmodel       = true,
 };
 
 static int __init imm_driver_init(void)
index 536cd5a8042245f4ad46929a256098e047d745dd..3b3e0998fa6e72b71f5417c4f942ff9bb67caee0 100644 (file)
@@ -3638,7 +3638,7 @@ static struct device_attribute ipr_ioa_reset_attr = {
        .store = ipr_store_reset_adapter
 };
 
-static int ipr_iopoll(struct blk_iopoll *iop, int budget);
+static int ipr_iopoll(struct irq_poll *iop, int budget);
  /**
  * ipr_show_iopoll_weight - Show ipr polling mode
  * @dev:       class device struct
@@ -3681,34 +3681,33 @@ static ssize_t ipr_store_iopoll_weight(struct device *dev,
        int i;
 
        if (!ioa_cfg->sis64) {
-               dev_info(&ioa_cfg->pdev->dev, "blk-iopoll not supported on this adapter\n");
+               dev_info(&ioa_cfg->pdev->dev, "irq_poll not supported on this adapter\n");
                return -EINVAL;
        }
        if (kstrtoul(buf, 10, &user_iopoll_weight))
                return -EINVAL;
 
        if (user_iopoll_weight > 256) {
-               dev_info(&ioa_cfg->pdev->dev, "Invalid blk-iopoll weight. It must be less than 256\n");
+               dev_info(&ioa_cfg->pdev->dev, "Invalid irq_poll weight. It must be less than 256\n");
                return -EINVAL;
        }
 
        if (user_iopoll_weight == ioa_cfg->iopoll_weight) {
-               dev_info(&ioa_cfg->pdev->dev, "Current blk-iopoll weight has the same weight\n");
+               dev_info(&ioa_cfg->pdev->dev, "Current irq_poll weight has the same weight\n");
                return strlen(buf);
        }
 
        if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
                for (i = 1; i < ioa_cfg->hrrq_num; i++)
-                       blk_iopoll_disable(&ioa_cfg->hrrq[i].iopoll);
+                       irq_poll_disable(&ioa_cfg->hrrq[i].iopoll);
        }
 
        spin_lock_irqsave(shost->host_lock, lock_flags);
        ioa_cfg->iopoll_weight = user_iopoll_weight;
        if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
                for (i = 1; i < ioa_cfg->hrrq_num; i++) {
-                       blk_iopoll_init(&ioa_cfg->hrrq[i].iopoll,
+                       irq_poll_init(&ioa_cfg->hrrq[i].iopoll,
                                        ioa_cfg->iopoll_weight, ipr_iopoll);
-                       blk_iopoll_enable(&ioa_cfg->hrrq[i].iopoll);
                }
        }
        spin_unlock_irqrestore(shost->host_lock, lock_flags);
@@ -4003,13 +4002,12 @@ static ssize_t ipr_store_update_fw(struct device *dev,
        struct ipr_sglist *sglist;
        char fname[100];
        char *src;
-       int len, result, dnld_size;
+       int result, dnld_size;
 
        if (!capable(CAP_SYS_ADMIN))
                return -EACCES;
 
-       len = snprintf(fname, 99, "%s", buf);
-       fname[len-1] = '\0';
+       snprintf(fname, sizeof(fname), "%s", buf);
 
        if (request_firmware(&fw_entry, fname, &ioa_cfg->pdev->dev)) {
                dev_err(&ioa_cfg->pdev->dev, "Firmware file %s not found\n", fname);
@@ -5569,7 +5567,7 @@ static int ipr_process_hrrq(struct ipr_hrr_queue *hrr_queue, int budget,
        return num_hrrq;
 }
 
-static int ipr_iopoll(struct blk_iopoll *iop, int budget)
+static int ipr_iopoll(struct irq_poll *iop, int budget)
 {
        struct ipr_ioa_cfg *ioa_cfg;
        struct ipr_hrr_queue *hrrq;
@@ -5585,7 +5583,7 @@ static int ipr_iopoll(struct blk_iopoll *iop, int budget)
        completed_ops = ipr_process_hrrq(hrrq, budget, &doneq);
 
        if (completed_ops < budget)
-               blk_iopoll_complete(iop);
+               irq_poll_complete(iop);
        spin_unlock_irqrestore(hrrq->lock, hrrq_flags);
 
        list_for_each_entry_safe(ipr_cmd, temp, &doneq, queue) {
@@ -5693,8 +5691,7 @@ static irqreturn_t ipr_isr_mhrrq(int irq, void *devp)
        if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
                if ((be32_to_cpu(*hrrq->hrrq_curr) & IPR_HRRQ_TOGGLE_BIT) ==
                       hrrq->toggle_bit) {
-                       if (!blk_iopoll_sched_prep(&hrrq->iopoll))
-                               blk_iopoll_sched(&hrrq->iopoll);
+                       irq_poll_sched(&hrrq->iopoll);
                        spin_unlock_irqrestore(hrrq->lock, hrrq_flags);
                        return IRQ_HANDLED;
                }
@@ -10405,9 +10402,8 @@ static int ipr_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id)
 
        if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
                for (i = 1; i < ioa_cfg->hrrq_num; i++) {
-                       blk_iopoll_init(&ioa_cfg->hrrq[i].iopoll,
+                       irq_poll_init(&ioa_cfg->hrrq[i].iopoll,
                                        ioa_cfg->iopoll_weight, ipr_iopoll);
-                       blk_iopoll_enable(&ioa_cfg->hrrq[i].iopoll);
                }
        }
 
@@ -10436,7 +10432,7 @@ static void ipr_shutdown(struct pci_dev *pdev)
        if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
                ioa_cfg->iopoll_weight = 0;
                for (i = 1; i < ioa_cfg->hrrq_num; i++)
-                       blk_iopoll_disable(&ioa_cfg->hrrq[i].iopoll);
+                       irq_poll_disable(&ioa_cfg->hrrq[i].iopoll);
        }
 
        while (ioa_cfg->in_reset_reload) {
index a34c7a5a995e4bcbf61b1d859fb784b60a9cdbee..56c57068300a1fb947221f8bc32f467aa115ae80 100644 (file)
@@ -32,7 +32,7 @@
 #include <linux/libata.h>
 #include <linux/list.h>
 #include <linux/kref.h>
-#include <linux/blk-iopoll.h>
+#include <linux/irq_poll.h>
 #include <scsi/scsi.h>
 #include <scsi/scsi_cmnd.h>
 
@@ -517,7 +517,7 @@ struct ipr_hrr_queue {
        u8 allow_cmds:1;
        u8 removing_ioa:1;
 
-       struct blk_iopoll iopoll;
+       struct irq_poll iopoll;
 };
 
 /* Command packet structure */
index d64a769b8155ccda7ab0d0368c9bb84eb717e6a6..bb2381314a2bfe91c1552f4f257dc41919014aac 100644 (file)
@@ -12,7 +12,6 @@
  */
 
 #include <linux/types.h>
-#include <linux/delay.h>
 #include <linux/module.h>
 #include <linux/ioport.h>
 #include <linux/init.h>
 #define PSEUDO_DMA
 
 #define NCR5380_implementation_fields   unsigned char *pdma_base
-#define NCR5380_local_declare()         struct Scsi_Host *_instance
-#define NCR5380_setup(instance)         _instance = instance
 
-#define NCR5380_read(reg)               macscsi_read(_instance, reg)
-#define NCR5380_write(reg, value)       macscsi_write(_instance, reg, value)
+#define NCR5380_read(reg)               macscsi_read(instance, reg)
+#define NCR5380_write(reg, value)       macscsi_write(instance, reg, value)
 
 #define NCR5380_pread                   macscsi_pread
 #define NCR5380_pwrite                  macscsi_pwrite
+#define NCR5380_dma_xfer_len(instance, cmd, phase)     (cmd->transfersize)
 
 #define NCR5380_intr                    macscsi_intr
 #define NCR5380_queue_command           macscsi_queue_command
@@ -51,8 +49,6 @@
 
 #include "NCR5380.h"
 
-#define RESET_BOOT
-
 static int setup_can_queue = -1;
 module_param(setup_can_queue, int, 0);
 static int setup_cmd_per_lun = -1;
@@ -65,17 +61,8 @@ static int setup_use_tagged_queuing = -1;
 module_param(setup_use_tagged_queuing, int, 0);
 static int setup_hostid = -1;
 module_param(setup_hostid, int, 0);
-
-/* Time (in jiffies) to wait after a reset; the SCSI standard calls for 250ms,
- * we usually do 0.5s to be on the safe side. But Toshiba CD-ROMs once more
- * need ten times the standard value... */
-#define TOSHIBA_DELAY
-
-#ifdef TOSHIBA_DELAY
-#define        AFTER_RESET_DELAY       (5*HZ/2)
-#else
-#define        AFTER_RESET_DELAY       (HZ/2)
-#endif
+static int setup_toshiba_delay = -1;
+module_param(setup_toshiba_delay, int, 0);
 
 /*
  * NCR 5380 register access functions
@@ -94,12 +81,12 @@ static inline void macscsi_write(struct Scsi_Host *instance, int reg, int value)
 #ifndef MODULE
 static int __init mac_scsi_setup(char *str)
 {
-       int ints[7];
+       int ints[8];
 
        (void)get_options(str, ARRAY_SIZE(ints), ints);
 
-       if (ints[0] < 1 || ints[0] > 6) {
-               pr_err("Usage: mac5380=<can_queue>[,<cmd_per_lun>[,<sg_tablesize>[,<hostid>[,<use_tags>[,<use_pdma>]]]]]\n");
+       if (ints[0] < 1) {
+               pr_err("Usage: mac5380=<can_queue>[,<cmd_per_lun>[,<sg_tablesize>[,<hostid>[,<use_tags>[,<use_pdma>[,<toshiba_delay>]]]]]]\n");
                return 0;
        }
        if (ints[0] >= 1)
@@ -114,50 +101,14 @@ static int __init mac_scsi_setup(char *str)
                setup_use_tagged_queuing = ints[5];
        if (ints[0] >= 6)
                setup_use_pdma = ints[6];
+       if (ints[0] >= 7)
+               setup_toshiba_delay = ints[7];
        return 1;
 }
 
 __setup("mac5380=", mac_scsi_setup);
 #endif /* !MODULE */
 
-#ifdef RESET_BOOT
-/*
- * Our 'bus reset on boot' function
- */
-
-static void mac_scsi_reset_boot(struct Scsi_Host *instance)
-{
-       unsigned long end;
-
-       NCR5380_local_declare();
-       NCR5380_setup(instance);
-       
-       /*
-        * Do a SCSI reset to clean up the bus during initialization. No messing
-        * with the queues, interrupts, or locks necessary here.
-        */
-
-       printk(KERN_INFO "Macintosh SCSI: resetting the SCSI bus..." );
-
-       /* get in phase */
-       NCR5380_write( TARGET_COMMAND_REG,
-                     PHASE_SR_TO_TCR( NCR5380_read(STATUS_REG) ));
-
-       /* assert RST */
-       NCR5380_write( INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_RST );
-       /* The min. reset hold time is 25us, so 40us should be enough */
-       udelay( 50 );
-       /* reset RST and interrupt */
-       NCR5380_write( INITIATOR_COMMAND_REG, ICR_BASE );
-       NCR5380_read( RESET_PARITY_INTERRUPT_REG );
-
-       for( end = jiffies + AFTER_RESET_DELAY; time_before(jiffies, end); )
-               barrier();
-
-       printk(KERN_INFO " done\n" );
-}
-#endif
-
 #ifdef PSEUDO_DMA
 /* 
    Pseudo-DMA: (Ove Edlund)
@@ -235,9 +186,6 @@ static int macscsi_pread(struct Scsi_Host *instance,
        unsigned char *d;
        unsigned char *s;
 
-       NCR5380_local_declare();
-       NCR5380_setup(instance);
-
        s = hostdata->pdma_base + (INPUT_DATA_REG << 4);
        d = dst;
 
@@ -329,9 +277,6 @@ static int macscsi_pwrite(struct Scsi_Host *instance,
        unsigned char *s;
        unsigned char *d;
 
-       NCR5380_local_declare();
-       NCR5380_setup(instance);
-
        s = src;
        d = hostdata->pdma_base + (OUTPUT_DATA_REG << 4);
 
@@ -364,20 +309,22 @@ static int macscsi_pwrite(struct Scsi_Host *instance,
 #define PFX                     DRV_MODULE_NAME ": "
 
 static struct scsi_host_template mac_scsi_template = {
-       .module                         = THIS_MODULE,
-       .proc_name                      = DRV_MODULE_NAME,
-       .show_info                      = macscsi_show_info,
-       .write_info                     = macscsi_write_info,
-       .name                           = "Macintosh NCR5380 SCSI",
-       .info                           = macscsi_info,
-       .queuecommand                   = macscsi_queue_command,
-       .eh_abort_handler               = macscsi_abort,
-       .eh_bus_reset_handler           = macscsi_bus_reset,
-       .can_queue                      = 16,
-       .this_id                        = 7,
-       .sg_tablesize                   = SG_ALL,
-       .cmd_per_lun                    = 2,
-       .use_clustering                 = DISABLE_CLUSTERING
+       .module                 = THIS_MODULE,
+       .proc_name              = DRV_MODULE_NAME,
+       .show_info              = macscsi_show_info,
+       .write_info             = macscsi_write_info,
+       .name                   = "Macintosh NCR5380 SCSI",
+       .info                   = macscsi_info,
+       .queuecommand           = macscsi_queue_command,
+       .eh_abort_handler       = macscsi_abort,
+       .eh_bus_reset_handler   = macscsi_bus_reset,
+       .can_queue              = 16,
+       .this_id                = 7,
+       .sg_tablesize           = SG_ALL,
+       .cmd_per_lun            = 2,
+       .use_clustering         = DISABLE_CLUSTERING,
+       .cmd_size               = NCR5380_CMD_SIZE,
+       .max_sectors            = 128,
 };
 
 static int __init mac_scsi_probe(struct platform_device *pdev)
@@ -432,15 +379,14 @@ static int __init mac_scsi_probe(struct platform_device *pdev)
        } else
                host_flags |= FLAG_NO_PSEUDO_DMA;
 
-#ifdef RESET_BOOT
-       mac_scsi_reset_boot(instance);
-#endif
-
 #ifdef SUPPORT_TAGS
        host_flags |= setup_use_tagged_queuing > 0 ? FLAG_TAGGED_QUEUING : 0;
 #endif
+       host_flags |= setup_toshiba_delay > 0 ? FLAG_TOSHIBA_DELAY : 0;
 
-       NCR5380_init(instance, host_flags);
+       error = NCR5380_init(instance, host_flags);
+       if (error)
+               goto fail_init;
 
        if (instance->irq != NO_IRQ) {
                error = request_irq(instance->irq, macscsi_intr, IRQF_SHARED,
@@ -449,6 +395,8 @@ static int __init mac_scsi_probe(struct platform_device *pdev)
                        goto fail_irq;
        }
 
+       NCR5380_maybe_reset_bus(instance);
+
        error = scsi_add_host(instance, NULL);
        if (error)
                goto fail_host;
@@ -463,6 +411,7 @@ fail_host:
                free_irq(instance->irq, instance);
 fail_irq:
        NCR5380_exit(instance);
+fail_init:
        scsi_host_put(instance);
        return error;
 }
index a70692779a16c770300b410eda56a9f1823e9239..4cf9ed96414f05c881221dabcc18ca5717bd2c11 100644 (file)
@@ -179,8 +179,12 @@ mraid_mm_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
 
        /*
         * The following call will block till a kioc is available
+        * or return NULL if the list head is empty for the pointer
+        * of type mraid_mmapt passed to mraid_mm_alloc_kioc
         */
        kioc = mraid_mm_alloc_kioc(adp);
+       if (!kioc)
+               return -ENXIO;
 
        /*
         * User sent the old mimd_t ioctl packet. Convert it to uioc_t.
index e81eadd08afc776d6be23b2e2af8f550841da1b4..512037e27783d6192f1cea65b41eefb5fbaa9db4 100644 (file)
@@ -1,6 +1,4 @@
 #define PSEUDO_DMA
-#define UNSAFE  /* Not unsafe for PAS16 -- use it */
-#define PDEBUG 0
 
 /*
  * This driver adapted from Drew Eckhardt's Trantor T128 driver
  
 #include <linux/module.h>
 
-#include <linux/signal.h>
-#include <linux/proc_fs.h>
 #include <asm/io.h>
 #include <asm/dma.h>
 #include <linux/blkdev.h>
-#include <linux/delay.h>
 #include <linux/interrupt.h>
-#include <linux/stat.h>
 #include <linux/init.h>
 
 #include <scsi/scsi_host.h>
@@ -87,8 +81,8 @@
 #include "NCR5380.h"
 
 
-static unsigned short pas16_addr = 0;
-static int pas16_irq = 0;
+static unsigned short pas16_addr;
+static int pas16_irq;
  
 
 static const int scsi_irq_translate[] =
@@ -146,22 +140,6 @@ static const unsigned short  pas16_offset[ 8 ] =
                    * START_DMA_INITIATOR_RECEIVE_REG wo
                    */
     };
-/*----------------------------------------------------------------*/
-/* the following will set the monitor border color (useful to find
- where something crashed or gets stuck at */
-/* 1 = blue
- 2 = green
- 3 = cyan
- 4 = red
- 5 = magenta
- 6 = yellow
- 7 = white
-*/
-#if 1
-#define rtrc(i) {inb(0x3da); outb(0x31, 0x3c0); outb((i), 0x3c0);}
-#else
-#define rtrc(i) {}
-#endif
 
 
 /*
@@ -205,7 +183,7 @@ static void __init
        outb( 0x01, io_port + P_TIMEOUT_STATUS_REG_OFFSET );   /* Reset TC */
        outb( 0x01, io_port + WAIT_STATE );   /* 1 Wait state */
 
-       NCR5380_read( RESET_PARITY_INTERRUPT_REG );
+       inb(io_port + pas16_offset[RESET_PARITY_INTERRUPT_REG]);
 
        /* Set the SCSI interrupt pointer without mucking up the sound
         * interrupt pointer in the same byte.
@@ -280,13 +258,13 @@ static int __init
      * put in an additional test to try to weed them out.
      */
 
-    outb( 0x01, io_port + WAIT_STATE );        /* 1 Wait state */
-    NCR5380_write( MODE_REG, 0x20 );           /* Is it really SCSI? */
-    if( NCR5380_read( MODE_REG ) != 0x20 )     /* Write to a reg.    */
-       return 0;                               /* and try to read    */
-    NCR5380_write( MODE_REG, 0x00 );           /* it back.           */
-    if( NCR5380_read( MODE_REG ) != 0x00 )
-       return 0;
+       outb(0x01, io_port + WAIT_STATE);             /* 1 Wait state */
+       outb(0x20, io_port + pas16_offset[MODE_REG]); /* Is it really SCSI? */
+       if (inb(io_port + pas16_offset[MODE_REG]) != 0x20) /* Write to a reg. */
+               return 0;                                  /* and try to read */
+       outb(0x00, io_port + pas16_offset[MODE_REG]);      /* it back. */
+       if (inb(io_port + pas16_offset[MODE_REG]) != 0x00)
+               return 0;
 
     return 1;
 }
@@ -305,7 +283,7 @@ static int __init
 
 static int __init pas16_setup(char *str)
 {
-    static int commandline_current = 0;
+       static int commandline_current;
     int i;
     int ints[10];
 
@@ -344,8 +322,8 @@ __setup("pas16=", pas16_setup);
 
 static int __init pas16_detect(struct scsi_host_template *tpnt)
 {
-    static int current_override = 0;
-    static unsigned short current_base = 0;
+       static int current_override;
+       static unsigned short current_base;
     struct Scsi_Host *instance;
     unsigned short io_port;
     int  count;
@@ -377,34 +355,32 @@ static int __init pas16_detect(struct scsi_host_template *tpnt)
        }
        else
            for (; !io_port && (current_base < NO_BASES); ++current_base) {
-#if (PDEBUG & PDEBUG_INIT)
-    printk("scsi-pas16 : probing io_port %04x\n", (unsigned int) bases[current_base].io_port);
-#endif
+               dprintk(NDEBUG_INIT, "pas16: probing io_port 0x%04x\n",
+                       (unsigned int)bases[current_base].io_port);
                if ( !bases[current_base].noauto &&
                     pas16_hw_detect( current_base ) ){
                        io_port = bases[current_base].io_port;
                        init_board( io_port, default_irqs[ current_base ], 0 ); 
-#if (PDEBUG & PDEBUG_INIT)
-                       printk("scsi-pas16 : detected board.\n");
-#endif
+                       dprintk(NDEBUG_INIT, "pas16: detected board\n");
                }
     }
 
-
-#if defined(PDEBUG) && (PDEBUG & PDEBUG_INIT)
-       printk("scsi-pas16 : io_port = %04x\n", (unsigned int) io_port);
-#endif
+       dprintk(NDEBUG_INIT, "pas16: io_port = 0x%04x\n",
+               (unsigned int)io_port);
 
        if (!io_port)
            break;
 
        instance = scsi_register (tpnt, sizeof(struct NCR5380_hostdata));
        if(instance == NULL)
-               break;
+               goto out;
                
        instance->io_port = io_port;
 
-       NCR5380_init(instance, 0);
+       if (NCR5380_init(instance, 0))
+               goto out_unregister;
+
+       NCR5380_maybe_reset_bus(instance);
 
        if (overrides[current_override].irq != IRQ_AUTO)
            instance->irq = overrides[current_override].irq;
@@ -431,14 +407,18 @@ static int __init pas16_detect(struct scsi_host_template *tpnt)
            outb( (inb(io_port + IO_CONFIG_3) & 0x0f), io_port + IO_CONFIG_3 );
        }
 
-#if defined(PDEBUG) && (PDEBUG & PDEBUG_INIT)
-       printk("scsi%d : irq = %d\n", instance->host_no, instance->irq);
-#endif
+       dprintk(NDEBUG_INIT, "scsi%d : irq = %d\n",
+               instance->host_no, instance->irq);
 
        ++current_override;
        ++count;
     }
     return count;
+
+out_unregister:
+       scsi_unregister(instance);
+out:
+       return count;
 }
 
 /*
@@ -561,29 +541,29 @@ static int pas16_release(struct Scsi_Host *shost)
        if (shost->irq != NO_IRQ)
                free_irq(shost->irq, shost);
        NCR5380_exit(shost);
-       if (shost->io_port && shost->n_io_port)
-               release_region(shost->io_port, shost->n_io_port);
        scsi_unregister(shost);
        return 0;
 }
 
 static struct scsi_host_template driver_template = {
-       .name           = "Pro Audio Spectrum-16 SCSI",
-       .detect         = pas16_detect,
-       .release        = pas16_release,
-       .proc_name      = "pas16",
-       .show_info      = pas16_show_info,
-       .write_info     = pas16_write_info,
-       .info           = pas16_info,
-       .queuecommand   = pas16_queue_command,
-       .eh_abort_handler = pas16_abort,
-       .eh_bus_reset_handler = pas16_bus_reset,
-       .bios_param     = pas16_biosparam, 
-       .can_queue      = CAN_QUEUE,
-       .this_id        = 7,
-       .sg_tablesize   = SG_ALL,
-       .cmd_per_lun    = CMD_PER_LUN,
-       .use_clustering = DISABLE_CLUSTERING,
+       .name                   = "Pro Audio Spectrum-16 SCSI",
+       .detect                 = pas16_detect,
+       .release                = pas16_release,
+       .proc_name              = "pas16",
+       .show_info              = pas16_show_info,
+       .write_info             = pas16_write_info,
+       .info                   = pas16_info,
+       .queuecommand           = pas16_queue_command,
+       .eh_abort_handler       = pas16_abort,
+       .eh_bus_reset_handler   = pas16_bus_reset,
+       .bios_param             = pas16_biosparam,
+       .can_queue              = 32,
+       .this_id                = 7,
+       .sg_tablesize           = SG_ALL,
+       .cmd_per_lun            = 2,
+       .use_clustering         = DISABLE_CLUSTERING,
+       .cmd_size               = NCR5380_CMD_SIZE,
+       .max_sectors            = 128,
 };
 #include "scsi_module.c"
 
index c6109c80050bdf59d2b6933ba43579f8afa896ed..d375277172258391ed84ff1627c060f007e1f188 100644 (file)
@@ -24,9 +24,6 @@
 #ifndef PAS16_H
 #define PAS16_H
 
-#define PDEBUG_INIT    0x1
-#define PDEBUG_TRANSFER 0x2
-
 #define PAS16_DEFAULT_BASE_1  0x388
 #define PAS16_DEFAULT_BASE_2  0x384
 #define PAS16_DEFAULT_BASE_3  0x38c
 #define OPERATION_MODE_1 0xec03
 #define IO_CONFIG_3 0xf002
 
+#define NCR5380_implementation_fields /* none */
 
-#ifndef ASM
-
-#ifndef CMD_PER_LUN
-#define CMD_PER_LUN 2
-#endif
-
-#ifndef CAN_QUEUE
-#define CAN_QUEUE 32 
-#endif
-
-#define NCR5380_implementation_fields \
-    volatile unsigned short io_port
-
-#define NCR5380_local_declare() \
-    volatile unsigned short io_port
+#define PAS16_io_port(reg) (instance->io_port + pas16_offset[(reg)])
 
-#define NCR5380_setup(instance) \
-    io_port = (instance)->io_port
-
-#define PAS16_io_port(reg) ( io_port + pas16_offset[(reg)] )
-
-#if !(PDEBUG & PDEBUG_TRANSFER) 
 #define NCR5380_read(reg) ( inb(PAS16_io_port(reg)) )
 #define NCR5380_write(reg, value) ( outb((value),PAS16_io_port(reg)) )
-#else
-#define NCR5380_read(reg)                                              \
-    (((unsigned char) printk("scsi%d : read register %d at io_port %04x\n"\
-    , instance->hostno, (reg), PAS16_io_port(reg))), inb( PAS16_io_port(reg)) )
-
-#define NCR5380_write(reg, value)                                      \
-    (printk("scsi%d : write %02x to register %d at io_port %04x\n",    \
-           instance->hostno, (value), (reg), PAS16_io_port(reg)),      \
-    outb( (value),PAS16_io_port(reg) ) )
-
-#endif
 
+#define NCR5380_dma_xfer_len(instance, cmd, phase)     (cmd->transfersize)
 
 #define NCR5380_intr pas16_intr
-#define do_NCR5380_intr do_pas16_intr
 #define NCR5380_queue_command pas16_queue_command
 #define NCR5380_abort pas16_abort
 #define NCR5380_bus_reset pas16_bus_reset
    
 #define PAS16_IRQS 0xd4a8 
 
-#endif /* ndef ASM */
 #endif /* PAS16_H */
index 2c1160c7ec92fdd48c078c4862a2dac7904edc31..47b9d13f97b880033c20144968cf715c8af41623 100644 (file)
@@ -227,6 +227,7 @@ static struct {
        {"Promise", "VTrak E610f", NULL, BLIST_SPARSELUN | BLIST_NO_RSOC},
        {"Promise", "", NULL, BLIST_SPARSELUN},
        {"QNAP", "iSCSI Storage", NULL, BLIST_MAX_1024},
+       {"SYNOLOGY", "iSCSI Storage", NULL, BLIST_MAX_1024},
        {"QUANTUM", "XP34301", "1071", BLIST_NOTQ},
        {"REGAL", "CDC-4X", NULL, BLIST_MAX5LUN | BLIST_SINGLELUN},
        {"SanDisk", "ImageMate CF-SD1", NULL, BLIST_FORCELUN},
index 4e08d1cd704d1c261c82a98067649154f433ecd1..bb669d32ccd0daee203a69840313fcb9cf343ee0 100644 (file)
@@ -2893,7 +2893,7 @@ static int sd_revalidate_disk(struct gendisk *disk)
            sdkp->opt_xfer_blocks <= SD_DEF_XFER_BLOCKS &&
            sdkp->opt_xfer_blocks * sdp->sector_size >= PAGE_CACHE_SIZE)
                rw_max = q->limits.io_opt =
-                       logical_to_sectors(sdp, sdkp->opt_xfer_blocks);
+                       sdkp->opt_xfer_blocks * sdp->sector_size;
        else
                rw_max = BLK_DEF_MAX_SECTORS;
 
@@ -3268,8 +3268,8 @@ static int sd_suspend_common(struct device *dev, bool ignore_stop_errors)
        struct scsi_disk *sdkp = dev_get_drvdata(dev);
        int ret = 0;
 
-       if (!sdkp)
-               return 0;       /* this can happen */
+       if (!sdkp)      /* E.g.: runtime suspend following sd_remove() */
+               return 0;
 
        if (sdkp->WCE && sdkp->media_present) {
                sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n");
@@ -3308,6 +3308,9 @@ static int sd_resume(struct device *dev)
 {
        struct scsi_disk *sdkp = dev_get_drvdata(dev);
 
+       if (!sdkp)      /* E.g.: runtime resume at the start of sd_probe() */
+               return 0;
+
        if (!sdkp->device->manage_start_stop)
                return 0;
 
index 503ab8b46c0b4e8a73d7e826ba1aaf3be1abd6b0..5e820674432ca38c46ad154b831c8eb19e974d34 100644 (file)
@@ -1261,7 +1261,7 @@ sg_mmap(struct file *filp, struct vm_area_struct *vma)
        }
 
        sfp->mmap_called = 1;
-       vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
+       vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
        vma->vm_private_data = sfp;
        vma->vm_ops = &sg_mmap_vm_ops;
        return 0;
index 8bd54a64efd6a52aef163f0cbe4f34f5ef9ac71c..64c867405ad4ff2d5e53972a8ba34f785cc81962 100644 (file)
@@ -144,6 +144,9 @@ static int sr_runtime_suspend(struct device *dev)
 {
        struct scsi_cd *cd = dev_get_drvdata(dev);
 
+       if (!cd)        /* E.g.: runtime suspend following sr_remove() */
+               return 0;
+
        if (cd->media_present)
                return -EBUSY;
        else
@@ -985,6 +988,7 @@ static int sr_remove(struct device *dev)
        scsi_autopm_get_device(cd->device);
 
        del_gendisk(cd->disk);
+       dev_set_drvdata(dev, NULL);
 
        mutex_lock(&sr_ref_mutex);
        kref_put(&cd->kref, sr_kref_release);
index 41c115c230d9bf66da94693597967d037fdefaf6..55627d097873a85780a878e32b32c20e6c4a9524 100644 (file)
@@ -390,7 +390,7 @@ module_param(storvsc_ringbuffer_size, int, S_IRUGO);
 MODULE_PARM_DESC(storvsc_ringbuffer_size, "Ring buffer size (bytes)");
 
 module_param(storvsc_vcpus_per_sub_channel, int, S_IRUGO);
-MODULE_PARM_DESC(vcpus_per_sub_channel, "Ratio of VCPUs to subchannels");
+MODULE_PARM_DESC(storvsc_vcpus_per_sub_channel, "Ratio of VCPUs to subchannels");
 /*
  * Timeout in seconds for all devices managed by this driver.
  */
index 22a42836d193a3723c4cb1193a0c72aeb9d99ffe..b9de487bbd311d4aeec4e215cf698c959f4b1db6 100644 (file)
 #define NCR5380_queue_command           sun3scsi_queue_command
 #define NCR5380_bus_reset               sun3scsi_bus_reset
 #define NCR5380_abort                   sun3scsi_abort
-#define NCR5380_show_info               sun3scsi_show_info
 #define NCR5380_info                    sun3scsi_info
 
 #define NCR5380_dma_read_setup(instance, data, count) \
-        sun3scsi_dma_setup(data, count, 0)
+        sun3scsi_dma_setup(instance, data, count, 0)
 #define NCR5380_dma_write_setup(instance, data, count) \
-        sun3scsi_dma_setup(data, count, 1)
+        sun3scsi_dma_setup(instance, data, count, 1)
 #define NCR5380_dma_residual(instance) \
         sun3scsi_dma_residual(instance)
 #define NCR5380_dma_xfer_len(instance, cmd, phase) \
@@ -86,10 +85,6 @@ module_param(setup_use_tagged_queuing, int, 0);
 static int setup_hostid = -1;
 module_param(setup_hostid, int, 0);
 
-/* #define RESET_BOOT */
-
-#define        AFTER_RESET_DELAY       (HZ/2)
-
 /* ms to wait after hitting dma regs */
 #define SUN3_DMA_DELAY 10
 
@@ -100,11 +95,10 @@ static struct scsi_cmnd *sun3_dma_setup_done;
 static unsigned char *sun3_scsi_regp;
 static volatile struct sun3_dma_regs *dregs;
 static struct sun3_udc_regs *udc_regs;
-static unsigned char *sun3_dma_orig_addr = NULL;
-static unsigned long sun3_dma_orig_count = 0;
-static int sun3_dma_active = 0;
-static unsigned long last_residual = 0;
-static struct Scsi_Host *default_instance;
+static unsigned char *sun3_dma_orig_addr;
+static unsigned long sun3_dma_orig_count;
+static int sun3_dma_active;
+static unsigned long last_residual;
 
 /*
  * NCR 5380 register access functions
@@ -144,50 +138,12 @@ static inline void sun3_udc_write(unsigned short val, unsigned char reg)
 }
 #endif
 
-#ifdef RESET_BOOT
-static void sun3_scsi_reset_boot(struct Scsi_Host *instance)
-{
-       unsigned long end;
-       
-       /*
-        * Do a SCSI reset to clean up the bus during initialization. No
-        * messing with the queues, interrupts, or locks necessary here.
-        */
-
-       printk( "Sun3 SCSI: resetting the SCSI bus..." );
-
-       /* switch off SCSI IRQ - catch an interrupt without IRQ bit set else */
-//             sun3_disable_irq( IRQ_SUN3_SCSI );
-
-       /* get in phase */
-       NCR5380_write( TARGET_COMMAND_REG,
-                     PHASE_SR_TO_TCR( NCR5380_read(STATUS_REG) ));
-
-       /* assert RST */
-       NCR5380_write( INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_RST );
-
-       /* The min. reset hold time is 25us, so 40us should be enough */
-       udelay( 50 );
-
-       /* reset RST and interrupt */
-       NCR5380_write( INITIATOR_COMMAND_REG, ICR_BASE );
-       NCR5380_read( RESET_PARITY_INTERRUPT_REG );
-
-       for( end = jiffies + AFTER_RESET_DELAY; time_before(jiffies, end); )
-               barrier();
-
-       /* switch on SCSI IRQ again */
-//             sun3_enable_irq( IRQ_SUN3_SCSI );
-
-       printk( " done\n" );
-}
-#endif
-
 // safe bits for the CSR
 #define CSR_GOOD 0x060f
 
-static irqreturn_t scsi_sun3_intr(int irq, void *dummy)
+static irqreturn_t scsi_sun3_intr(int irq, void *dev)
 {
+       struct Scsi_Host *instance = dev;
        unsigned short csr = dregs->csr;
        int handled = 0;
 
@@ -196,46 +152,24 @@ static irqreturn_t scsi_sun3_intr(int irq, void *dummy)
 #endif
 
        if(csr & ~CSR_GOOD) {
-               if(csr & CSR_DMA_BUSERR) {
-                       printk("scsi%d: bus error in dma\n", default_instance->host_no);
-               }
-
-               if(csr & CSR_DMA_CONFLICT) {
-                       printk("scsi%d: dma conflict\n", default_instance->host_no);
-               }
+               if (csr & CSR_DMA_BUSERR)
+                       shost_printk(KERN_ERR, instance, "bus error in DMA\n");
+               if (csr & CSR_DMA_CONFLICT)
+                       shost_printk(KERN_ERR, instance, "DMA conflict\n");
                handled = 1;
        }
 
        if(csr & (CSR_SDB_INT | CSR_DMA_INT)) {
-               NCR5380_intr(irq, dummy);
+               NCR5380_intr(irq, dev);
                handled = 1;
        }
 
        return IRQ_RETVAL(handled);
 }
 
-/*
- * Debug stuff - to be called on NMI, or sysrq key. Use at your own risk; 
- * reentering NCR5380_print_status seems to have ugly side effects
- */
-
-/* this doesn't seem to get used at all -- sam */
-#if 0
-void sun3_sun3_debug (void)
-{
-       unsigned long flags;
-
-       if (default_instance) {
-                       local_irq_save(flags);
-                       NCR5380_print_status(default_instance);
-                       local_irq_restore(flags);
-       }
-}
-#endif
-
-
 /* sun3scsi_dma_setup() -- initialize the dma controller for a read/write */
-static unsigned long sun3scsi_dma_setup(void *data, unsigned long count, int write_flag)
+static unsigned long sun3scsi_dma_setup(struct Scsi_Host *instance,
+                                void *data, unsigned long count, int write_flag)
 {
        void *addr;
 
@@ -287,10 +221,9 @@ static unsigned long sun3scsi_dma_setup(void *data, unsigned long count, int wri
        dregs->csr |= CSR_FIFO;
        
        if(dregs->fifo_count != count) { 
-               printk("scsi%d: fifo_mismatch %04x not %04x\n",
-                      default_instance->host_no, dregs->fifo_count,
-                      (unsigned int) count);
-               NCR5380_dprint(NDEBUG_DMA, default_instance);
+               shost_printk(KERN_ERR, instance, "FIFO mismatch %04x not %04x\n",
+                            dregs->fifo_count, (unsigned int) count);
+               NCR5380_dprint(NDEBUG_DMA, instance);
        }
 
        /* setup udc */
@@ -325,21 +258,6 @@ static unsigned long sun3scsi_dma_setup(void *data, unsigned long count, int wri
 
 }
 
-#ifndef SUN3_SCSI_VME
-static inline unsigned long sun3scsi_dma_count(struct Scsi_Host *instance)
-{
-       unsigned short resid;
-
-       dregs->udc_addr = 0x32; 
-       udelay(SUN3_DMA_DELAY);
-       resid = dregs->udc_data;
-       udelay(SUN3_DMA_DELAY);
-       resid *= 2;
-
-       return (unsigned long) resid;
-}
-#endif
-
 static inline unsigned long sun3scsi_dma_residual(struct Scsi_Host *instance)
 {
        return last_residual;
@@ -437,7 +355,10 @@ static int sun3scsi_dma_finish(int write_flag)
                }
        }
 
-       count = sun3scsi_dma_count(default_instance);
+       dregs->udc_addr = 0x32;
+       udelay(SUN3_DMA_DELAY);
+       count = 2 * dregs->udc_data;
+       udelay(SUN3_DMA_DELAY);
 
        fifo = dregs->fifo_count;
        last_residual = fifo;
@@ -502,17 +423,17 @@ static int sun3scsi_dma_finish(int write_flag)
 static struct scsi_host_template sun3_scsi_template = {
        .module                 = THIS_MODULE,
        .proc_name              = DRV_MODULE_NAME,
-       .show_info              = sun3scsi_show_info,
        .name                   = SUN3_SCSI_NAME,
        .info                   = sun3scsi_info,
        .queuecommand           = sun3scsi_queue_command,
-       .eh_abort_handler       = sun3scsi_abort,
-       .eh_bus_reset_handler   = sun3scsi_bus_reset,
+       .eh_abort_handler       = sun3scsi_abort,
+       .eh_bus_reset_handler   = sun3scsi_bus_reset,
        .can_queue              = 16,
        .this_id                = 7,
        .sg_tablesize           = SG_NONE,
        .cmd_per_lun            = 2,
-       .use_clustering         = DISABLE_CLUSTERING
+       .use_clustering         = DISABLE_CLUSTERING,
+       .cmd_size               = NCR5380_CMD_SIZE,
 };
 
 static int __init sun3_scsi_probe(struct platform_device *pdev)
@@ -591,7 +512,6 @@ static int __init sun3_scsi_probe(struct platform_device *pdev)
                error = -ENOMEM;
                goto fail_alloc;
        }
-       default_instance = instance;
 
        instance->io_port = (unsigned long)ioaddr;
        instance->irq = irq->start;
@@ -600,7 +520,9 @@ static int __init sun3_scsi_probe(struct platform_device *pdev)
        host_flags |= setup_use_tagged_queuing > 0 ? FLAG_TAGGED_QUEUING : 0;
 #endif
 
-       NCR5380_init(instance, host_flags);
+       error = NCR5380_init(instance, host_flags);
+       if (error)
+               goto fail_init;
 
        error = request_irq(instance->irq, scsi_sun3_intr, 0,
                            "NCR5380", instance);
@@ -631,9 +553,7 @@ static int __init sun3_scsi_probe(struct platform_device *pdev)
        dregs->ivect = VME_DATA24 | (instance->irq & 0xff);
 #endif
 
-#ifdef RESET_BOOT
-       sun3_scsi_reset_boot(instance);
-#endif
+       NCR5380_maybe_reset_bus(instance);
 
        error = scsi_add_host(instance, NULL);
        if (error)
@@ -649,6 +569,7 @@ fail_host:
                free_irq(instance->irq, instance);
 fail_irq:
        NCR5380_exit(instance);
+fail_init:
        scsi_host_put(instance);
 fail_alloc:
        if (udc_regs)
index 87828acbf7c63601d618909a59028a3b4ffa6372..4615fda60dbdf6477ff28566740f59f6f51975ea 100644 (file)
  * 15 9-11
  */
  
-#include <linux/signal.h>
 #include <linux/io.h>
 #include <linux/blkdev.h>
 #include <linux/interrupt.h>
-#include <linux/stat.h>
 #include <linux/init.h>
 #include <linux/module.h>
-#include <linux/delay.h>
 
 #include <scsi/scsi_host.h>
 #include "t128.h"
@@ -126,7 +123,7 @@ static struct signature {
 
 static int __init t128_setup(char *str)
 {
-    static int commandline_current = 0;
+       static int commandline_current;
     int i;
     int ints[10];
 
@@ -165,7 +162,7 @@ __setup("t128=", t128_setup);
 
 static int __init t128_detect(struct scsi_host_template *tpnt)
 {
-    static int current_override = 0, current_base = 0;
+       static int current_override, current_base;
     struct Scsi_Host *instance;
     unsigned long base;
     void __iomem *p;
@@ -182,9 +179,8 @@ static int __init t128_detect(struct scsi_host_template *tpnt)
                base = 0;
        } else 
            for (; !base && (current_base < NO_BASES); ++current_base) {
-#if (TDEBUG & TDEBUG_INIT)
-    printk("scsi-t128 : probing address %08x\n", bases[current_base].address);
-#endif
+               dprintk(NDEBUG_INIT, "t128: probing address 0x%08x\n",
+                       bases[current_base].address);
                if (bases[current_base].noauto)
                        continue;
                p = ioremap(bases[current_base].address, 0x2000);
@@ -195,17 +191,13 @@ static int __init t128_detect(struct scsi_host_template *tpnt)
                                        signatures[sig].string,
                                        strlen(signatures[sig].string))) {
                        base = bases[current_base].address;
-#if (TDEBUG & TDEBUG_INIT)
-                       printk("scsi-t128 : detected board.\n");
-#endif
+                       dprintk(NDEBUG_INIT, "t128: detected board\n");
                        goto found;
                    }
                iounmap(p);
            }
 
-#if defined(TDEBUG) && (TDEBUG & TDEBUG_INIT)
-       printk("scsi-t128 : base = %08x\n", (unsigned int) base);
-#endif
+       dprintk(NDEBUG_INIT, "t128: base = 0x%08x\n", (unsigned int)base);
 
        if (!base)
            break;
@@ -213,12 +205,15 @@ static int __init t128_detect(struct scsi_host_template *tpnt)
 found:
        instance = scsi_register (tpnt, sizeof(struct NCR5380_hostdata));
        if(instance == NULL)
-               break;
-               
+               goto out_unmap;
+
        instance->base = base;
        ((struct NCR5380_hostdata *)instance->hostdata)->base = p;
 
-       NCR5380_init(instance, 0);
+       if (NCR5380_init(instance, 0))
+               goto out_unregister;
+
+       NCR5380_maybe_reset_bus(instance);
 
        if (overrides[current_override].irq != IRQ_AUTO)
            instance->irq = overrides[current_override].irq;
@@ -242,27 +237,30 @@ found:
            printk("scsi%d : please jumper the board for a free IRQ.\n", instance->host_no);
        }
 
-#if defined(TDEBUG) && (TDEBUG & TDEBUG_INIT)
-       printk("scsi%d : irq = %d\n", instance->host_no, instance->irq);
-#endif
+       dprintk(NDEBUG_INIT, "scsi%d: irq = %d\n",
+               instance->host_no, instance->irq);
 
        ++current_override;
        ++count;
     }
     return count;
+
+out_unregister:
+       scsi_unregister(instance);
+out_unmap:
+       iounmap(p);
+       return count;
 }
 
 static int t128_release(struct Scsi_Host *shost)
 {
-       NCR5380_local_declare();
-       NCR5380_setup(shost);
+       struct NCR5380_hostdata *hostdata = shost_priv(shost);
+
        if (shost->irq != NO_IRQ)
                free_irq(shost->irq, shost);
        NCR5380_exit(shost);
-       if (shost->io_port && shost->n_io_port)
-               release_region(shost->io_port, shost->n_io_port);
        scsi_unregister(shost);
-       iounmap(base);
+       iounmap(hostdata->base);
        return 0;
 }
 
@@ -308,14 +306,14 @@ static int t128_biosparam(struct scsi_device *sdev, struct block_device *bdev,
  *     timeout.
  */
 
-static inline int NCR5380_pread (struct Scsi_Host *instance, unsigned char *dst,
-    int len) {
-    NCR5380_local_declare();
-    void __iomem *reg;
+static inline int
+NCR5380_pread(struct Scsi_Host *instance, unsigned char *dst, int len)
+{
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+       void __iomem *reg, *base = hostdata->base;
     unsigned char *d = dst;
     register int i = len;
 
-    NCR5380_setup(instance);
     reg = base + T_DATA_REG_OFFSET;
 
 #if 0
@@ -354,14 +352,14 @@ static inline int NCR5380_pread (struct Scsi_Host *instance, unsigned char *dst,
  *     timeout.
  */
 
-static inline int NCR5380_pwrite (struct Scsi_Host *instance, unsigned char *src,
-    int len) {
-    NCR5380_local_declare();
-    void __iomem *reg;
+static inline int
+NCR5380_pwrite(struct Scsi_Host *instance, unsigned char *src, int len)
+{
+       struct NCR5380_hostdata *hostdata = shost_priv(instance);
+       void __iomem *reg, *base = hostdata->base;
     unsigned char *s = src;
     register int i = len;
 
-    NCR5380_setup(instance);
     reg = base + T_DATA_REG_OFFSET;
 
 #if 0
@@ -392,21 +390,23 @@ MODULE_LICENSE("GPL");
 #include "NCR5380.c"
 
 static struct scsi_host_template driver_template = {
-       .name           = "Trantor T128/T128F/T228",
-       .detect         = t128_detect,
-       .release        = t128_release,
-       .proc_name      = "t128",
-       .show_info      = t128_show_info,
-       .write_info     = t128_write_info,
-       .info           = t128_info,
-       .queuecommand   = t128_queue_command,
-       .eh_abort_handler = t128_abort,
-       .eh_bus_reset_handler    = t128_bus_reset,
-       .bios_param     = t128_biosparam,
-       .can_queue      = CAN_QUEUE,
-        .this_id        = 7,
-       .sg_tablesize   = SG_ALL,
-       .cmd_per_lun    = CMD_PER_LUN,
-       .use_clustering = DISABLE_CLUSTERING,
+       .name                   = "Trantor T128/T128F/T228",
+       .detect                 = t128_detect,
+       .release                = t128_release,
+       .proc_name              = "t128",
+       .show_info              = t128_show_info,
+       .write_info             = t128_write_info,
+       .info                   = t128_info,
+       .queuecommand           = t128_queue_command,
+       .eh_abort_handler       = t128_abort,
+       .eh_bus_reset_handler   = t128_bus_reset,
+       .bios_param             = t128_biosparam,
+       .can_queue              = 32,
+       .this_id                = 7,
+       .sg_tablesize           = SG_ALL,
+       .cmd_per_lun            = 2,
+       .use_clustering         = DISABLE_CLUSTERING,
+       .cmd_size               = NCR5380_CMD_SIZE,
+       .max_sectors            = 128,
 };
 #include "scsi_module.c"
index 2c7371454dfd40cf5d398f64c9072d5f11f9e03d..dd16d85497e168680a5d3467f24baf0cbfe1e05a 100644 (file)
 #ifndef T128_H
 #define T128_H
 
-#define TDEBUG         0
-#define TDEBUG_INIT    0x1
-#define TDEBUG_TRANSFER 0x2
-
 /*
  * The trantor boards are memory mapped. They use an NCR5380 or
  * equivalent (my sample board had part second sourced from ZILOG).
 
 #define T_DATA_REG_OFFSET      0x1e00  /* rw 512 bytes long */
 
-#ifndef ASM
-
-#ifndef CMD_PER_LUN
-#define CMD_PER_LUN 2
-#endif
-
-#ifndef CAN_QUEUE
-#define CAN_QUEUE 32
-#endif
-
 #define NCR5380_implementation_fields \
     void __iomem *base
 
-#define NCR5380_local_declare() \
-    void __iomem *base
-
-#define NCR5380_setup(instance) \
-    base = ((struct NCR5380_hostdata *)(instance->hostdata))->base
+#define T128_address(reg) \
+       (((struct NCR5380_hostdata *)shost_priv(instance))->base + T_5380_OFFSET + ((reg) * 0x20))
 
-#define T128_address(reg) (base + T_5380_OFFSET + ((reg) * 0x20))
-
-#if !(TDEBUG & TDEBUG_TRANSFER)
 #define NCR5380_read(reg) readb(T128_address(reg))
 #define NCR5380_write(reg, value) writeb((value),(T128_address(reg)))
-#else
-#define NCR5380_read(reg)                                              \
-    (((unsigned char) printk("scsi%d : read register %d at address %08x\n"\
-    , instance->hostno, (reg), T128_address(reg))), readb(T128_address(reg)))
-
-#define NCR5380_write(reg, value) {                                    \
-    printk("scsi%d : write %02x to register %d at address %08x\n",     \
-           instance->hostno, (value), (reg), T128_address(reg));       \
-    writeb((value), (T128_address(reg)));                              \
-}
-#endif
+
+#define NCR5380_dma_xfer_len(instance, cmd, phase)     (cmd->transfersize)
 
 #define NCR5380_intr t128_intr
-#define do_NCR5380_intr do_t128_intr
 #define NCR5380_queue_command t128_queue_command
 #define NCR5380_abort t128_abort
 #define NCR5380_bus_reset t128_bus_reset
 
 #define T128_IRQS 0xc4a8
 
-#endif /* ndef ASM */
 #endif /* T128_H */
index fb2b3935b658f5bae0286b8153bafe7753ef6c8c..88260205a2614c84e8293bf82ca47c49abb2e29d 100644 (file)
@@ -7,6 +7,7 @@ source "drivers/soc/mediatek/Kconfig"
 source "drivers/soc/qcom/Kconfig"
 source "drivers/soc/rockchip/Kconfig"
 source "drivers/soc/sunxi/Kconfig"
+source "drivers/soc/tegra/Kconfig"
 source "drivers/soc/ti/Kconfig"
 source "drivers/soc/versatile/Kconfig"
 
index 0ad66fa9bb1aa47c97a7a5c1cae80f7bbc945473..5548a31e1a39a100142b45841cbe38e1aa007e38 100644 (file)
@@ -288,7 +288,7 @@ static struct spm_driver_data *spm_get_drv(struct platform_device *pdev,
        struct spm_driver_data *drv = NULL;
        struct device_node *cpu_node, *saw_node;
        int cpu;
-       bool found;
+       bool found = 0;
 
        for_each_possible_cpu(cpu) {
                cpu_node = of_cpu_device_node_get(cpu);
diff --git a/drivers/soc/tegra/Kconfig b/drivers/soc/tegra/Kconfig
new file mode 100644 (file)
index 0000000..d0c3c3e
--- /dev/null
@@ -0,0 +1,83 @@
+if ARCH_TEGRA
+
+# 32-bit ARM SoCs
+if ARM
+
+config ARCH_TEGRA_2x_SOC
+       bool "Enable support for Tegra20 family"
+       select ARCH_NEEDS_CPU_IDLE_COUPLED if SMP
+       select ARM_ERRATA_720789
+       select ARM_ERRATA_754327 if SMP
+       select ARM_ERRATA_764369 if SMP
+       select PINCTRL_TEGRA20
+       select PL310_ERRATA_727915 if CACHE_L2X0
+       select PL310_ERRATA_769419 if CACHE_L2X0
+       select TEGRA_TIMER
+       help
+         Support for NVIDIA Tegra AP20 and T20 processors, based on the
+         ARM CortexA9MP CPU and the ARM PL310 L2 cache controller
+
+config ARCH_TEGRA_3x_SOC
+       bool "Enable support for Tegra30 family"
+       select ARM_ERRATA_754322
+       select ARM_ERRATA_764369 if SMP
+       select PINCTRL_TEGRA30
+       select PL310_ERRATA_769419 if CACHE_L2X0
+       select TEGRA_TIMER
+       help
+         Support for NVIDIA Tegra T30 processor family, based on the
+         ARM CortexA9MP CPU and the ARM PL310 L2 cache controller
+
+config ARCH_TEGRA_114_SOC
+       bool "Enable support for Tegra114 family"
+       select ARM_ERRATA_798181 if SMP
+       select ARM_L1_CACHE_SHIFT_6
+       select HAVE_ARM_ARCH_TIMER
+       select PINCTRL_TEGRA114
+       select TEGRA_TIMER
+       help
+         Support for NVIDIA Tegra T114 processor family, based on the
+         ARM CortexA15MP CPU
+
+config ARCH_TEGRA_124_SOC
+       bool "Enable support for Tegra124 family"
+       select ARM_L1_CACHE_SHIFT_6
+       select HAVE_ARM_ARCH_TIMER
+       select PINCTRL_TEGRA124
+       select TEGRA_TIMER
+       help
+         Support for NVIDIA Tegra T124 processor family, based on the
+         ARM CortexA15MP CPU
+
+endif
+
+# 64-bit ARM SoCs
+if ARM64
+
+config ARCH_TEGRA_132_SOC
+       bool "NVIDIA Tegra132 SoC"
+       select PINCTRL_TEGRA124
+       help
+         Enable support for NVIDIA Tegra132 SoC, based on the Denver
+         ARMv8 CPU.  The Tegra132 SoC is similar to the Tegra124 SoC,
+         but contains an NVIDIA Denver CPU complex in place of
+         Tegra124's "4+1" Cortex-A15 CPU complex.
+
+config ARCH_TEGRA_210_SOC
+       bool "NVIDIA Tegra210 SoC"
+       select PINCTRL_TEGRA210
+       help
+         Enable support for the NVIDIA Tegra210 SoC. Also known as Tegra X1,
+         the Tegra210 has four Cortex-A57 cores paired with four Cortex-A53
+         cores in a switched configuration. It features a GPU of the Maxwell
+         architecture with support for DX11, SM4, OpenGL 4.5, OpenGL ES 3.1
+         and providing 256 CUDA cores. It supports hardware-accelerated en-
+         and decoding of various video standards including H.265, H.264 and
+         VP8 at 4K resolution and up to 60 fps.
+
+         Besides the multimedia features it also comes with a variety of I/O
+         controllers, such as GPIO, I2C, SPI, SDHCI, PCIe, SATA and XHCI, to
+         name only a few.
+
+endif
+endif
index cde5ff7529eb28b26e684eea53f4e168361937ff..d1a750760cf30b270b44929ac91eb22971d15b22 100644 (file)
@@ -613,9 +613,10 @@ out:
        return err;
 }
 
-static int ssb_bus_register(struct ssb_bus *bus,
-                           ssb_invariants_func_t get_invariants,
-                           unsigned long baseaddr)
+static int __maybe_unused
+ssb_bus_register(struct ssb_bus *bus,
+                ssb_invariants_func_t get_invariants,
+                unsigned long baseaddr)
 {
        int err;
 
index 58d4517e18369f984fd1fac9bfe53b9cc9ba4a3a..b9519be90fdae94ad5dbd278a336216de2971ce8 100644 (file)
@@ -6,6 +6,7 @@ menu "Analog to digital converters"
 config AD7606
        tristate "Analog Devices AD7606 ADC driver"
        depends on GPIOLIB || COMPILE_TEST
+       depends on HAS_IOMEM
        select IIO_BUFFER
        select IIO_TRIGGERED_BUFFER
        help
index f129039bece3c74b96fb78e70996ea65279502ea..69287108f793bcb81bcf91451b64f1f4f86c2257 100644 (file)
@@ -217,8 +217,12 @@ error_ret:
 static int ade7753_reset(struct device *dev)
 {
        u16 val;
+       int ret;
+
+       ret = ade7753_spi_read_reg_16(dev, ADE7753_MODE, &val);
+       if (ret)
+               return ret;
 
-       ade7753_spi_read_reg_16(dev, ADE7753_MODE, &val);
        val |= BIT(6); /* Software Chip Reset */
 
        return ade7753_spi_write_reg_16(dev, ADE7753_MODE, val);
@@ -343,8 +347,12 @@ error_ret:
 static int ade7753_stop_device(struct device *dev)
 {
        u16 val;
+       int ret;
+
+       ret = ade7753_spi_read_reg_16(dev, ADE7753_MODE, &val);
+       if (ret)
+               return ret;
 
-       ade7753_spi_read_reg_16(dev, ADE7753_MODE, &val);
        val |= BIT(4);  /* AD converters can be turned off */
 
        return ade7753_spi_write_reg_16(dev, ADE7753_MODE, val);
index d6273e143324e196b4ef2dbd9af967f3516235fa..a80d993b882eb1b3a76cab8ac6ce5ec0814a2e9f 100644 (file)
@@ -151,16 +151,12 @@ do {                                                                          \
 
 #define LIBCFS_FREE(ptr, size)                                   \
 do {                                                               \
-       int s = (size);                                          \
        if (unlikely((ptr) == NULL)) {                            \
                CERROR("LIBCFS: free NULL '" #ptr "' (%d bytes) at "    \
-                      "%s:%d\n", s, __FILE__, __LINE__);              \
+                      "%s:%d\n", (int)(size), __FILE__, __LINE__);     \
                break;                                            \
        }                                                              \
-       if (unlikely(s > LIBCFS_VMALLOC_SIZE))                    \
-               vfree(ptr);                                 \
-       else                                                        \
-               kfree(ptr);                                       \
+       kvfree(ptr);                                      \
 } while (0)
 
 /******************************************************************************/
index 72af486b65df70ee2b011ee745bef74c33beec5c..cb74ae731b955d623b9d7cce3c3cb3441229365e 100644 (file)
@@ -2070,32 +2070,13 @@ static int kiblnd_net_init_pools(kib_net_t *net, __u32 *cpts, int ncpts)
 
 static int kiblnd_hdev_get_attr(kib_hca_dev_t *hdev)
 {
-       struct ib_device_attr *attr;
-       int rc;
-
        /* It's safe to assume a HCA can handle a page size
         * matching that of the native system */
        hdev->ibh_page_shift = PAGE_SHIFT;
        hdev->ibh_page_size  = 1 << PAGE_SHIFT;
        hdev->ibh_page_mask  = ~((__u64)hdev->ibh_page_size - 1);
 
-       LIBCFS_ALLOC(attr, sizeof(*attr));
-       if (attr == NULL) {
-               CERROR("Out of memory\n");
-               return -ENOMEM;
-       }
-
-       rc = ib_query_device(hdev->ibh_ibdev, attr);
-       if (rc == 0)
-               hdev->ibh_mr_size = attr->max_mr_size;
-
-       LIBCFS_FREE(attr, sizeof(*attr));
-
-       if (rc != 0) {
-               CERROR("Failed to query IB device: %d\n", rc);
-               return rc;
-       }
-
+       hdev->ibh_mr_size = hdev->ibh_ibdev->attrs.max_mr_size;
        if (hdev->ibh_mr_size == ~0ULL) {
                hdev->ibh_mr_shift = 64;
                return 0;
index 7b355319079c3f2b4ca41f944929411747b30b5c..8982f7d1b374823eb8dec18f98b0b1ef167e8f26 100644 (file)
@@ -1858,7 +1858,7 @@ static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin)
        int api32 = ll_need_32bit_api(sbi);
        loff_t ret = -EINVAL;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        switch (origin) {
        case SEEK_SET:
                break;
@@ -1896,7 +1896,7 @@ static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin)
        goto out;
 
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return ret;
 }
 
index c92d58b770ec2bbd3b56c0600a0c2a6aeb8b015e..39e2ffd5f97f1addb2d0b0067285935b30f07185 100644 (file)
@@ -2082,17 +2082,17 @@ putgl:
        /* update time if requested */
        rc = 0;
        if (llss->ia2.ia_valid != 0) {
-               mutex_lock(&llss->inode1->i_mutex);
+               inode_lock(llss->inode1);
                rc = ll_setattr(file1->f_path.dentry, &llss->ia2);
-               mutex_unlock(&llss->inode1->i_mutex);
+               inode_unlock(llss->inode1);
        }
 
        if (llss->ia1.ia_valid != 0) {
                int rc1;
 
-               mutex_lock(&llss->inode2->i_mutex);
+               inode_lock(llss->inode2);
                rc1 = ll_setattr(file2->f_path.dentry, &llss->ia1);
-               mutex_unlock(&llss->inode2->i_mutex);
+               inode_unlock(llss->inode2);
                if (rc == 0)
                        rc = rc1;
        }
@@ -2179,13 +2179,13 @@ static int ll_hsm_import(struct inode *inode, struct file *file,
                         ATTR_MTIME | ATTR_MTIME_SET |
                         ATTR_ATIME | ATTR_ATIME_SET;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        rc = ll_setattr_raw(file->f_path.dentry, attr, true);
        if (rc == -ENODATA)
                rc = 0;
 
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        kfree(attr);
 free_hss:
@@ -2609,7 +2609,7 @@ int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync)
        ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
 
        rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        /* catch async errors that were recorded back when async writeback
         * failed for pages in this mapping. */
@@ -2641,7 +2641,7 @@ int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync)
                        fd->fd_write_failed = false;
        }
 
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return rc;
 }
 
index ee8a1d67d19119ee034ed5ffa7d64829042aa20d..845e992ca5fcb9898abb4887f70c9165818d4079 100644 (file)
@@ -631,8 +631,6 @@ struct ll_file_data {
 
 struct lov_stripe_md;
 
-extern spinlock_t inode_lock;
-
 extern struct dentry *llite_root;
 extern struct kset *llite_kset;
 
index 1db93af62badd52ecf551ce6b5b3f2d050efd678..b2fc5b3786ee62890956c25f6972ee329489346f 100644 (file)
@@ -1277,7 +1277,7 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import)
                return -ENOMEM;
 
        if (!S_ISDIR(inode->i_mode))
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
 
        memcpy(&op_data->op_attr, attr, sizeof(*attr));
 
@@ -1358,7 +1358,7 @@ out:
        ll_finish_md_op_data(op_data);
 
        if (!S_ISDIR(inode->i_mode)) {
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                if ((attr->ia_valid & ATTR_SIZE) && !hsm_import)
                        inode_dio_wait(inode);
        }
index e578a1130ad1bfd007ef8c498b06fe4917d07efe..18aab25f9cd9440680c216fc14b340cfc00e5cec 100644 (file)
@@ -245,9 +245,9 @@ static int ll_get_name(struct dentry *dentry, char *name,
                goto out;
        }
 
-       mutex_lock(&dir->i_mutex);
+       inode_lock(dir);
        rc = ll_dir_read(dir, &lgd.ctx);
-       mutex_unlock(&dir->i_mutex);
+       inode_unlock(dir);
        if (!rc && !lgd.lgd_found)
                rc = -ENOENT;
 out:
index 420d39123877b61c92c24d40dbe4ac37e5e0fc82..871924b3f2e72f44a8360cb1e6638d1c13b0eee3 100644 (file)
@@ -257,9 +257,9 @@ static int do_bio_lustrebacked(struct lloop_device *lo, struct bio *head)
         *    be asked to write less pages once, this purely depends on
         *    implementation. Anyway, we should be careful to avoid deadlocking.
         */
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        bytes = ll_direct_rw_pages(env, io, rw, inode, pvec);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        cl_io_fini(env, io);
        return (bytes == pvec->ldp_size) ? 0 : (int)bytes;
 }
index 95cdb0c58b04dffaa784cd04ef07f6f36fad004e..f355474967d62617319859e8df2830795a0a5458 100644 (file)
@@ -115,8 +115,8 @@ static struct ll_cl_context *ll_cl_init(struct file *file,
                struct inode *inode = vmpage->mapping->host;
                loff_t pos;
 
-               if (mutex_trylock(&inode->i_mutex)) {
-                       mutex_unlock(&(inode)->i_mutex);
+               if (inode_trylock(inode)) {
+                       inode_unlock((inode));
 
                        /* this is too bad. Someone is trying to write the
                         * page w/o holding inode mutex. This means we can
index 39fa13b74cbdac3f82f128aee9837063d253183a..711fda93a58df7af9a6dbfe7e0403c0ef9d0700f 100644 (file)
@@ -403,7 +403,7 @@ static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter,
         * 1. Need inode mutex to operate transient pages.
         */
        if (iov_iter_rw(iter) == READ)
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
 
        LASSERT(obj->cob_transient_pages == 0);
        while (iov_iter_count(iter)) {
@@ -454,7 +454,7 @@ static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter,
 out:
        LASSERT(obj->cob_transient_pages == 0);
        if (iov_iter_rw(iter) == READ)
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
 
        if (tot_bytes > 0) {
                if (iov_iter_rw(iter) == WRITE) {
index f68e972886caeb510b53f7704dc7ca85a8f1df90..0920ac6b3003afcf0a973a8a25f91c87d3900513 100644 (file)
@@ -439,7 +439,7 @@ static int vvp_io_setattr_start(const struct lu_env *env,
        struct inode    *inode = ccc_object_inode(io->ci_obj);
        int result = 0;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        if (cl_io_is_trunc(io))
                result = vvp_io_setattr_trunc(env, ios, inode,
                                        io->u.ci_setattr.sa_attr.lvb_size);
@@ -459,7 +459,7 @@ static void vvp_io_setattr_end(const struct lu_env *env,
                 * because osc has already notified to destroy osc_extents. */
                vvp_do_vmtruncate(inode, io->u.ci_setattr.sa_attr.lvb_size);
 
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 }
 
 static void vvp_io_setattr_fini(const struct lu_env *env,
index 99c0d7aee921d884d7ebc81e2d2658b978c88930..a133475a7c74af7a93fd0ffe2fac4294db284f0c 100644 (file)
@@ -428,7 +428,7 @@ static void vvp_transient_page_verify(const struct cl_page *page)
 {
        struct inode *inode = ccc_object_inode(page->cp_obj);
 
-       LASSERT(!mutex_trylock(&inode->i_mutex));
+       LASSERT(!inode_trylock(inode));
 }
 
 static int vvp_transient_page_own(const struct lu_env *env,
@@ -480,9 +480,9 @@ static int vvp_transient_page_is_vmlocked(const struct lu_env *env,
        struct inode    *inode = ccc_object_inode(slice->cpl_obj);
        int     locked;
 
-       locked = !mutex_trylock(&inode->i_mutex);
+       locked = !inode_trylock(inode);
        if (!locked)
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        return locked ? -EBUSY : -ENODATA;
 }
 
@@ -502,7 +502,7 @@ static void vvp_transient_page_fini(const struct lu_env *env,
        struct ccc_object *clobj = cl2ccc(clp->cp_obj);
 
        vvp_page_fini_common(cp);
-       LASSERT(!mutex_trylock(&clobj->cob_inode->i_mutex));
+       LASSERT(!inode_trylock(clobj->cob_inode));
        clobj->cob_transient_pages--;
 }
 
@@ -548,7 +548,7 @@ int vvp_page_init(const struct lu_env *env, struct cl_object *obj,
        } else {
                struct ccc_object *clobj = cl2ccc(obj);
 
-               LASSERT(!mutex_trylock(&clobj->cob_inode->i_mutex));
+               LASSERT(!inode_trylock(clobj->cob_inode));
                cl_page_slice_add(page, &cpg->cpg_cl, obj,
                                &vvp_transient_page_ops);
                clobj->cob_transient_pages++;
index 79ac19246548afb5a6e5574a97d09dd7cb8ca110..70b8f4fabfad31af474fae8b7943f7a1404cbb8f 100644 (file)
@@ -825,8 +825,7 @@ static void lcd_write_cmd_s(int cmd)
        lcd_send_serial(0x1F);  /* R/W=W, RS=0 */
        lcd_send_serial(cmd & 0x0F);
        lcd_send_serial((cmd >> 4) & 0x0F);
-       /* the shortest command takes at least 40 us */
-       usleep_range(40, 100);
+       udelay(40);             /* the shortest command takes at least 40 us */
        spin_unlock_irq(&pprt_lock);
 }
 
@@ -837,8 +836,7 @@ static void lcd_write_data_s(int data)
        lcd_send_serial(0x5F);  /* R/W=W, RS=1 */
        lcd_send_serial(data & 0x0F);
        lcd_send_serial((data >> 4) & 0x0F);
-       /* the shortest data takes at least 40 us */
-       usleep_range(40, 100);
+       udelay(40);             /* the shortest data takes at least 40 us */
        spin_unlock_irq(&pprt_lock);
 }
 
@@ -848,20 +846,19 @@ static void lcd_write_cmd_p8(int cmd)
        spin_lock_irq(&pprt_lock);
        /* present the data to the data port */
        w_dtr(pprt, cmd);
-       /* maintain the data during 20 us before the strobe */
-       usleep_range(20, 100);
+       udelay(20);     /* maintain the data during 20 us before the strobe */
 
        bits.e = BIT_SET;
        bits.rs = BIT_CLR;
        bits.rw = BIT_CLR;
        set_ctrl_bits();
 
-       usleep_range(40, 100);  /* maintain the strobe during 40 us */
+       udelay(40);     /* maintain the strobe during 40 us */
 
        bits.e = BIT_CLR;
        set_ctrl_bits();
 
-       usleep_range(120, 500); /* the shortest command takes at least 120 us */
+       udelay(120);    /* the shortest command takes at least 120 us */
        spin_unlock_irq(&pprt_lock);
 }
 
@@ -871,20 +868,19 @@ static void lcd_write_data_p8(int data)
        spin_lock_irq(&pprt_lock);
        /* present the data to the data port */
        w_dtr(pprt, data);
-       /* maintain the data during 20 us before the strobe */
-       usleep_range(20, 100);
+       udelay(20);     /* maintain the data during 20 us before the strobe */
 
        bits.e = BIT_SET;
        bits.rs = BIT_SET;
        bits.rw = BIT_CLR;
        set_ctrl_bits();
 
-       usleep_range(40, 100);  /* maintain the strobe during 40 us */
+       udelay(40);     /* maintain the strobe during 40 us */
 
        bits.e = BIT_CLR;
        set_ctrl_bits();
 
-       usleep_range(45, 100);  /* the shortest data takes at least 45 us */
+       udelay(45);     /* the shortest data takes at least 45 us */
        spin_unlock_irq(&pprt_lock);
 }
 
@@ -894,7 +890,7 @@ static void lcd_write_cmd_tilcd(int cmd)
        spin_lock_irq(&pprt_lock);
        /* present the data to the control port */
        w_ctr(pprt, cmd);
-       usleep_range(60, 120);
+       udelay(60);
        spin_unlock_irq(&pprt_lock);
 }
 
@@ -904,7 +900,7 @@ static void lcd_write_data_tilcd(int data)
        spin_lock_irq(&pprt_lock);
        /* present the data to the data port */
        w_dtr(pprt, data);
-       usleep_range(60, 120);
+       udelay(60);
        spin_unlock_irq(&pprt_lock);
 }
 
@@ -947,7 +943,7 @@ static void lcd_clear_fast_s(void)
                lcd_send_serial(0x5F);  /* R/W=W, RS=1 */
                lcd_send_serial(' ' & 0x0F);
                lcd_send_serial((' ' >> 4) & 0x0F);
-               usleep_range(40, 100);  /* the shortest data takes at least 40 us */
+               udelay(40);     /* the shortest data takes at least 40 us */
        }
        spin_unlock_irq(&pprt_lock);
 
@@ -971,7 +967,7 @@ static void lcd_clear_fast_p8(void)
                w_dtr(pprt, ' ');
 
                /* maintain the data during 20 us before the strobe */
-               usleep_range(20, 100);
+               udelay(20);
 
                bits.e = BIT_SET;
                bits.rs = BIT_SET;
@@ -979,13 +975,13 @@ static void lcd_clear_fast_p8(void)
                set_ctrl_bits();
 
                /* maintain the strobe during 40 us */
-               usleep_range(40, 100);
+               udelay(40);
 
                bits.e = BIT_CLR;
                set_ctrl_bits();
 
                /* the shortest data takes at least 45 us */
-               usleep_range(45, 100);
+               udelay(45);
        }
        spin_unlock_irq(&pprt_lock);
 
@@ -1007,7 +1003,7 @@ static void lcd_clear_fast_tilcd(void)
        for (pos = 0; pos < lcd.height * lcd.hwidth; pos++) {
                /* present the data to the data port */
                w_dtr(pprt, ' ');
-               usleep_range(60, 120);
+               udelay(60);
        }
 
        spin_unlock_irq(&pprt_lock);
index ba8765063174c4e05b517cdf45d33c6c5f57cf25..f1f3ecadf0fb0d0e1a0bb11d014d766e1187e55c 100644 (file)
@@ -22,12 +22,6 @@ menuconfig STAGING_RDMA
 # Please keep entries in alphabetic order
 if STAGING_RDMA
 
-source "drivers/staging/rdma/amso1100/Kconfig"
-
-source "drivers/staging/rdma/ehca/Kconfig"
-
 source "drivers/staging/rdma/hfi1/Kconfig"
 
-source "drivers/staging/rdma/ipath/Kconfig"
-
 endif
index 139d78ef2c24388b93ed6a008090190cff6de663..8c7fc1de48a7bf6f86f103ad120f9052ea399c58 100644 (file)
@@ -1,5 +1,2 @@
 # Entries for RDMA_STAGING tree
-obj-$(CONFIG_INFINIBAND_AMSO1100)      += amso1100/
-obj-$(CONFIG_INFINIBAND_EHCA)  += ehca/
 obj-$(CONFIG_INFINIBAND_HFI1)  += hfi1/
-obj-$(CONFIG_INFINIBAND_IPATH) += ipath/
diff --git a/drivers/staging/rdma/amso1100/Kbuild b/drivers/staging/rdma/amso1100/Kbuild
deleted file mode 100644 (file)
index 950dfab..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-ccflags-$(CONFIG_INFINIBAND_AMSO1100_DEBUG) := -DDEBUG
-
-obj-$(CONFIG_INFINIBAND_AMSO1100) += iw_c2.o
-
-iw_c2-y := c2.o c2_provider.o c2_rnic.o c2_alloc.o c2_mq.o c2_ae.o c2_vq.o \
-       c2_intr.o c2_cq.o c2_qp.o c2_cm.o c2_mm.o c2_pd.o
diff --git a/drivers/staging/rdma/amso1100/Kconfig b/drivers/staging/rdma/amso1100/Kconfig
deleted file mode 100644 (file)
index e6ce5f2..0000000
+++ /dev/null
@@ -1,15 +0,0 @@
-config INFINIBAND_AMSO1100
-       tristate "Ammasso 1100 HCA support"
-       depends on PCI && INET
-       ---help---
-         This is a low-level driver for the Ammasso 1100 host
-         channel adapter (HCA).
-
-config INFINIBAND_AMSO1100_DEBUG
-       bool "Verbose debugging output"
-       depends on INFINIBAND_AMSO1100
-       default n
-       ---help---
-         This option causes the amso1100 driver to produce a bunch of
-         debug messages.  Select this if you are developing the driver
-         or trying to diagnose a problem.
diff --git a/drivers/staging/rdma/amso1100/TODO b/drivers/staging/rdma/amso1100/TODO
deleted file mode 100644 (file)
index 18b00a5..0000000
+++ /dev/null
@@ -1,4 +0,0 @@
-7/2015
-
-The amso1100 driver has been deprecated and moved to drivers/staging.
-It will be removed in the 4.6 merge window.
diff --git a/drivers/staging/rdma/amso1100/c2.c b/drivers/staging/rdma/amso1100/c2.c
deleted file mode 100644 (file)
index b46ebd1..0000000
+++ /dev/null
@@ -1,1240 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/pci.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/inetdevice.h>
-#include <linux/interrupt.h>
-#include <linux/delay.h>
-#include <linux/ethtool.h>
-#include <linux/mii.h>
-#include <linux/if_vlan.h>
-#include <linux/crc32.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/init.h>
-#include <linux/dma-mapping.h>
-#include <linux/slab.h>
-#include <linux/prefetch.h>
-
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/byteorder.h>
-
-#include <rdma/ib_smi.h>
-#include "c2.h"
-#include "c2_provider.h"
-
-MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>");
-MODULE_DESCRIPTION("Ammasso AMSO1100 Low-level iWARP Driver");
-MODULE_LICENSE("Dual BSD/GPL");
-MODULE_VERSION(DRV_VERSION);
-
-static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK
-    | NETIF_MSG_IFUP | NETIF_MSG_IFDOWN;
-
-static int debug = -1;         /* defaults above */
-module_param(debug, int, 0);
-MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
-
-static int c2_up(struct net_device *netdev);
-static int c2_down(struct net_device *netdev);
-static int c2_xmit_frame(struct sk_buff *skb, struct net_device *netdev);
-static void c2_tx_interrupt(struct net_device *netdev);
-static void c2_rx_interrupt(struct net_device *netdev);
-static irqreturn_t c2_interrupt(int irq, void *dev_id);
-static void c2_tx_timeout(struct net_device *netdev);
-static int c2_change_mtu(struct net_device *netdev, int new_mtu);
-static void c2_reset(struct c2_port *c2_port);
-
-static struct pci_device_id c2_pci_table[] = {
-       { PCI_DEVICE(0x18b8, 0xb001) },
-       { 0 }
-};
-
-MODULE_DEVICE_TABLE(pci, c2_pci_table);
-
-static void c2_set_rxbufsize(struct c2_port *c2_port)
-{
-       struct net_device *netdev = c2_port->netdev;
-
-       if (netdev->mtu > RX_BUF_SIZE)
-               c2_port->rx_buf_size =
-                   netdev->mtu + ETH_HLEN + sizeof(struct c2_rxp_hdr) +
-                   NET_IP_ALIGN;
-       else
-               c2_port->rx_buf_size = sizeof(struct c2_rxp_hdr) + RX_BUF_SIZE;
-}
-
-/*
- * Allocate TX ring elements and chain them together.
- * One-to-one association of adapter descriptors with ring elements.
- */
-static int c2_tx_ring_alloc(struct c2_ring *tx_ring, void *vaddr,
-                           dma_addr_t base, void __iomem * mmio_txp_ring)
-{
-       struct c2_tx_desc *tx_desc;
-       struct c2_txp_desc __iomem *txp_desc;
-       struct c2_element *elem;
-       int i;
-
-       tx_ring->start = kmalloc_array(tx_ring->count, sizeof(*elem),
-                                      GFP_KERNEL);
-       if (!tx_ring->start)
-               return -ENOMEM;
-
-       elem = tx_ring->start;
-       tx_desc = vaddr;
-       txp_desc = mmio_txp_ring;
-       for (i = 0; i < tx_ring->count; i++, elem++, tx_desc++, txp_desc++) {
-               tx_desc->len = 0;
-               tx_desc->status = 0;
-
-               /* Set TXP_HTXD_UNINIT */
-               __raw_writeq((__force u64) cpu_to_be64(0x1122334455667788ULL),
-                            (void __iomem *) txp_desc + C2_TXP_ADDR);
-               __raw_writew(0, (void __iomem *) txp_desc + C2_TXP_LEN);
-               __raw_writew((__force u16) cpu_to_be16(TXP_HTXD_UNINIT),
-                            (void __iomem *) txp_desc + C2_TXP_FLAGS);
-
-               elem->skb = NULL;
-               elem->ht_desc = tx_desc;
-               elem->hw_desc = txp_desc;
-
-               if (i == tx_ring->count - 1) {
-                       elem->next = tx_ring->start;
-                       tx_desc->next_offset = base;
-               } else {
-                       elem->next = elem + 1;
-                       tx_desc->next_offset =
-                           base + (i + 1) * sizeof(*tx_desc);
-               }
-       }
-
-       tx_ring->to_use = tx_ring->to_clean = tx_ring->start;
-
-       return 0;
-}
-
-/*
- * Allocate RX ring elements and chain them together.
- * One-to-one association of adapter descriptors with ring elements.
- */
-static int c2_rx_ring_alloc(struct c2_ring *rx_ring, void *vaddr,
-                           dma_addr_t base, void __iomem * mmio_rxp_ring)
-{
-       struct c2_rx_desc *rx_desc;
-       struct c2_rxp_desc __iomem *rxp_desc;
-       struct c2_element *elem;
-       int i;
-
-       rx_ring->start = kmalloc_array(rx_ring->count, sizeof(*elem),
-                                      GFP_KERNEL);
-       if (!rx_ring->start)
-               return -ENOMEM;
-
-       elem = rx_ring->start;
-       rx_desc = vaddr;
-       rxp_desc = mmio_rxp_ring;
-       for (i = 0; i < rx_ring->count; i++, elem++, rx_desc++, rxp_desc++) {
-               rx_desc->len = 0;
-               rx_desc->status = 0;
-
-               /* Set RXP_HRXD_UNINIT */
-               __raw_writew((__force u16) cpu_to_be16(RXP_HRXD_OK),
-                      (void __iomem *) rxp_desc + C2_RXP_STATUS);
-               __raw_writew(0, (void __iomem *) rxp_desc + C2_RXP_COUNT);
-               __raw_writew(0, (void __iomem *) rxp_desc + C2_RXP_LEN);
-               __raw_writeq((__force u64) cpu_to_be64(0x99aabbccddeeffULL),
-                            (void __iomem *) rxp_desc + C2_RXP_ADDR);
-               __raw_writew((__force u16) cpu_to_be16(RXP_HRXD_UNINIT),
-                            (void __iomem *) rxp_desc + C2_RXP_FLAGS);
-
-               elem->skb = NULL;
-               elem->ht_desc = rx_desc;
-               elem->hw_desc = rxp_desc;
-
-               if (i == rx_ring->count - 1) {
-                       elem->next = rx_ring->start;
-                       rx_desc->next_offset = base;
-               } else {
-                       elem->next = elem + 1;
-                       rx_desc->next_offset =
-                           base + (i + 1) * sizeof(*rx_desc);
-               }
-       }
-
-       rx_ring->to_use = rx_ring->to_clean = rx_ring->start;
-
-       return 0;
-}
-
-/* Setup buffer for receiving */
-static inline int c2_rx_alloc(struct c2_port *c2_port, struct c2_element *elem)
-{
-       struct c2_dev *c2dev = c2_port->c2dev;
-       struct c2_rx_desc *rx_desc = elem->ht_desc;
-       struct sk_buff *skb;
-       dma_addr_t mapaddr;
-       u32 maplen;
-       struct c2_rxp_hdr *rxp_hdr;
-
-       skb = dev_alloc_skb(c2_port->rx_buf_size);
-       if (unlikely(!skb)) {
-               pr_debug("%s: out of memory for receive\n",
-                       c2_port->netdev->name);
-               return -ENOMEM;
-       }
-
-       /* Zero out the rxp hdr in the sk_buff */
-       memset(skb->data, 0, sizeof(*rxp_hdr));
-
-       skb->dev = c2_port->netdev;
-
-       maplen = c2_port->rx_buf_size;
-       mapaddr =
-           pci_map_single(c2dev->pcidev, skb->data, maplen,
-                          PCI_DMA_FROMDEVICE);
-
-       /* Set the sk_buff RXP_header to RXP_HRXD_READY */
-       rxp_hdr = (struct c2_rxp_hdr *) skb->data;
-       rxp_hdr->flags = RXP_HRXD_READY;
-
-       __raw_writew(0, elem->hw_desc + C2_RXP_STATUS);
-       __raw_writew((__force u16) cpu_to_be16((u16) maplen - sizeof(*rxp_hdr)),
-                    elem->hw_desc + C2_RXP_LEN);
-       __raw_writeq((__force u64) cpu_to_be64(mapaddr), elem->hw_desc + C2_RXP_ADDR);
-       __raw_writew((__force u16) cpu_to_be16(RXP_HRXD_READY),
-                    elem->hw_desc + C2_RXP_FLAGS);
-
-       elem->skb = skb;
-       elem->mapaddr = mapaddr;
-       elem->maplen = maplen;
-       rx_desc->len = maplen;
-
-       return 0;
-}
-
-/*
- * Allocate buffers for the Rx ring
- * For receive:  rx_ring.to_clean is next received frame
- */
-static int c2_rx_fill(struct c2_port *c2_port)
-{
-       struct c2_ring *rx_ring = &c2_port->rx_ring;
-       struct c2_element *elem;
-       int ret = 0;
-
-       elem = rx_ring->start;
-       do {
-               if (c2_rx_alloc(c2_port, elem)) {
-                       ret = 1;
-                       break;
-               }
-       } while ((elem = elem->next) != rx_ring->start);
-
-       rx_ring->to_clean = rx_ring->start;
-       return ret;
-}
-
-/* Free all buffers in RX ring, assumes receiver stopped */
-static void c2_rx_clean(struct c2_port *c2_port)
-{
-       struct c2_dev *c2dev = c2_port->c2dev;
-       struct c2_ring *rx_ring = &c2_port->rx_ring;
-       struct c2_element *elem;
-       struct c2_rx_desc *rx_desc;
-
-       elem = rx_ring->start;
-       do {
-               rx_desc = elem->ht_desc;
-               rx_desc->len = 0;
-
-               __raw_writew(0, elem->hw_desc + C2_RXP_STATUS);
-               __raw_writew(0, elem->hw_desc + C2_RXP_COUNT);
-               __raw_writew(0, elem->hw_desc + C2_RXP_LEN);
-               __raw_writeq((__force u64) cpu_to_be64(0x99aabbccddeeffULL),
-                            elem->hw_desc + C2_RXP_ADDR);
-               __raw_writew((__force u16) cpu_to_be16(RXP_HRXD_UNINIT),
-                            elem->hw_desc + C2_RXP_FLAGS);
-
-               if (elem->skb) {
-                       pci_unmap_single(c2dev->pcidev, elem->mapaddr,
-                                        elem->maplen, PCI_DMA_FROMDEVICE);
-                       dev_kfree_skb(elem->skb);
-                       elem->skb = NULL;
-               }
-       } while ((elem = elem->next) != rx_ring->start);
-}
-
-static inline int c2_tx_free(struct c2_dev *c2dev, struct c2_element *elem)
-{
-       struct c2_tx_desc *tx_desc = elem->ht_desc;
-
-       tx_desc->len = 0;
-
-       pci_unmap_single(c2dev->pcidev, elem->mapaddr, elem->maplen,
-                        PCI_DMA_TODEVICE);
-
-       if (elem->skb) {
-               dev_kfree_skb_any(elem->skb);
-               elem->skb = NULL;
-       }
-
-       return 0;
-}
-
-/* Free all buffers in TX ring, assumes transmitter stopped */
-static void c2_tx_clean(struct c2_port *c2_port)
-{
-       struct c2_ring *tx_ring = &c2_port->tx_ring;
-       struct c2_element *elem;
-       struct c2_txp_desc txp_htxd;
-       int retry;
-       unsigned long flags;
-
-       spin_lock_irqsave(&c2_port->tx_lock, flags);
-
-       elem = tx_ring->start;
-
-       do {
-               retry = 0;
-               do {
-                       txp_htxd.flags =
-                           readw(elem->hw_desc + C2_TXP_FLAGS);
-
-                       if (txp_htxd.flags == TXP_HTXD_READY) {
-                               retry = 1;
-                               __raw_writew(0,
-                                            elem->hw_desc + C2_TXP_LEN);
-                               __raw_writeq(0,
-                                            elem->hw_desc + C2_TXP_ADDR);
-                               __raw_writew((__force u16) cpu_to_be16(TXP_HTXD_DONE),
-                                            elem->hw_desc + C2_TXP_FLAGS);
-                               c2_port->netdev->stats.tx_dropped++;
-                               break;
-                       } else {
-                               __raw_writew(0,
-                                            elem->hw_desc + C2_TXP_LEN);
-                               __raw_writeq((__force u64) cpu_to_be64(0x1122334455667788ULL),
-                                            elem->hw_desc + C2_TXP_ADDR);
-                               __raw_writew((__force u16) cpu_to_be16(TXP_HTXD_UNINIT),
-                                            elem->hw_desc + C2_TXP_FLAGS);
-                       }
-
-                       c2_tx_free(c2_port->c2dev, elem);
-
-               } while ((elem = elem->next) != tx_ring->start);
-       } while (retry);
-
-       c2_port->tx_avail = c2_port->tx_ring.count - 1;
-       c2_port->c2dev->cur_tx = tx_ring->to_use - tx_ring->start;
-
-       if (c2_port->tx_avail > MAX_SKB_FRAGS + 1)
-               netif_wake_queue(c2_port->netdev);
-
-       spin_unlock_irqrestore(&c2_port->tx_lock, flags);
-}
-
-/*
- * Process transmit descriptors marked 'DONE' by the firmware,
- * freeing up their unneeded sk_buffs.
- */
-static void c2_tx_interrupt(struct net_device *netdev)
-{
-       struct c2_port *c2_port = netdev_priv(netdev);
-       struct c2_dev *c2dev = c2_port->c2dev;
-       struct c2_ring *tx_ring = &c2_port->tx_ring;
-       struct c2_element *elem;
-       struct c2_txp_desc txp_htxd;
-
-       spin_lock(&c2_port->tx_lock);
-
-       for (elem = tx_ring->to_clean; elem != tx_ring->to_use;
-            elem = elem->next) {
-               txp_htxd.flags =
-                   be16_to_cpu((__force __be16) readw(elem->hw_desc + C2_TXP_FLAGS));
-
-               if (txp_htxd.flags != TXP_HTXD_DONE)
-                       break;
-
-               if (netif_msg_tx_done(c2_port)) {
-                       /* PCI reads are expensive in fast path */
-                       txp_htxd.len =
-                           be16_to_cpu((__force __be16) readw(elem->hw_desc + C2_TXP_LEN));
-                       pr_debug("%s: tx done slot %3Zu status 0x%x len "
-                               "%5u bytes\n",
-                               netdev->name, elem - tx_ring->start,
-                               txp_htxd.flags, txp_htxd.len);
-               }
-
-               c2_tx_free(c2dev, elem);
-               ++(c2_port->tx_avail);
-       }
-
-       tx_ring->to_clean = elem;
-
-       if (netif_queue_stopped(netdev)
-           && c2_port->tx_avail > MAX_SKB_FRAGS + 1)
-               netif_wake_queue(netdev);
-
-       spin_unlock(&c2_port->tx_lock);
-}
-
-static void c2_rx_error(struct c2_port *c2_port, struct c2_element *elem)
-{
-       struct c2_rx_desc *rx_desc = elem->ht_desc;
-       struct c2_rxp_hdr *rxp_hdr = (struct c2_rxp_hdr *) elem->skb->data;
-
-       if (rxp_hdr->status != RXP_HRXD_OK ||
-           rxp_hdr->len > (rx_desc->len - sizeof(*rxp_hdr))) {
-               pr_debug("BAD RXP_HRXD\n");
-               pr_debug("  rx_desc : %p\n", rx_desc);
-               pr_debug("    index : %Zu\n",
-                       elem - c2_port->rx_ring.start);
-               pr_debug("    len   : %u\n", rx_desc->len);
-               pr_debug("  rxp_hdr : %p [PA %p]\n", rxp_hdr,
-                       (void *) __pa((unsigned long) rxp_hdr));
-               pr_debug("    flags : 0x%x\n", rxp_hdr->flags);
-               pr_debug("    status: 0x%x\n", rxp_hdr->status);
-               pr_debug("    len   : %u\n", rxp_hdr->len);
-               pr_debug("    rsvd  : 0x%x\n", rxp_hdr->rsvd);
-       }
-
-       /* Setup the skb for reuse since we're dropping this pkt */
-       elem->skb->data = elem->skb->head;
-       skb_reset_tail_pointer(elem->skb);
-
-       /* Zero out the rxp hdr in the sk_buff */
-       memset(elem->skb->data, 0, sizeof(*rxp_hdr));
-
-       /* Write the descriptor to the adapter's rx ring */
-       __raw_writew(0, elem->hw_desc + C2_RXP_STATUS);
-       __raw_writew(0, elem->hw_desc + C2_RXP_COUNT);
-       __raw_writew((__force u16) cpu_to_be16((u16) elem->maplen - sizeof(*rxp_hdr)),
-                    elem->hw_desc + C2_RXP_LEN);
-       __raw_writeq((__force u64) cpu_to_be64(elem->mapaddr),
-                    elem->hw_desc + C2_RXP_ADDR);
-       __raw_writew((__force u16) cpu_to_be16(RXP_HRXD_READY),
-                    elem->hw_desc + C2_RXP_FLAGS);
-
-       pr_debug("packet dropped\n");
-       c2_port->netdev->stats.rx_dropped++;
-}
-
-static void c2_rx_interrupt(struct net_device *netdev)
-{
-       struct c2_port *c2_port = netdev_priv(netdev);
-       struct c2_dev *c2dev = c2_port->c2dev;
-       struct c2_ring *rx_ring = &c2_port->rx_ring;
-       struct c2_element *elem;
-       struct c2_rx_desc *rx_desc;
-       struct c2_rxp_hdr *rxp_hdr;
-       struct sk_buff *skb;
-       dma_addr_t mapaddr;
-       u32 maplen, buflen;
-       unsigned long flags;
-
-       spin_lock_irqsave(&c2dev->lock, flags);
-
-       /* Begin where we left off */
-       rx_ring->to_clean = rx_ring->start + c2dev->cur_rx;
-
-       for (elem = rx_ring->to_clean; elem->next != rx_ring->to_clean;
-            elem = elem->next) {
-               rx_desc = elem->ht_desc;
-               mapaddr = elem->mapaddr;
-               maplen = elem->maplen;
-               skb = elem->skb;
-               rxp_hdr = (struct c2_rxp_hdr *) skb->data;
-
-               if (rxp_hdr->flags != RXP_HRXD_DONE)
-                       break;
-               buflen = rxp_hdr->len;
-
-               /* Sanity check the RXP header */
-               if (rxp_hdr->status != RXP_HRXD_OK ||
-                   buflen > (rx_desc->len - sizeof(*rxp_hdr))) {
-                       c2_rx_error(c2_port, elem);
-                       continue;
-               }
-
-               /*
-                * Allocate and map a new skb for replenishing the host
-                * RX desc
-                */
-               if (c2_rx_alloc(c2_port, elem)) {
-                       c2_rx_error(c2_port, elem);
-                       continue;
-               }
-
-               /* Unmap the old skb */
-               pci_unmap_single(c2dev->pcidev, mapaddr, maplen,
-                                PCI_DMA_FROMDEVICE);
-
-               prefetch(skb->data);
-
-               /*
-                * Skip past the leading 8 bytes comprising of the
-                * "struct c2_rxp_hdr", prepended by the adapter
-                * to the usual Ethernet header ("struct ethhdr"),
-                * to the start of the raw Ethernet packet.
-                *
-                * Fix up the various fields in the sk_buff before
-                * passing it up to netif_rx(). The transfer size
-                * (in bytes) specified by the adapter len field of
-                * the "struct rxp_hdr_t" does NOT include the
-                * "sizeof(struct c2_rxp_hdr)".
-                */
-               skb->data += sizeof(*rxp_hdr);
-               skb_set_tail_pointer(skb, buflen);
-               skb->len = buflen;
-               skb->protocol = eth_type_trans(skb, netdev);
-
-               netif_rx(skb);
-
-               netdev->stats.rx_packets++;
-               netdev->stats.rx_bytes += buflen;
-       }
-
-       /* Save where we left off */
-       rx_ring->to_clean = elem;
-       c2dev->cur_rx = elem - rx_ring->start;
-       C2_SET_CUR_RX(c2dev, c2dev->cur_rx);
-
-       spin_unlock_irqrestore(&c2dev->lock, flags);
-}
-
-/*
- * Handle netisr0 TX & RX interrupts.
- */
-static irqreturn_t c2_interrupt(int irq, void *dev_id)
-{
-       unsigned int netisr0, dmaisr;
-       int handled = 0;
-       struct c2_dev *c2dev = dev_id;
-
-       /* Process CCILNET interrupts */
-       netisr0 = readl(c2dev->regs + C2_NISR0);
-       if (netisr0) {
-
-               /*
-                * There is an issue with the firmware that always
-                * provides the status of RX for both TX & RX
-                * interrupts.  So process both queues here.
-                */
-               c2_rx_interrupt(c2dev->netdev);
-               c2_tx_interrupt(c2dev->netdev);
-
-               /* Clear the interrupt */
-               writel(netisr0, c2dev->regs + C2_NISR0);
-               handled++;
-       }
-
-       /* Process RNIC interrupts */
-       dmaisr = readl(c2dev->regs + C2_DISR);
-       if (dmaisr) {
-               writel(dmaisr, c2dev->regs + C2_DISR);
-               c2_rnic_interrupt(c2dev);
-               handled++;
-       }
-
-       if (handled) {
-               return IRQ_HANDLED;
-       } else {
-               return IRQ_NONE;
-       }
-}
-
-static int c2_up(struct net_device *netdev)
-{
-       struct c2_port *c2_port = netdev_priv(netdev);
-       struct c2_dev *c2dev = c2_port->c2dev;
-       struct c2_element *elem;
-       struct c2_rxp_hdr *rxp_hdr;
-       struct in_device *in_dev;
-       size_t rx_size, tx_size;
-       int ret, i;
-       unsigned int netimr0;
-
-       if (netif_msg_ifup(c2_port))
-               pr_debug("%s: enabling interface\n", netdev->name);
-
-       /* Set the Rx buffer size based on MTU */
-       c2_set_rxbufsize(c2_port);
-
-       /* Allocate DMA'able memory for Tx/Rx host descriptor rings */
-       rx_size = c2_port->rx_ring.count * sizeof(struct c2_rx_desc);
-       tx_size = c2_port->tx_ring.count * sizeof(struct c2_tx_desc);
-
-       c2_port->mem_size = tx_size + rx_size;
-       c2_port->mem = pci_zalloc_consistent(c2dev->pcidev, c2_port->mem_size,
-                                            &c2_port->dma);
-       if (c2_port->mem == NULL) {
-               pr_debug("Unable to allocate memory for "
-                       "host descriptor rings\n");
-               return -ENOMEM;
-       }
-
-       /* Create the Rx host descriptor ring */
-       if ((ret =
-            c2_rx_ring_alloc(&c2_port->rx_ring, c2_port->mem, c2_port->dma,
-                             c2dev->mmio_rxp_ring))) {
-               pr_debug("Unable to create RX ring\n");
-               goto bail0;
-       }
-
-       /* Allocate Rx buffers for the host descriptor ring */
-       if (c2_rx_fill(c2_port)) {
-               pr_debug("Unable to fill RX ring\n");
-               goto bail1;
-       }
-
-       /* Create the Tx host descriptor ring */
-       if ((ret = c2_tx_ring_alloc(&c2_port->tx_ring, c2_port->mem + rx_size,
-                                   c2_port->dma + rx_size,
-                                   c2dev->mmio_txp_ring))) {
-               pr_debug("Unable to create TX ring\n");
-               goto bail1;
-       }
-
-       /* Set the TX pointer to where we left off */
-       c2_port->tx_avail = c2_port->tx_ring.count - 1;
-       c2_port->tx_ring.to_use = c2_port->tx_ring.to_clean =
-           c2_port->tx_ring.start + c2dev->cur_tx;
-
-       /* missing: Initialize MAC */
-
-       BUG_ON(c2_port->tx_ring.to_use != c2_port->tx_ring.to_clean);
-
-       /* Reset the adapter, ensures the driver is in sync with the RXP */
-       c2_reset(c2_port);
-
-       /* Reset the READY bit in the sk_buff RXP headers & adapter HRXDQ */
-       for (i = 0, elem = c2_port->rx_ring.start; i < c2_port->rx_ring.count;
-            i++, elem++) {
-               rxp_hdr = (struct c2_rxp_hdr *) elem->skb->data;
-               rxp_hdr->flags = 0;
-               __raw_writew((__force u16) cpu_to_be16(RXP_HRXD_READY),
-                            elem->hw_desc + C2_RXP_FLAGS);
-       }
-
-       /* Enable network packets */
-       netif_start_queue(netdev);
-
-       /* Enable IRQ */
-       writel(0, c2dev->regs + C2_IDIS);
-       netimr0 = readl(c2dev->regs + C2_NIMR0);
-       netimr0 &= ~(C2_PCI_HTX_INT | C2_PCI_HRX_INT);
-       writel(netimr0, c2dev->regs + C2_NIMR0);
-
-       /* Tell the stack to ignore arp requests for ipaddrs bound to
-        * other interfaces.  This is needed to prevent the host stack
-        * from responding to arp requests to the ipaddr bound on the
-        * rdma interface.
-        */
-       in_dev = in_dev_get(netdev);
-       IN_DEV_CONF_SET(in_dev, ARP_IGNORE, 1);
-       in_dev_put(in_dev);
-
-       return 0;
-
-bail1:
-       c2_rx_clean(c2_port);
-       kfree(c2_port->rx_ring.start);
-
-bail0:
-       pci_free_consistent(c2dev->pcidev, c2_port->mem_size, c2_port->mem,
-                           c2_port->dma);
-
-       return ret;
-}
-
-static int c2_down(struct net_device *netdev)
-{
-       struct c2_port *c2_port = netdev_priv(netdev);
-       struct c2_dev *c2dev = c2_port->c2dev;
-
-       if (netif_msg_ifdown(c2_port))
-               pr_debug("%s: disabling interface\n",
-                       netdev->name);
-
-       /* Wait for all the queued packets to get sent */
-       c2_tx_interrupt(netdev);
-
-       /* Disable network packets */
-       netif_stop_queue(netdev);
-
-       /* Disable IRQs by clearing the interrupt mask */
-       writel(1, c2dev->regs + C2_IDIS);
-       writel(0, c2dev->regs + C2_NIMR0);
-
-       /* missing: Stop transmitter */
-
-       /* missing: Stop receiver */
-
-       /* Reset the adapter, ensures the driver is in sync with the RXP */
-       c2_reset(c2_port);
-
-       /* missing: Turn off LEDs here */
-
-       /* Free all buffers in the host descriptor rings */
-       c2_tx_clean(c2_port);
-       c2_rx_clean(c2_port);
-
-       /* Free the host descriptor rings */
-       kfree(c2_port->rx_ring.start);
-       kfree(c2_port->tx_ring.start);
-       pci_free_consistent(c2dev->pcidev, c2_port->mem_size, c2_port->mem,
-                           c2_port->dma);
-
-       return 0;
-}
-
-static void c2_reset(struct c2_port *c2_port)
-{
-       struct c2_dev *c2dev = c2_port->c2dev;
-       unsigned int cur_rx = c2dev->cur_rx;
-
-       /* Tell the hardware to quiesce */
-       C2_SET_CUR_RX(c2dev, cur_rx | C2_PCI_HRX_QUI);
-
-       /*
-        * The hardware will reset the C2_PCI_HRX_QUI bit once
-        * the RXP is quiesced.  Wait 2 seconds for this.
-        */
-       ssleep(2);
-
-       cur_rx = C2_GET_CUR_RX(c2dev);
-
-       if (cur_rx & C2_PCI_HRX_QUI)
-               pr_debug("c2_reset: failed to quiesce the hardware!\n");
-
-       cur_rx &= ~C2_PCI_HRX_QUI;
-
-       c2dev->cur_rx = cur_rx;
-
-       pr_debug("Current RX: %u\n", c2dev->cur_rx);
-}
-
-static int c2_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
-{
-       struct c2_port *c2_port = netdev_priv(netdev);
-       struct c2_dev *c2dev = c2_port->c2dev;
-       struct c2_ring *tx_ring = &c2_port->tx_ring;
-       struct c2_element *elem;
-       dma_addr_t mapaddr;
-       u32 maplen;
-       unsigned long flags;
-       unsigned int i;
-
-       spin_lock_irqsave(&c2_port->tx_lock, flags);
-
-       if (unlikely(c2_port->tx_avail < (skb_shinfo(skb)->nr_frags + 1))) {
-               netif_stop_queue(netdev);
-               spin_unlock_irqrestore(&c2_port->tx_lock, flags);
-
-               pr_debug("%s: Tx ring full when queue awake!\n",
-                       netdev->name);
-               return NETDEV_TX_BUSY;
-       }
-
-       maplen = skb_headlen(skb);
-       mapaddr =
-           pci_map_single(c2dev->pcidev, skb->data, maplen, PCI_DMA_TODEVICE);
-
-       elem = tx_ring->to_use;
-       elem->skb = skb;
-       elem->mapaddr = mapaddr;
-       elem->maplen = maplen;
-
-       /* Tell HW to xmit */
-       __raw_writeq((__force u64) cpu_to_be64(mapaddr),
-                    elem->hw_desc + C2_TXP_ADDR);
-       __raw_writew((__force u16) cpu_to_be16(maplen),
-                    elem->hw_desc + C2_TXP_LEN);
-       __raw_writew((__force u16) cpu_to_be16(TXP_HTXD_READY),
-                    elem->hw_desc + C2_TXP_FLAGS);
-
-       netdev->stats.tx_packets++;
-       netdev->stats.tx_bytes += maplen;
-
-       /* Loop thru additional data fragments and queue them */
-       if (skb_shinfo(skb)->nr_frags) {
-               for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-                       const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-                       maplen = skb_frag_size(frag);
-                       mapaddr = skb_frag_dma_map(&c2dev->pcidev->dev, frag,
-                                                  0, maplen, DMA_TO_DEVICE);
-                       elem = elem->next;
-                       elem->skb = NULL;
-                       elem->mapaddr = mapaddr;
-                       elem->maplen = maplen;
-
-                       /* Tell HW to xmit */
-                       __raw_writeq((__force u64) cpu_to_be64(mapaddr),
-                                    elem->hw_desc + C2_TXP_ADDR);
-                       __raw_writew((__force u16) cpu_to_be16(maplen),
-                                    elem->hw_desc + C2_TXP_LEN);
-                       __raw_writew((__force u16) cpu_to_be16(TXP_HTXD_READY),
-                                    elem->hw_desc + C2_TXP_FLAGS);
-
-                       netdev->stats.tx_packets++;
-                       netdev->stats.tx_bytes += maplen;
-               }
-       }
-
-       tx_ring->to_use = elem->next;
-       c2_port->tx_avail -= (skb_shinfo(skb)->nr_frags + 1);
-
-       if (c2_port->tx_avail <= MAX_SKB_FRAGS + 1) {
-               netif_stop_queue(netdev);
-               if (netif_msg_tx_queued(c2_port))
-                       pr_debug("%s: transmit queue full\n",
-                               netdev->name);
-       }
-
-       spin_unlock_irqrestore(&c2_port->tx_lock, flags);
-
-       netdev->trans_start = jiffies;
-
-       return NETDEV_TX_OK;
-}
-
-static void c2_tx_timeout(struct net_device *netdev)
-{
-       struct c2_port *c2_port = netdev_priv(netdev);
-
-       if (netif_msg_timer(c2_port))
-               pr_debug("%s: tx timeout\n", netdev->name);
-
-       c2_tx_clean(c2_port);
-}
-
-static int c2_change_mtu(struct net_device *netdev, int new_mtu)
-{
-       int ret = 0;
-
-       if (new_mtu < ETH_ZLEN || new_mtu > ETH_JUMBO_MTU)
-               return -EINVAL;
-
-       netdev->mtu = new_mtu;
-
-       if (netif_running(netdev)) {
-               c2_down(netdev);
-
-               c2_up(netdev);
-       }
-
-       return ret;
-}
-
-static const struct net_device_ops c2_netdev = {
-       .ndo_open               = c2_up,
-       .ndo_stop               = c2_down,
-       .ndo_start_xmit         = c2_xmit_frame,
-       .ndo_tx_timeout         = c2_tx_timeout,
-       .ndo_change_mtu         = c2_change_mtu,
-       .ndo_set_mac_address    = eth_mac_addr,
-       .ndo_validate_addr      = eth_validate_addr,
-};
-
-/* Initialize network device */
-static struct net_device *c2_devinit(struct c2_dev *c2dev,
-                                    void __iomem * mmio_addr)
-{
-       struct c2_port *c2_port = NULL;
-       struct net_device *netdev = alloc_etherdev(sizeof(*c2_port));
-
-       if (!netdev) {
-               pr_debug("c2_port etherdev alloc failed");
-               return NULL;
-       }
-
-       SET_NETDEV_DEV(netdev, &c2dev->pcidev->dev);
-
-       netdev->netdev_ops = &c2_netdev;
-       netdev->watchdog_timeo = C2_TX_TIMEOUT;
-       netdev->irq = c2dev->pcidev->irq;
-
-       c2_port = netdev_priv(netdev);
-       c2_port->netdev = netdev;
-       c2_port->c2dev = c2dev;
-       c2_port->msg_enable = netif_msg_init(debug, default_msg);
-       c2_port->tx_ring.count = C2_NUM_TX_DESC;
-       c2_port->rx_ring.count = C2_NUM_RX_DESC;
-
-       spin_lock_init(&c2_port->tx_lock);
-
-       /* Copy our 48-bit ethernet hardware address */
-       memcpy_fromio(netdev->dev_addr, mmio_addr + C2_REGS_ENADDR, 6);
-
-       /* Validate the MAC address */
-       if (!is_valid_ether_addr(netdev->dev_addr)) {
-               pr_debug("Invalid MAC Address\n");
-               pr_debug("%s: MAC %pM, IRQ %u\n", netdev->name,
-                        netdev->dev_addr, netdev->irq);
-               free_netdev(netdev);
-               return NULL;
-       }
-
-       c2dev->netdev = netdev;
-
-       return netdev;
-}
-
-static int c2_probe(struct pci_dev *pcidev, const struct pci_device_id *ent)
-{
-       int ret = 0, i;
-       unsigned long reg0_start, reg0_flags, reg0_len;
-       unsigned long reg2_start, reg2_flags, reg2_len;
-       unsigned long reg4_start, reg4_flags, reg4_len;
-       unsigned kva_map_size;
-       struct net_device *netdev = NULL;
-       struct c2_dev *c2dev = NULL;
-       void __iomem *mmio_regs = NULL;
-
-       printk(KERN_INFO PFX "AMSO1100 Gigabit Ethernet driver v%s loaded\n",
-               DRV_VERSION);
-
-       /* Enable PCI device */
-       ret = pci_enable_device(pcidev);
-       if (ret) {
-               printk(KERN_ERR PFX "%s: Unable to enable PCI device\n",
-                       pci_name(pcidev));
-               goto bail0;
-       }
-
-       reg0_start = pci_resource_start(pcidev, BAR_0);
-       reg0_len = pci_resource_len(pcidev, BAR_0);
-       reg0_flags = pci_resource_flags(pcidev, BAR_0);
-
-       reg2_start = pci_resource_start(pcidev, BAR_2);
-       reg2_len = pci_resource_len(pcidev, BAR_2);
-       reg2_flags = pci_resource_flags(pcidev, BAR_2);
-
-       reg4_start = pci_resource_start(pcidev, BAR_4);
-       reg4_len = pci_resource_len(pcidev, BAR_4);
-       reg4_flags = pci_resource_flags(pcidev, BAR_4);
-
-       pr_debug("BAR0 size = 0x%lX bytes\n", reg0_len);
-       pr_debug("BAR2 size = 0x%lX bytes\n", reg2_len);
-       pr_debug("BAR4 size = 0x%lX bytes\n", reg4_len);
-
-       /* Make sure PCI base addr are MMIO */
-       if (!(reg0_flags & IORESOURCE_MEM) ||
-           !(reg2_flags & IORESOURCE_MEM) || !(reg4_flags & IORESOURCE_MEM)) {
-               printk(KERN_ERR PFX "PCI regions not an MMIO resource\n");
-               ret = -ENODEV;
-               goto bail1;
-       }
-
-       /* Check for weird/broken PCI region reporting */
-       if ((reg0_len < C2_REG0_SIZE) ||
-           (reg2_len < C2_REG2_SIZE) || (reg4_len < C2_REG4_SIZE)) {
-               printk(KERN_ERR PFX "Invalid PCI region sizes\n");
-               ret = -ENODEV;
-               goto bail1;
-       }
-
-       /* Reserve PCI I/O and memory resources */
-       ret = pci_request_regions(pcidev, DRV_NAME);
-       if (ret) {
-               printk(KERN_ERR PFX "%s: Unable to request regions\n",
-                       pci_name(pcidev));
-               goto bail1;
-       }
-
-       if ((sizeof(dma_addr_t) > 4)) {
-               ret = pci_set_dma_mask(pcidev, DMA_BIT_MASK(64));
-               if (ret < 0) {
-                       printk(KERN_ERR PFX "64b DMA configuration failed\n");
-                       goto bail2;
-               }
-       } else {
-               ret = pci_set_dma_mask(pcidev, DMA_BIT_MASK(32));
-               if (ret < 0) {
-                       printk(KERN_ERR PFX "32b DMA configuration failed\n");
-                       goto bail2;
-               }
-       }
-
-       /* Enables bus-mastering on the device */
-       pci_set_master(pcidev);
-
-       /* Remap the adapter PCI registers in BAR4 */
-       mmio_regs = ioremap_nocache(reg4_start + C2_PCI_REGS_OFFSET,
-                                   sizeof(struct c2_adapter_pci_regs));
-       if (!mmio_regs) {
-               printk(KERN_ERR PFX
-                       "Unable to remap adapter PCI registers in BAR4\n");
-               ret = -EIO;
-               goto bail2;
-       }
-
-       /* Validate PCI regs magic */
-       for (i = 0; i < sizeof(c2_magic); i++) {
-               if (c2_magic[i] != readb(mmio_regs + C2_REGS_MAGIC + i)) {
-                       printk(KERN_ERR PFX "Downlevel Firmware boot loader "
-                               "[%d/%Zd: got 0x%x, exp 0x%x]. Use the cc_flash "
-                              "utility to update your boot loader\n",
-                               i + 1, sizeof(c2_magic),
-                               readb(mmio_regs + C2_REGS_MAGIC + i),
-                               c2_magic[i]);
-                       printk(KERN_ERR PFX "Adapter not claimed\n");
-                       iounmap(mmio_regs);
-                       ret = -EIO;
-                       goto bail2;
-               }
-       }
-
-       /* Validate the adapter version */
-       if (be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_VERS)) != C2_VERSION) {
-               printk(KERN_ERR PFX "Version mismatch "
-                       "[fw=%u, c2=%u], Adapter not claimed\n",
-                       be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_VERS)),
-                       C2_VERSION);
-               ret = -EINVAL;
-               iounmap(mmio_regs);
-               goto bail2;
-       }
-
-       /* Validate the adapter IVN */
-       if (be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_IVN)) != C2_IVN) {
-               printk(KERN_ERR PFX "Downlevel FIrmware level. You should be using "
-                      "the OpenIB device support kit. "
-                      "[fw=0x%x, c2=0x%x], Adapter not claimed\n",
-                      be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_IVN)),
-                      C2_IVN);
-               ret = -EINVAL;
-               iounmap(mmio_regs);
-               goto bail2;
-       }
-
-       /* Allocate hardware structure */
-       c2dev = (struct c2_dev *) ib_alloc_device(sizeof(*c2dev));
-       if (!c2dev) {
-               printk(KERN_ERR PFX "%s: Unable to alloc hardware struct\n",
-                       pci_name(pcidev));
-               ret = -ENOMEM;
-               iounmap(mmio_regs);
-               goto bail2;
-       }
-
-       memset(c2dev, 0, sizeof(*c2dev));
-       spin_lock_init(&c2dev->lock);
-       c2dev->pcidev = pcidev;
-       c2dev->cur_tx = 0;
-
-       /* Get the last RX index */
-       c2dev->cur_rx =
-           (be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_HRX_CUR)) -
-            0xffffc000) / sizeof(struct c2_rxp_desc);
-
-       /* Request an interrupt line for the driver */
-       ret = request_irq(pcidev->irq, c2_interrupt, IRQF_SHARED, DRV_NAME, c2dev);
-       if (ret) {
-               printk(KERN_ERR PFX "%s: requested IRQ %u is busy\n",
-                       pci_name(pcidev), pcidev->irq);
-               iounmap(mmio_regs);
-               goto bail3;
-       }
-
-       /* Set driver specific data */
-       pci_set_drvdata(pcidev, c2dev);
-
-       /* Initialize network device */
-       if ((netdev = c2_devinit(c2dev, mmio_regs)) == NULL) {
-               ret = -ENOMEM;
-               iounmap(mmio_regs);
-               goto bail4;
-       }
-
-       /* Save off the actual size prior to unmapping mmio_regs */
-       kva_map_size = be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_PCI_WINSIZE));
-
-       /* Unmap the adapter PCI registers in BAR4 */
-       iounmap(mmio_regs);
-
-       /* Register network device */
-       ret = register_netdev(netdev);
-       if (ret) {
-               printk(KERN_ERR PFX "Unable to register netdev, ret = %d\n",
-                       ret);
-               goto bail5;
-       }
-
-       /* Disable network packets */
-       netif_stop_queue(netdev);
-
-       /* Remap the adapter HRXDQ PA space to kernel VA space */
-       c2dev->mmio_rxp_ring = ioremap_nocache(reg4_start + C2_RXP_HRXDQ_OFFSET,
-                                              C2_RXP_HRXDQ_SIZE);
-       if (!c2dev->mmio_rxp_ring) {
-               printk(KERN_ERR PFX "Unable to remap MMIO HRXDQ region\n");
-               ret = -EIO;
-               goto bail6;
-       }
-
-       /* Remap the adapter HTXDQ PA space to kernel VA space */
-       c2dev->mmio_txp_ring = ioremap_nocache(reg4_start + C2_TXP_HTXDQ_OFFSET,
-                                              C2_TXP_HTXDQ_SIZE);
-       if (!c2dev->mmio_txp_ring) {
-               printk(KERN_ERR PFX "Unable to remap MMIO HTXDQ region\n");
-               ret = -EIO;
-               goto bail7;
-       }
-
-       /* Save off the current RX index in the last 4 bytes of the TXP Ring */
-       C2_SET_CUR_RX(c2dev, c2dev->cur_rx);
-
-       /* Remap the PCI registers in adapter BAR0 to kernel VA space */
-       c2dev->regs = ioremap_nocache(reg0_start, reg0_len);
-       if (!c2dev->regs) {
-               printk(KERN_ERR PFX "Unable to remap BAR0\n");
-               ret = -EIO;
-               goto bail8;
-       }
-
-       /* Remap the PCI registers in adapter BAR4 to kernel VA space */
-       c2dev->pa = reg4_start + C2_PCI_REGS_OFFSET;
-       c2dev->kva = ioremap_nocache(reg4_start + C2_PCI_REGS_OFFSET,
-                                    kva_map_size);
-       if (!c2dev->kva) {
-               printk(KERN_ERR PFX "Unable to remap BAR4\n");
-               ret = -EIO;
-               goto bail9;
-       }
-
-       /* Print out the MAC address */
-       pr_debug("%s: MAC %pM, IRQ %u\n", netdev->name, netdev->dev_addr,
-                netdev->irq);
-
-       ret = c2_rnic_init(c2dev);
-       if (ret) {
-               printk(KERN_ERR PFX "c2_rnic_init failed: %d\n", ret);
-               goto bail10;
-       }
-
-       ret = c2_register_device(c2dev);
-       if (ret)
-               goto bail10;
-
-       return 0;
-
- bail10:
-       iounmap(c2dev->kva);
-
- bail9:
-       iounmap(c2dev->regs);
-
- bail8:
-       iounmap(c2dev->mmio_txp_ring);
-
- bail7:
-       iounmap(c2dev->mmio_rxp_ring);
-
- bail6:
-       unregister_netdev(netdev);
-
- bail5:
-       free_netdev(netdev);
-
- bail4:
-       free_irq(pcidev->irq, c2dev);
-
- bail3:
-       ib_dealloc_device(&c2dev->ibdev);
-
- bail2:
-       pci_release_regions(pcidev);
-
- bail1:
-       pci_disable_device(pcidev);
-
- bail0:
-       return ret;
-}
-
-static void c2_remove(struct pci_dev *pcidev)
-{
-       struct c2_dev *c2dev = pci_get_drvdata(pcidev);
-       struct net_device *netdev = c2dev->netdev;
-
-       /* Unregister with OpenIB */
-       c2_unregister_device(c2dev);
-
-       /* Clean up the RNIC resources */
-       c2_rnic_term(c2dev);
-
-       /* Remove network device from the kernel */
-       unregister_netdev(netdev);
-
-       /* Free network device */
-       free_netdev(netdev);
-
-       /* Free the interrupt line */
-       free_irq(pcidev->irq, c2dev);
-
-       /* missing: Turn LEDs off here */
-
-       /* Unmap adapter PA space */
-       iounmap(c2dev->kva);
-       iounmap(c2dev->regs);
-       iounmap(c2dev->mmio_txp_ring);
-       iounmap(c2dev->mmio_rxp_ring);
-
-       /* Free the hardware structure */
-       ib_dealloc_device(&c2dev->ibdev);
-
-       /* Release reserved PCI I/O and memory resources */
-       pci_release_regions(pcidev);
-
-       /* Disable PCI device */
-       pci_disable_device(pcidev);
-
-       /* Clear driver specific data */
-       pci_set_drvdata(pcidev, NULL);
-}
-
-static struct pci_driver c2_pci_driver = {
-       .name = DRV_NAME,
-       .id_table = c2_pci_table,
-       .probe = c2_probe,
-       .remove = c2_remove,
-};
-
-module_pci_driver(c2_pci_driver);
diff --git a/drivers/staging/rdma/amso1100/c2.h b/drivers/staging/rdma/amso1100/c2.h
deleted file mode 100644 (file)
index 21b565a..0000000
+++ /dev/null
@@ -1,547 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __C2_H
-#define __C2_H
-
-#include <linux/netdevice.h>
-#include <linux/spinlock.h>
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/dma-mapping.h>
-#include <linux/idr.h>
-
-#include "c2_provider.h"
-#include "c2_mq.h"
-#include "c2_status.h"
-
-#define DRV_NAME     "c2"
-#define DRV_VERSION  "1.1"
-#define PFX          DRV_NAME ": "
-
-#define BAR_0                0
-#define BAR_2                2
-#define BAR_4                4
-
-#define RX_BUF_SIZE         (1536 + 8)
-#define ETH_JUMBO_MTU        9000
-#define C2_MAGIC            "CEPHEUS"
-#define C2_VERSION           4
-#define C2_IVN              (18 & 0x7fffffff)
-
-#define C2_REG0_SIZE        (16 * 1024)
-#define C2_REG2_SIZE        (2 * 1024 * 1024)
-#define C2_REG4_SIZE        (256 * 1024 * 1024)
-#define C2_NUM_TX_DESC       341
-#define C2_NUM_RX_DESC       256
-#define C2_PCI_REGS_OFFSET  (0x10000)
-#define C2_RXP_HRXDQ_OFFSET (((C2_REG4_SIZE)/2))
-#define C2_RXP_HRXDQ_SIZE   (4096)
-#define C2_TXP_HTXDQ_OFFSET (((C2_REG4_SIZE)/2) + C2_RXP_HRXDQ_SIZE)
-#define C2_TXP_HTXDQ_SIZE   (4096)
-#define C2_TX_TIMEOUT      (6*HZ)
-
-/* CEPHEUS */
-static const u8 c2_magic[] = {
-       0x43, 0x45, 0x50, 0x48, 0x45, 0x55, 0x53
-};
-
-enum adapter_pci_regs {
-       C2_REGS_MAGIC = 0x0000,
-       C2_REGS_VERS = 0x0008,
-       C2_REGS_IVN = 0x000C,
-       C2_REGS_PCI_WINSIZE = 0x0010,
-       C2_REGS_Q0_QSIZE = 0x0014,
-       C2_REGS_Q0_MSGSIZE = 0x0018,
-       C2_REGS_Q0_POOLSTART = 0x001C,
-       C2_REGS_Q0_SHARED = 0x0020,
-       C2_REGS_Q1_QSIZE = 0x0024,
-       C2_REGS_Q1_MSGSIZE = 0x0028,
-       C2_REGS_Q1_SHARED = 0x0030,
-       C2_REGS_Q2_QSIZE = 0x0034,
-       C2_REGS_Q2_MSGSIZE = 0x0038,
-       C2_REGS_Q2_SHARED = 0x0040,
-       C2_REGS_ENADDR = 0x004C,
-       C2_REGS_RDMA_ENADDR = 0x0054,
-       C2_REGS_HRX_CUR = 0x006C,
-};
-
-struct c2_adapter_pci_regs {
-       char reg_magic[8];
-       u32 version;
-       u32 ivn;
-       u32 pci_window_size;
-       u32 q0_q_size;
-       u32 q0_msg_size;
-       u32 q0_pool_start;
-       u32 q0_shared;
-       u32 q1_q_size;
-       u32 q1_msg_size;
-       u32 q1_pool_start;
-       u32 q1_shared;
-       u32 q2_q_size;
-       u32 q2_msg_size;
-       u32 q2_pool_start;
-       u32 q2_shared;
-       u32 log_start;
-       u32 log_size;
-       u8 host_enaddr[8];
-       u8 rdma_enaddr[8];
-       u32 crash_entry;
-       u32 crash_ready[2];
-       u32 fw_txd_cur;
-       u32 fw_hrxd_cur;
-       u32 fw_rxd_cur;
-};
-
-enum pci_regs {
-       C2_HISR = 0x0000,
-       C2_DISR = 0x0004,
-       C2_HIMR = 0x0008,
-       C2_DIMR = 0x000C,
-       C2_NISR0 = 0x0010,
-       C2_NISR1 = 0x0014,
-       C2_NIMR0 = 0x0018,
-       C2_NIMR1 = 0x001C,
-       C2_IDIS = 0x0020,
-};
-
-enum {
-       C2_PCI_HRX_INT = 1 << 8,
-       C2_PCI_HTX_INT = 1 << 17,
-       C2_PCI_HRX_QUI = 1 << 31,
-};
-
-/*
- * Cepheus registers in BAR0.
- */
-struct c2_pci_regs {
-       u32 hostisr;
-       u32 dmaisr;
-       u32 hostimr;
-       u32 dmaimr;
-       u32 netisr0;
-       u32 netisr1;
-       u32 netimr0;
-       u32 netimr1;
-       u32 int_disable;
-};
-
-/* TXP flags */
-enum c2_txp_flags {
-       TXP_HTXD_DONE = 0,
-       TXP_HTXD_READY = 1 << 0,
-       TXP_HTXD_UNINIT = 1 << 1,
-};
-
-/* RXP flags */
-enum c2_rxp_flags {
-       RXP_HRXD_UNINIT = 0,
-       RXP_HRXD_READY = 1 << 0,
-       RXP_HRXD_DONE = 1 << 1,
-};
-
-/* RXP status */
-enum c2_rxp_status {
-       RXP_HRXD_ZERO = 0,
-       RXP_HRXD_OK = 1 << 0,
-       RXP_HRXD_BUF_OV = 1 << 1,
-};
-
-/* TXP descriptor fields */
-enum txp_desc {
-       C2_TXP_FLAGS = 0x0000,
-       C2_TXP_LEN = 0x0002,
-       C2_TXP_ADDR = 0x0004,
-};
-
-/* RXP descriptor fields */
-enum rxp_desc {
-       C2_RXP_FLAGS = 0x0000,
-       C2_RXP_STATUS = 0x0002,
-       C2_RXP_COUNT = 0x0004,
-       C2_RXP_LEN = 0x0006,
-       C2_RXP_ADDR = 0x0008,
-};
-
-struct c2_txp_desc {
-       u16 flags;
-       u16 len;
-       u64 addr;
-} __attribute__ ((packed));
-
-struct c2_rxp_desc {
-       u16 flags;
-       u16 status;
-       u16 count;
-       u16 len;
-       u64 addr;
-} __attribute__ ((packed));
-
-struct c2_rxp_hdr {
-       u16 flags;
-       u16 status;
-       u16 len;
-       u16 rsvd;
-} __attribute__ ((packed));
-
-struct c2_tx_desc {
-       u32 len;
-       u32 status;
-       dma_addr_t next_offset;
-};
-
-struct c2_rx_desc {
-       u32 len;
-       u32 status;
-       dma_addr_t next_offset;
-};
-
-struct c2_alloc {
-       u32 last;
-       u32 max;
-       spinlock_t lock;
-       unsigned long *table;
-};
-
-struct c2_array {
-       struct {
-               void **page;
-               int used;
-       } *page_list;
-};
-
-/*
- * The MQ shared pointer pool is organized as a linked list of
- * chunks. Each chunk contains a linked list of free shared pointers
- * that can be allocated to a given user mode client.
- *
- */
-struct sp_chunk {
-       struct sp_chunk *next;
-       dma_addr_t dma_addr;
-       DEFINE_DMA_UNMAP_ADDR(mapping);
-       u16 head;
-       u16 shared_ptr[0];
-};
-
-struct c2_pd_table {
-       u32 last;
-       u32 max;
-       spinlock_t lock;
-       unsigned long *table;
-};
-
-struct c2_qp_table {
-       struct idr idr;
-       spinlock_t lock;
-};
-
-struct c2_element {
-       struct c2_element *next;
-       void *ht_desc;          /* host     descriptor */
-       void __iomem *hw_desc;  /* hardware descriptor */
-       struct sk_buff *skb;
-       dma_addr_t mapaddr;
-       u32 maplen;
-};
-
-struct c2_ring {
-       struct c2_element *to_clean;
-       struct c2_element *to_use;
-       struct c2_element *start;
-       unsigned long count;
-};
-
-struct c2_dev {
-       struct ib_device ibdev;
-       void __iomem *regs;
-       void __iomem *mmio_txp_ring; /* remapped adapter memory for hw rings */
-       void __iomem *mmio_rxp_ring;
-       spinlock_t lock;
-       struct pci_dev *pcidev;
-       struct net_device *netdev;
-       struct net_device *pseudo_netdev;
-       unsigned int cur_tx;
-       unsigned int cur_rx;
-       u32 adapter_handle;
-       int device_cap_flags;
-       void __iomem *kva;      /* KVA device memory */
-       unsigned long pa;       /* PA device memory */
-       void **qptr_array;
-
-       struct kmem_cache *host_msg_cache;
-
-       struct list_head cca_link;              /* adapter list */
-       struct list_head eh_wakeup_list;        /* event wakeup list */
-       wait_queue_head_t req_vq_wo;
-
-       /* Cached RNIC properties */
-       struct ib_device_attr props;
-
-       struct c2_pd_table pd_table;
-       struct c2_qp_table qp_table;
-       int ports;              /* num of GigE ports */
-       int devnum;
-       spinlock_t vqlock;      /* sync vbs req MQ */
-
-       /* Verbs Queues */
-       struct c2_mq req_vq;    /* Verbs Request MQ */
-       struct c2_mq rep_vq;    /* Verbs Reply MQ */
-       struct c2_mq aeq;       /* Async Events MQ */
-
-       /* Kernel client MQs */
-       struct sp_chunk *kern_mqsp_pool;
-
-       /* Device updates these values when posting messages to a host
-        * target queue */
-       u16 req_vq_shared;
-       u16 rep_vq_shared;
-       u16 aeq_shared;
-       u16 irq_claimed;
-
-       /*
-        * Shared host target pages for user-accessible MQs.
-        */
-       int hthead;             /* index of first free entry */
-       void *htpages;          /* kernel vaddr */
-       int htlen;              /* length of htpages memory */
-       void *htuva;            /* user mapped vaddr */
-       spinlock_t htlock;      /* serialize allocation */
-
-       u64 adapter_hint_uva;   /* access to the activity FIFO */
-
-       //      spinlock_t aeq_lock;
-       //      spinlock_t rnic_lock;
-
-       __be16 *hint_count;
-       dma_addr_t hint_count_dma;
-       u16 hints_read;
-
-       int init;               /* TRUE if it's ready */
-       char ae_cache_name[16];
-       char vq_cache_name[16];
-};
-
-struct c2_port {
-       u32 msg_enable;
-       struct c2_dev *c2dev;
-       struct net_device *netdev;
-
-       spinlock_t tx_lock;
-       u32 tx_avail;
-       struct c2_ring tx_ring;
-       struct c2_ring rx_ring;
-
-       void *mem;              /* PCI memory for host rings */
-       dma_addr_t dma;
-       unsigned long mem_size;
-
-       u32 rx_buf_size;
-};
-
-/*
- * Activity FIFO registers in BAR0.
- */
-#define PCI_BAR0_HOST_HINT     0x100
-#define PCI_BAR0_ADAPTER_HINT  0x2000
-
-/*
- * Ammasso PCI vendor id and Cepheus PCI device id.
- */
-#define CQ_ARMED       0x01
-#define CQ_WAIT_FOR_DMA        0x80
-
-/*
- * The format of a hint is as follows:
- * Lower 16 bits are the count of hints for the queue.
- * Next 15 bits are the qp_index
- * Upper most bit depends on who reads it:
- *    If read by producer, then it means Full (1) or Not-Full (0)
- *    If read by consumer, then it means Empty (1) or Not-Empty (0)
- */
-#define C2_HINT_MAKE(q_index, hint_count) (((q_index) << 16) | hint_count)
-#define C2_HINT_GET_INDEX(hint) (((hint) & 0x7FFF0000) >> 16)
-#define C2_HINT_GET_COUNT(hint) ((hint) & 0x0000FFFF)
-
-
-/*
- * The following defines the offset in SDRAM for the c2_adapter_pci_regs_t
- * struct.
- */
-#define C2_ADAPTER_PCI_REGS_OFFSET 0x10000
-
-#ifndef readq
-static inline u64 readq(const void __iomem * addr)
-{
-       u64 ret = readl(addr + 4);
-       ret <<= 32;
-       ret |= readl(addr);
-
-       return ret;
-}
-#endif
-
-#ifndef writeq
-static inline void __raw_writeq(u64 val, void __iomem * addr)
-{
-       __raw_writel((u32) (val), addr);
-       __raw_writel((u32) (val >> 32), (addr + 4));
-}
-#endif
-
-#define C2_SET_CUR_RX(c2dev, cur_rx) \
-       __raw_writel((__force u32) cpu_to_be32(cur_rx), c2dev->mmio_txp_ring + 4092)
-
-#define C2_GET_CUR_RX(c2dev) \
-       be32_to_cpu((__force __be32) readl(c2dev->mmio_txp_ring + 4092))
-
-static inline struct c2_dev *to_c2dev(struct ib_device *ibdev)
-{
-       return container_of(ibdev, struct c2_dev, ibdev);
-}
-
-static inline int c2_errno(void *reply)
-{
-       switch (c2_wr_get_result(reply)) {
-       case C2_OK:
-               return 0;
-       case CCERR_NO_BUFS:
-       case CCERR_INSUFFICIENT_RESOURCES:
-       case CCERR_ZERO_RDMA_READ_RESOURCES:
-               return -ENOMEM;
-       case CCERR_MR_IN_USE:
-       case CCERR_QP_IN_USE:
-               return -EBUSY;
-       case CCERR_ADDR_IN_USE:
-               return -EADDRINUSE;
-       case CCERR_ADDR_NOT_AVAIL:
-               return -EADDRNOTAVAIL;
-       case CCERR_CONN_RESET:
-               return -ECONNRESET;
-       case CCERR_NOT_IMPLEMENTED:
-       case CCERR_INVALID_WQE:
-               return -ENOSYS;
-       case CCERR_QP_NOT_PRIVILEGED:
-               return -EPERM;
-       case CCERR_STACK_ERROR:
-               return -EPROTO;
-       case CCERR_ACCESS_VIOLATION:
-       case CCERR_BASE_AND_BOUNDS_VIOLATION:
-               return -EFAULT;
-       case CCERR_STAG_STATE_NOT_INVALID:
-       case CCERR_INVALID_ADDRESS:
-       case CCERR_INVALID_CQ:
-       case CCERR_INVALID_EP:
-       case CCERR_INVALID_MODIFIER:
-       case CCERR_INVALID_MTU:
-       case CCERR_INVALID_PD_ID:
-       case CCERR_INVALID_QP:
-       case CCERR_INVALID_RNIC:
-       case CCERR_INVALID_STAG:
-               return -EINVAL;
-       default:
-               return -EAGAIN;
-       }
-}
-
-/* Device */
-int c2_register_device(struct c2_dev *c2dev);
-void c2_unregister_device(struct c2_dev *c2dev);
-int c2_rnic_init(struct c2_dev *c2dev);
-void c2_rnic_term(struct c2_dev *c2dev);
-void c2_rnic_interrupt(struct c2_dev *c2dev);
-int c2_del_addr(struct c2_dev *c2dev, __be32 inaddr, __be32 inmask);
-int c2_add_addr(struct c2_dev *c2dev, __be32 inaddr, __be32 inmask);
-
-/* QPs */
-int c2_alloc_qp(struct c2_dev *c2dev, struct c2_pd *pd,
-                      struct ib_qp_init_attr *qp_attrs, struct c2_qp *qp);
-void c2_free_qp(struct c2_dev *c2dev, struct c2_qp *qp);
-struct ib_qp *c2_get_qp(struct ib_device *device, int qpn);
-int c2_qp_modify(struct c2_dev *c2dev, struct c2_qp *qp,
-                       struct ib_qp_attr *attr, int attr_mask);
-int c2_qp_set_read_limits(struct c2_dev *c2dev, struct c2_qp *qp,
-                                int ord, int ird);
-int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
-                       struct ib_send_wr **bad_wr);
-int c2_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr,
-                          struct ib_recv_wr **bad_wr);
-void c2_init_qp_table(struct c2_dev *c2dev);
-void c2_cleanup_qp_table(struct c2_dev *c2dev);
-void c2_set_qp_state(struct c2_qp *, int);
-struct c2_qp *c2_find_qpn(struct c2_dev *c2dev, int qpn);
-
-/* PDs */
-int c2_pd_alloc(struct c2_dev *c2dev, int privileged, struct c2_pd *pd);
-void c2_pd_free(struct c2_dev *c2dev, struct c2_pd *pd);
-int c2_init_pd_table(struct c2_dev *c2dev);
-void c2_cleanup_pd_table(struct c2_dev *c2dev);
-
-/* CQs */
-int c2_init_cq(struct c2_dev *c2dev, int entries,
-                     struct c2_ucontext *ctx, struct c2_cq *cq);
-void c2_free_cq(struct c2_dev *c2dev, struct c2_cq *cq);
-void c2_cq_event(struct c2_dev *c2dev, u32 mq_index);
-void c2_cq_clean(struct c2_dev *c2dev, struct c2_qp *qp, u32 mq_index);
-int c2_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
-int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
-
-/* CM */
-int c2_llp_connect(struct iw_cm_id *cm_id,
-                         struct iw_cm_conn_param *iw_param);
-int c2_llp_accept(struct iw_cm_id *cm_id,
-                        struct iw_cm_conn_param *iw_param);
-int c2_llp_reject(struct iw_cm_id *cm_id, const void *pdata,
-                        u8 pdata_len);
-int c2_llp_service_create(struct iw_cm_id *cm_id, int backlog);
-int c2_llp_service_destroy(struct iw_cm_id *cm_id);
-
-/* MM */
-int c2_nsmr_register_phys_kern(struct c2_dev *c2dev, u64 *addr_list,
-                                     int page_size, int pbl_depth, u32 length,
-                                     u32 off, u64 *va, enum c2_acf acf,
-                                     struct c2_mr *mr);
-int c2_stag_dealloc(struct c2_dev *c2dev, u32 stag_index);
-
-/* AE */
-void c2_ae_event(struct c2_dev *c2dev, u32 mq_index);
-
-/* MQSP Allocator */
-int c2_init_mqsp_pool(struct c2_dev *c2dev, gfp_t gfp_mask,
-                            struct sp_chunk **root);
-void c2_free_mqsp_pool(struct c2_dev *c2dev, struct sp_chunk *root);
-__be16 *c2_alloc_mqsp(struct c2_dev *c2dev, struct sp_chunk *head,
-                            dma_addr_t *dma_addr, gfp_t gfp_mask);
-void c2_free_mqsp(__be16* mqsp);
-#endif
diff --git a/drivers/staging/rdma/amso1100/c2_ae.c b/drivers/staging/rdma/amso1100/c2_ae.c
deleted file mode 100644 (file)
index eb7a92b..0000000
+++ /dev/null
@@ -1,327 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "c2.h"
-#include <rdma/iw_cm.h>
-#include "c2_status.h"
-#include "c2_ae.h"
-
-static int c2_convert_cm_status(u32 c2_status)
-{
-       switch (c2_status) {
-       case C2_CONN_STATUS_SUCCESS:
-               return 0;
-       case C2_CONN_STATUS_REJECTED:
-               return -ENETRESET;
-       case C2_CONN_STATUS_REFUSED:
-               return -ECONNREFUSED;
-       case C2_CONN_STATUS_TIMEDOUT:
-               return -ETIMEDOUT;
-       case C2_CONN_STATUS_NETUNREACH:
-               return -ENETUNREACH;
-       case C2_CONN_STATUS_HOSTUNREACH:
-               return -EHOSTUNREACH;
-       case C2_CONN_STATUS_INVALID_RNIC:
-               return -EINVAL;
-       case C2_CONN_STATUS_INVALID_QP:
-               return -EINVAL;
-       case C2_CONN_STATUS_INVALID_QP_STATE:
-               return -EINVAL;
-       case C2_CONN_STATUS_ADDR_NOT_AVAIL:
-               return -EADDRNOTAVAIL;
-       default:
-               printk(KERN_ERR PFX
-                      "%s - Unable to convert CM status: %d\n",
-                      __func__, c2_status);
-               return -EIO;
-       }
-}
-
-static const char* to_event_str(int event)
-{
-       static const char* event_str[] = {
-               "CCAE_REMOTE_SHUTDOWN",
-               "CCAE_ACTIVE_CONNECT_RESULTS",
-               "CCAE_CONNECTION_REQUEST",
-               "CCAE_LLP_CLOSE_COMPLETE",
-               "CCAE_TERMINATE_MESSAGE_RECEIVED",
-               "CCAE_LLP_CONNECTION_RESET",
-               "CCAE_LLP_CONNECTION_LOST",
-               "CCAE_LLP_SEGMENT_SIZE_INVALID",
-               "CCAE_LLP_INVALID_CRC",
-               "CCAE_LLP_BAD_FPDU",
-               "CCAE_INVALID_DDP_VERSION",
-               "CCAE_INVALID_RDMA_VERSION",
-               "CCAE_UNEXPECTED_OPCODE",
-               "CCAE_INVALID_DDP_QUEUE_NUMBER",
-               "CCAE_RDMA_READ_NOT_ENABLED",
-               "CCAE_RDMA_WRITE_NOT_ENABLED",
-               "CCAE_RDMA_READ_TOO_SMALL",
-               "CCAE_NO_L_BIT",
-               "CCAE_TAGGED_INVALID_STAG",
-               "CCAE_TAGGED_BASE_BOUNDS_VIOLATION",
-               "CCAE_TAGGED_ACCESS_RIGHTS_VIOLATION",
-               "CCAE_TAGGED_INVALID_PD",
-               "CCAE_WRAP_ERROR",
-               "CCAE_BAD_CLOSE",
-               "CCAE_BAD_LLP_CLOSE",
-               "CCAE_INVALID_MSN_RANGE",
-               "CCAE_INVALID_MSN_GAP",
-               "CCAE_IRRQ_OVERFLOW",
-               "CCAE_IRRQ_MSN_GAP",
-               "CCAE_IRRQ_MSN_RANGE",
-               "CCAE_IRRQ_INVALID_STAG",
-               "CCAE_IRRQ_BASE_BOUNDS_VIOLATION",
-               "CCAE_IRRQ_ACCESS_RIGHTS_VIOLATION",
-               "CCAE_IRRQ_INVALID_PD",
-               "CCAE_IRRQ_WRAP_ERROR",
-               "CCAE_CQ_SQ_COMPLETION_OVERFLOW",
-               "CCAE_CQ_RQ_COMPLETION_ERROR",
-               "CCAE_QP_SRQ_WQE_ERROR",
-               "CCAE_QP_LOCAL_CATASTROPHIC_ERROR",
-               "CCAE_CQ_OVERFLOW",
-               "CCAE_CQ_OPERATION_ERROR",
-               "CCAE_SRQ_LIMIT_REACHED",
-               "CCAE_QP_RQ_LIMIT_REACHED",
-               "CCAE_SRQ_CATASTROPHIC_ERROR",
-               "CCAE_RNIC_CATASTROPHIC_ERROR"
-       };
-
-       if (event < CCAE_REMOTE_SHUTDOWN ||
-           event > CCAE_RNIC_CATASTROPHIC_ERROR)
-               return "<invalid event>";
-
-       event -= CCAE_REMOTE_SHUTDOWN;
-       return event_str[event];
-}
-
-static const char *to_qp_state_str(int state)
-{
-       switch (state) {
-       case C2_QP_STATE_IDLE:
-               return "C2_QP_STATE_IDLE";
-       case C2_QP_STATE_CONNECTING:
-               return "C2_QP_STATE_CONNECTING";
-       case C2_QP_STATE_RTS:
-               return "C2_QP_STATE_RTS";
-       case C2_QP_STATE_CLOSING:
-               return "C2_QP_STATE_CLOSING";
-       case C2_QP_STATE_TERMINATE:
-               return "C2_QP_STATE_TERMINATE";
-       case C2_QP_STATE_ERROR:
-               return "C2_QP_STATE_ERROR";
-       default:
-               return "<invalid QP state>";
-       }
-}
-
-void c2_ae_event(struct c2_dev *c2dev, u32 mq_index)
-{
-       struct c2_mq *mq = c2dev->qptr_array[mq_index];
-       union c2wr *wr;
-       void *resource_user_context;
-       struct iw_cm_event cm_event;
-       struct ib_event ib_event;
-       enum c2_resource_indicator resource_indicator;
-       enum c2_event_id event_id;
-       unsigned long flags;
-       int status;
-       struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_event.local_addr;
-       struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_event.remote_addr;
-
-       /*
-        * retrieve the message
-        */
-       wr = c2_mq_consume(mq);
-       if (!wr)
-               return;
-
-       memset(&ib_event, 0, sizeof(ib_event));
-       memset(&cm_event, 0, sizeof(cm_event));
-
-       event_id = c2_wr_get_id(wr);
-       resource_indicator = be32_to_cpu(wr->ae.ae_generic.resource_type);
-       resource_user_context =
-           (void *) (unsigned long) wr->ae.ae_generic.user_context;
-
-       status = cm_event.status = c2_convert_cm_status(c2_wr_get_result(wr));
-
-       pr_debug("event received c2_dev=%p, event_id=%d, "
-               "resource_indicator=%d, user_context=%p, status = %d\n",
-               c2dev, event_id, resource_indicator, resource_user_context,
-               status);
-
-       switch (resource_indicator) {
-       case C2_RES_IND_QP:{
-
-               struct c2_qp *qp = resource_user_context;
-               struct iw_cm_id *cm_id = qp->cm_id;
-               struct c2wr_ae_active_connect_results *res;
-
-               if (!cm_id) {
-                       pr_debug("event received, but cm_id is <nul>, qp=%p!\n",
-                               qp);
-                       goto ignore_it;
-               }
-               pr_debug("%s: event = %s, user_context=%llx, "
-                       "resource_type=%x, "
-                       "resource=%x, qp_state=%s\n",
-                       __func__,
-                       to_event_str(event_id),
-                       (unsigned long long) wr->ae.ae_generic.user_context,
-                       be32_to_cpu(wr->ae.ae_generic.resource_type),
-                       be32_to_cpu(wr->ae.ae_generic.resource),
-                       to_qp_state_str(be32_to_cpu(wr->ae.ae_generic.qp_state)));
-
-               c2_set_qp_state(qp, be32_to_cpu(wr->ae.ae_generic.qp_state));
-
-               switch (event_id) {
-               case CCAE_ACTIVE_CONNECT_RESULTS:
-                       res = &wr->ae.ae_active_connect_results;
-                       cm_event.event = IW_CM_EVENT_CONNECT_REPLY;
-                       laddr->sin_addr.s_addr = res->laddr;
-                       raddr->sin_addr.s_addr = res->raddr;
-                       laddr->sin_port = res->lport;
-                       raddr->sin_port = res->rport;
-                       if (status == 0) {
-                               cm_event.private_data_len =
-                                       be32_to_cpu(res->private_data_length);
-                               cm_event.private_data = res->private_data;
-                       } else {
-                               spin_lock_irqsave(&qp->lock, flags);
-                               if (qp->cm_id) {
-                                       qp->cm_id->rem_ref(qp->cm_id);
-                                       qp->cm_id = NULL;
-                               }
-                               spin_unlock_irqrestore(&qp->lock, flags);
-                               cm_event.private_data_len = 0;
-                               cm_event.private_data = NULL;
-                       }
-                       if (cm_id->event_handler)
-                               cm_id->event_handler(cm_id, &cm_event);
-                       break;
-               case CCAE_TERMINATE_MESSAGE_RECEIVED:
-               case CCAE_CQ_SQ_COMPLETION_OVERFLOW:
-                       ib_event.device = &c2dev->ibdev;
-                       ib_event.element.qp = &qp->ibqp;
-                       ib_event.event = IB_EVENT_QP_REQ_ERR;
-
-                       if (qp->ibqp.event_handler)
-                               qp->ibqp.event_handler(&ib_event,
-                                                      qp->ibqp.
-                                                      qp_context);
-                       break;
-               case CCAE_BAD_CLOSE:
-               case CCAE_LLP_CLOSE_COMPLETE:
-               case CCAE_LLP_CONNECTION_RESET:
-               case CCAE_LLP_CONNECTION_LOST:
-                       BUG_ON(cm_id->event_handler==(void*)0x6b6b6b6b);
-
-                       spin_lock_irqsave(&qp->lock, flags);
-                       if (qp->cm_id) {
-                               qp->cm_id->rem_ref(qp->cm_id);
-                               qp->cm_id = NULL;
-                       }
-                       spin_unlock_irqrestore(&qp->lock, flags);
-                       cm_event.event = IW_CM_EVENT_CLOSE;
-                       cm_event.status = 0;
-                       if (cm_id->event_handler)
-                               cm_id->event_handler(cm_id, &cm_event);
-                       break;
-               default:
-                       BUG_ON(1);
-                       pr_debug("%s:%d Unexpected event_id=%d on QP=%p, "
-                               "CM_ID=%p\n",
-                               __func__, __LINE__,
-                               event_id, qp, cm_id);
-                       break;
-               }
-               break;
-       }
-
-       case C2_RES_IND_EP:{
-
-               struct c2wr_ae_connection_request *req =
-                       &wr->ae.ae_connection_request;
-               struct iw_cm_id *cm_id =
-                       resource_user_context;
-
-               pr_debug("C2_RES_IND_EP event_id=%d\n", event_id);
-               if (event_id != CCAE_CONNECTION_REQUEST) {
-                       pr_debug("%s: Invalid event_id: %d\n",
-                               __func__, event_id);
-                       break;
-               }
-               cm_event.event = IW_CM_EVENT_CONNECT_REQUEST;
-               cm_event.provider_data = (void*)(unsigned long)req->cr_handle;
-               laddr->sin_addr.s_addr = req->laddr;
-               raddr->sin_addr.s_addr = req->raddr;
-               laddr->sin_port = req->lport;
-               raddr->sin_port = req->rport;
-               cm_event.private_data_len =
-                       be32_to_cpu(req->private_data_length);
-               cm_event.private_data = req->private_data;
-               /*
-                * Until ird/ord negotiation via MPAv2 support is added, send
-                * max supported values
-                */
-               cm_event.ird = cm_event.ord = 128;
-
-               if (cm_id->event_handler)
-                       cm_id->event_handler(cm_id, &cm_event);
-               break;
-       }
-
-       case C2_RES_IND_CQ:{
-               struct c2_cq *cq =
-                   resource_user_context;
-
-               pr_debug("IB_EVENT_CQ_ERR\n");
-               ib_event.device = &c2dev->ibdev;
-               ib_event.element.cq = &cq->ibcq;
-               ib_event.event = IB_EVENT_CQ_ERR;
-
-               if (cq->ibcq.event_handler)
-                       cq->ibcq.event_handler(&ib_event,
-                                              cq->ibcq.cq_context);
-               break;
-       }
-
-       default:
-               printk("Bad resource indicator = %d\n",
-                      resource_indicator);
-               break;
-       }
-
- ignore_it:
-       c2_mq_free(mq);
-}
diff --git a/drivers/staging/rdma/amso1100/c2_ae.h b/drivers/staging/rdma/amso1100/c2_ae.h
deleted file mode 100644 (file)
index 3a065c3..0000000
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef _C2_AE_H_
-#define _C2_AE_H_
-
-/*
- * WARNING: If you change this file, also bump C2_IVN_BASE
- * in common/include/clustercore/c2_ivn.h.
- */
-
-/*
- * Asynchronous Event Identifiers
- *
- * These start at 0x80 only so it's obvious from inspection that
- * they are not work-request statuses.  This isn't critical.
- *
- * NOTE: these event id's must fit in eight bits.
- */
-enum c2_event_id {
-       CCAE_REMOTE_SHUTDOWN = 0x80,
-       CCAE_ACTIVE_CONNECT_RESULTS,
-       CCAE_CONNECTION_REQUEST,
-       CCAE_LLP_CLOSE_COMPLETE,
-       CCAE_TERMINATE_MESSAGE_RECEIVED,
-       CCAE_LLP_CONNECTION_RESET,
-       CCAE_LLP_CONNECTION_LOST,
-       CCAE_LLP_SEGMENT_SIZE_INVALID,
-       CCAE_LLP_INVALID_CRC,
-       CCAE_LLP_BAD_FPDU,
-       CCAE_INVALID_DDP_VERSION,
-       CCAE_INVALID_RDMA_VERSION,
-       CCAE_UNEXPECTED_OPCODE,
-       CCAE_INVALID_DDP_QUEUE_NUMBER,
-       CCAE_RDMA_READ_NOT_ENABLED,
-       CCAE_RDMA_WRITE_NOT_ENABLED,
-       CCAE_RDMA_READ_TOO_SMALL,
-       CCAE_NO_L_BIT,
-       CCAE_TAGGED_INVALID_STAG,
-       CCAE_TAGGED_BASE_BOUNDS_VIOLATION,
-       CCAE_TAGGED_ACCESS_RIGHTS_VIOLATION,
-       CCAE_TAGGED_INVALID_PD,
-       CCAE_WRAP_ERROR,
-       CCAE_BAD_CLOSE,
-       CCAE_BAD_LLP_CLOSE,
-       CCAE_INVALID_MSN_RANGE,
-       CCAE_INVALID_MSN_GAP,
-       CCAE_IRRQ_OVERFLOW,
-       CCAE_IRRQ_MSN_GAP,
-       CCAE_IRRQ_MSN_RANGE,
-       CCAE_IRRQ_INVALID_STAG,
-       CCAE_IRRQ_BASE_BOUNDS_VIOLATION,
-       CCAE_IRRQ_ACCESS_RIGHTS_VIOLATION,
-       CCAE_IRRQ_INVALID_PD,
-       CCAE_IRRQ_WRAP_ERROR,
-       CCAE_CQ_SQ_COMPLETION_OVERFLOW,
-       CCAE_CQ_RQ_COMPLETION_ERROR,
-       CCAE_QP_SRQ_WQE_ERROR,
-       CCAE_QP_LOCAL_CATASTROPHIC_ERROR,
-       CCAE_CQ_OVERFLOW,
-       CCAE_CQ_OPERATION_ERROR,
-       CCAE_SRQ_LIMIT_REACHED,
-       CCAE_QP_RQ_LIMIT_REACHED,
-       CCAE_SRQ_CATASTROPHIC_ERROR,
-       CCAE_RNIC_CATASTROPHIC_ERROR
-/* WARNING If you add more id's, make sure their values fit in eight bits. */
-};
-
-/*
- * Resource Indicators and Identifiers
- */
-enum c2_resource_indicator {
-       C2_RES_IND_QP = 1,
-       C2_RES_IND_EP,
-       C2_RES_IND_CQ,
-       C2_RES_IND_SRQ,
-};
-
-#endif /* _C2_AE_H_ */
diff --git a/drivers/staging/rdma/amso1100/c2_alloc.c b/drivers/staging/rdma/amso1100/c2_alloc.c
deleted file mode 100644 (file)
index 039872d..0000000
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * Copyright (c) 2004 Topspin Communications.  All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/errno.h>
-#include <linux/bitmap.h>
-
-#include "c2.h"
-
-static int c2_alloc_mqsp_chunk(struct c2_dev *c2dev, gfp_t gfp_mask,
-                              struct sp_chunk **head)
-{
-       int i;
-       struct sp_chunk *new_head;
-       dma_addr_t dma_addr;
-
-       new_head = dma_alloc_coherent(&c2dev->pcidev->dev, PAGE_SIZE,
-                                     &dma_addr, gfp_mask);
-       if (new_head == NULL)
-               return -ENOMEM;
-
-       new_head->dma_addr = dma_addr;
-       dma_unmap_addr_set(new_head, mapping, new_head->dma_addr);
-
-       new_head->next = NULL;
-       new_head->head = 0;
-
-       /* build list where each index is the next free slot */
-       for (i = 0;
-            i < (PAGE_SIZE - sizeof(struct sp_chunk) -
-                 sizeof(u16)) / sizeof(u16) - 1;
-            i++) {
-               new_head->shared_ptr[i] = i + 1;
-       }
-       /* terminate list */
-       new_head->shared_ptr[i] = 0xFFFF;
-
-       *head = new_head;
-       return 0;
-}
-
-int c2_init_mqsp_pool(struct c2_dev *c2dev, gfp_t gfp_mask,
-                     struct sp_chunk **root)
-{
-       return c2_alloc_mqsp_chunk(c2dev, gfp_mask, root);
-}
-
-void c2_free_mqsp_pool(struct c2_dev *c2dev, struct sp_chunk *root)
-{
-       struct sp_chunk *next;
-
-       while (root) {
-               next = root->next;
-               dma_free_coherent(&c2dev->pcidev->dev, PAGE_SIZE, root,
-                                 dma_unmap_addr(root, mapping));
-               root = next;
-       }
-}
-
-__be16 *c2_alloc_mqsp(struct c2_dev *c2dev, struct sp_chunk *head,
-                     dma_addr_t *dma_addr, gfp_t gfp_mask)
-{
-       u16 mqsp;
-
-       while (head) {
-               mqsp = head->head;
-               if (mqsp != 0xFFFF) {
-                       head->head = head->shared_ptr[mqsp];
-                       break;
-               } else if (head->next == NULL) {
-                       if (c2_alloc_mqsp_chunk(c2dev, gfp_mask, &head->next) ==
-                           0) {
-                               head = head->next;
-                               mqsp = head->head;
-                               head->head = head->shared_ptr[mqsp];
-                               break;
-                       } else
-                               return NULL;
-               } else
-                       head = head->next;
-       }
-       if (head) {
-               *dma_addr = head->dma_addr +
-                           ((unsigned long) &(head->shared_ptr[mqsp]) -
-                            (unsigned long) head);
-               pr_debug("%s addr %p dma_addr %llx\n", __func__,
-                        &(head->shared_ptr[mqsp]), (unsigned long long) *dma_addr);
-               return (__force __be16 *) &(head->shared_ptr[mqsp]);
-       }
-       return NULL;
-}
-
-void c2_free_mqsp(__be16 *mqsp)
-{
-       struct sp_chunk *head;
-       u16 idx;
-
-       /* The chunk containing this ptr begins at the page boundary */
-       head = (struct sp_chunk *) ((unsigned long) mqsp & PAGE_MASK);
-
-       /* Link head to new mqsp */
-       *mqsp = (__force __be16) head->head;
-
-       /* Compute the shared_ptr index */
-       idx = (offset_in_page(mqsp)) >> 1;
-       idx -= (unsigned long) &(((struct sp_chunk *) 0)->shared_ptr[0]) >> 1;
-
-       /* Point this index at the head */
-       head->shared_ptr[idx] = head->head;
-
-       /* Point head at this index */
-       head->head = idx;
-}
diff --git a/drivers/staging/rdma/amso1100/c2_cm.c b/drivers/staging/rdma/amso1100/c2_cm.c
deleted file mode 100644 (file)
index f8dbdb9..0000000
+++ /dev/null
@@ -1,458 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc.  All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-#include <linux/slab.h>
-
-#include "c2.h"
-#include "c2_wr.h"
-#include "c2_vq.h"
-#include <rdma/iw_cm.h>
-
-int c2_llp_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
-{
-       struct c2_dev *c2dev = to_c2dev(cm_id->device);
-       struct ib_qp *ibqp;
-       struct c2_qp *qp;
-       struct c2wr_qp_connect_req *wr; /* variable size needs a malloc. */
-       struct c2_vq_req *vq_req;
-       int err;
-       struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr;
-
-       if (cm_id->remote_addr.ss_family != AF_INET)
-               return -ENOSYS;
-
-       ibqp = c2_get_qp(cm_id->device, iw_param->qpn);
-       if (!ibqp)
-               return -EINVAL;
-       qp = to_c2qp(ibqp);
-
-       /* Associate QP <--> CM_ID */
-       cm_id->provider_data = qp;
-       cm_id->add_ref(cm_id);
-       qp->cm_id = cm_id;
-
-       /*
-        * only support the max private_data length
-        */
-       if (iw_param->private_data_len > C2_MAX_PRIVATE_DATA_SIZE) {
-               err = -EINVAL;
-               goto bail0;
-       }
-       /*
-        * Set the rdma read limits
-        */
-       err = c2_qp_set_read_limits(c2dev, qp, iw_param->ord, iw_param->ird);
-       if (err)
-               goto bail0;
-
-       /*
-        * Create and send a WR_QP_CONNECT...
-        */
-       wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
-       if (!wr) {
-               err = -ENOMEM;
-               goto bail0;
-       }
-
-       vq_req = vq_req_alloc(c2dev);
-       if (!vq_req) {
-               err = -ENOMEM;
-               goto bail1;
-       }
-
-       c2_wr_set_id(wr, CCWR_QP_CONNECT);
-       wr->hdr.context = 0;
-       wr->rnic_handle = c2dev->adapter_handle;
-       wr->qp_handle = qp->adapter_handle;
-
-       wr->remote_addr = raddr->sin_addr.s_addr;
-       wr->remote_port = raddr->sin_port;
-
-       /*
-        * Move any private data from the callers's buf into
-        * the WR.
-        */
-       if (iw_param->private_data) {
-               wr->private_data_length =
-                       cpu_to_be32(iw_param->private_data_len);
-               memcpy(&wr->private_data[0], iw_param->private_data,
-                      iw_param->private_data_len);
-       } else
-               wr->private_data_length = 0;
-
-       /*
-        * Send WR to adapter.  NOTE: There is no synch reply from
-        * the adapter.
-        */
-       err = vq_send_wr(c2dev, (union c2wr *) wr);
-       vq_req_free(c2dev, vq_req);
-
- bail1:
-       kfree(wr);
- bail0:
-       if (err) {
-               /*
-                * If we fail, release reference on QP and
-                * disassociate QP from CM_ID
-                */
-               cm_id->provider_data = NULL;
-               qp->cm_id = NULL;
-               cm_id->rem_ref(cm_id);
-       }
-       return err;
-}
-
-int c2_llp_service_create(struct iw_cm_id *cm_id, int backlog)
-{
-       struct c2_dev *c2dev;
-       struct c2wr_ep_listen_create_req wr;
-       struct c2wr_ep_listen_create_rep *reply;
-       struct c2_vq_req *vq_req;
-       int err;
-       struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr;
-
-       if (cm_id->local_addr.ss_family != AF_INET)
-               return -ENOSYS;
-
-       c2dev = to_c2dev(cm_id->device);
-       if (c2dev == NULL)
-               return -EINVAL;
-
-       /*
-        * Allocate verbs request.
-        */
-       vq_req = vq_req_alloc(c2dev);
-       if (!vq_req)
-               return -ENOMEM;
-
-       /*
-        * Build the WR
-        */
-       c2_wr_set_id(&wr, CCWR_EP_LISTEN_CREATE);
-       wr.hdr.context = (u64) (unsigned long) vq_req;
-       wr.rnic_handle = c2dev->adapter_handle;
-       wr.local_addr = laddr->sin_addr.s_addr;
-       wr.local_port = laddr->sin_port;
-       wr.backlog = cpu_to_be32(backlog);
-       wr.user_context = (u64) (unsigned long) cm_id;
-
-       /*
-        * Reference the request struct.  Dereferenced in the int handler.
-        */
-       vq_req_get(c2dev, vq_req);
-
-       /*
-        * Send WR to adapter
-        */
-       err = vq_send_wr(c2dev, (union c2wr *) & wr);
-       if (err) {
-               vq_req_put(c2dev, vq_req);
-               goto bail0;
-       }
-
-       /*
-        * Wait for reply from adapter
-        */
-       err = vq_wait_for_reply(c2dev, vq_req);
-       if (err)
-               goto bail0;
-
-       /*
-        * Process reply
-        */
-       reply =
-           (struct c2wr_ep_listen_create_rep *) (unsigned long) vq_req->reply_msg;
-       if (!reply) {
-               err = -ENOMEM;
-               goto bail1;
-       }
-
-       if ((err = c2_errno(reply)) != 0)
-               goto bail1;
-
-       /*
-        * Keep the adapter handle. Used in subsequent destroy
-        */
-       cm_id->provider_data = (void*)(unsigned long) reply->ep_handle;
-
-       /*
-        * free vq stuff
-        */
-       vq_repbuf_free(c2dev, reply);
-       vq_req_free(c2dev, vq_req);
-
-       return 0;
-
- bail1:
-       vq_repbuf_free(c2dev, reply);
- bail0:
-       vq_req_free(c2dev, vq_req);
-       return err;
-}
-
-
-int c2_llp_service_destroy(struct iw_cm_id *cm_id)
-{
-
-       struct c2_dev *c2dev;
-       struct c2wr_ep_listen_destroy_req wr;
-       struct c2wr_ep_listen_destroy_rep *reply;
-       struct c2_vq_req *vq_req;
-       int err;
-
-       c2dev = to_c2dev(cm_id->device);
-       if (c2dev == NULL)
-               return -EINVAL;
-
-       /*
-        * Allocate verbs request.
-        */
-       vq_req = vq_req_alloc(c2dev);
-       if (!vq_req)
-               return -ENOMEM;
-
-       /*
-        * Build the WR
-        */
-       c2_wr_set_id(&wr, CCWR_EP_LISTEN_DESTROY);
-       wr.hdr.context = (unsigned long) vq_req;
-       wr.rnic_handle = c2dev->adapter_handle;
-       wr.ep_handle = (u32)(unsigned long)cm_id->provider_data;
-
-       /*
-        * reference the request struct.  dereferenced in the int handler.
-        */
-       vq_req_get(c2dev, vq_req);
-
-       /*
-        * Send WR to adapter
-        */
-       err = vq_send_wr(c2dev, (union c2wr *) & wr);
-       if (err) {
-               vq_req_put(c2dev, vq_req);
-               goto bail0;
-       }
-
-       /*
-        * Wait for reply from adapter
-        */
-       err = vq_wait_for_reply(c2dev, vq_req);
-       if (err)
-               goto bail0;
-
-       /*
-        * Process reply
-        */
-       reply=(struct c2wr_ep_listen_destroy_rep *)(unsigned long)vq_req->reply_msg;
-       if (!reply) {
-               err = -ENOMEM;
-               goto bail0;
-       }
-
-       vq_repbuf_free(c2dev, reply);
- bail0:
-       vq_req_free(c2dev, vq_req);
-       return err;
-}
-
-int c2_llp_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
-{
-       struct c2_dev *c2dev = to_c2dev(cm_id->device);
-       struct c2_qp *qp;
-       struct ib_qp *ibqp;
-       struct c2wr_cr_accept_req *wr;  /* variable length WR */
-       struct c2_vq_req *vq_req;
-       struct c2wr_cr_accept_rep *reply;       /* VQ Reply msg ptr. */
-       int err;
-
-       ibqp = c2_get_qp(cm_id->device, iw_param->qpn);
-       if (!ibqp)
-               return -EINVAL;
-       qp = to_c2qp(ibqp);
-
-       /* Set the RDMA read limits */
-       err = c2_qp_set_read_limits(c2dev, qp, iw_param->ord, iw_param->ird);
-       if (err)
-               goto bail0;
-
-       /* Allocate verbs request. */
-       vq_req = vq_req_alloc(c2dev);
-       if (!vq_req) {
-               err = -ENOMEM;
-               goto bail0;
-       }
-       vq_req->qp = qp;
-       vq_req->cm_id = cm_id;
-       vq_req->event = IW_CM_EVENT_ESTABLISHED;
-
-       wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
-       if (!wr) {
-               err = -ENOMEM;
-               goto bail1;
-       }
-
-       /* Build the WR */
-       c2_wr_set_id(wr, CCWR_CR_ACCEPT);
-       wr->hdr.context = (unsigned long) vq_req;
-       wr->rnic_handle = c2dev->adapter_handle;
-       wr->ep_handle = (u32) (unsigned long) cm_id->provider_data;
-       wr->qp_handle = qp->adapter_handle;
-
-       /* Replace the cr_handle with the QP after accept */
-       cm_id->provider_data = qp;
-       cm_id->add_ref(cm_id);
-       qp->cm_id = cm_id;
-
-       cm_id->provider_data = qp;
-
-       /* Validate private_data length */
-       if (iw_param->private_data_len > C2_MAX_PRIVATE_DATA_SIZE) {
-               err = -EINVAL;
-               goto bail1;
-       }
-
-       if (iw_param->private_data) {
-               wr->private_data_length = cpu_to_be32(iw_param->private_data_len);
-               memcpy(&wr->private_data[0],
-                      iw_param->private_data, iw_param->private_data_len);
-       } else
-               wr->private_data_length = 0;
-
-       /* Reference the request struct.  Dereferenced in the int handler. */
-       vq_req_get(c2dev, vq_req);
-
-       /* Send WR to adapter */
-       err = vq_send_wr(c2dev, (union c2wr *) wr);
-       if (err) {
-               vq_req_put(c2dev, vq_req);
-               goto bail1;
-       }
-
-       /* Wait for reply from adapter */
-       err = vq_wait_for_reply(c2dev, vq_req);
-       if (err)
-               goto bail1;
-
-       /* Check that reply is present */
-       reply = (struct c2wr_cr_accept_rep *) (unsigned long) vq_req->reply_msg;
-       if (!reply) {
-               err = -ENOMEM;
-               goto bail1;
-       }
-
-       err = c2_errno(reply);
-       vq_repbuf_free(c2dev, reply);
-
-       if (!err)
-               c2_set_qp_state(qp, C2_QP_STATE_RTS);
- bail1:
-       kfree(wr);
-       vq_req_free(c2dev, vq_req);
- bail0:
-       if (err) {
-               /*
-                * If we fail, release reference on QP and
-                * disassociate QP from CM_ID
-                */
-               cm_id->provider_data = NULL;
-               qp->cm_id = NULL;
-               cm_id->rem_ref(cm_id);
-       }
-       return err;
-}
-
-int c2_llp_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
-{
-       struct c2_dev *c2dev;
-       struct c2wr_cr_reject_req wr;
-       struct c2_vq_req *vq_req;
-       struct c2wr_cr_reject_rep *reply;
-       int err;
-
-       c2dev = to_c2dev(cm_id->device);
-
-       /*
-        * Allocate verbs request.
-        */
-       vq_req = vq_req_alloc(c2dev);
-       if (!vq_req)
-               return -ENOMEM;
-
-       /*
-        * Build the WR
-        */
-       c2_wr_set_id(&wr, CCWR_CR_REJECT);
-       wr.hdr.context = (unsigned long) vq_req;
-       wr.rnic_handle = c2dev->adapter_handle;
-       wr.ep_handle = (u32) (unsigned long) cm_id->provider_data;
-
-       /*
-        * reference the request struct.  dereferenced in the int handler.
-        */
-       vq_req_get(c2dev, vq_req);
-
-       /*
-        * Send WR to adapter
-        */
-       err = vq_send_wr(c2dev, (union c2wr *) & wr);
-       if (err) {
-               vq_req_put(c2dev, vq_req);
-               goto bail0;
-       }
-
-       /*
-        * Wait for reply from adapter
-        */
-       err = vq_wait_for_reply(c2dev, vq_req);
-       if (err)
-               goto bail0;
-
-       /*
-        * Process reply
-        */
-       reply = (struct c2wr_cr_reject_rep *) (unsigned long)
-               vq_req->reply_msg;
-       if (!reply) {
-               err = -ENOMEM;
-               goto bail0;
-       }
-       err = c2_errno(reply);
-       /*
-        * free vq stuff
-        */
-       vq_repbuf_free(c2dev, reply);
-
- bail0:
-       vq_req_free(c2dev, vq_req);
-       return err;
-}
diff --git a/drivers/staging/rdma/amso1100/c2_cq.c b/drivers/staging/rdma/amso1100/c2_cq.c
deleted file mode 100644 (file)
index 3ef881f..0000000
+++ /dev/null
@@ -1,440 +0,0 @@
-/*
- * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
- * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
- * Copyright (c) 2005 Cisco Systems, Inc. All rights reserved.
- * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-#include <linux/gfp.h>
-
-#include "c2.h"
-#include "c2_vq.h"
-#include "c2_status.h"
-
-#define C2_CQ_MSG_SIZE ((sizeof(struct c2wr_ce) + 32-1) & ~(32-1))
-
-static struct c2_cq *c2_cq_get(struct c2_dev *c2dev, int cqn)
-{
-       struct c2_cq *cq;
-       unsigned long flags;
-
-       spin_lock_irqsave(&c2dev->lock, flags);
-       cq = c2dev->qptr_array[cqn];
-       if (!cq) {
-               spin_unlock_irqrestore(&c2dev->lock, flags);
-               return NULL;
-       }
-       atomic_inc(&cq->refcount);
-       spin_unlock_irqrestore(&c2dev->lock, flags);
-       return cq;
-}
-
-static void c2_cq_put(struct c2_cq *cq)
-{
-       if (atomic_dec_and_test(&cq->refcount))
-               wake_up(&cq->wait);
-}
-
-void c2_cq_event(struct c2_dev *c2dev, u32 mq_index)
-{
-       struct c2_cq *cq;
-
-       cq = c2_cq_get(c2dev, mq_index);
-       if (!cq) {
-               printk("discarding events on destroyed CQN=%d\n", mq_index);
-               return;
-       }
-
-       (*cq->ibcq.comp_handler) (&cq->ibcq, cq->ibcq.cq_context);
-       c2_cq_put(cq);
-}
-
-void c2_cq_clean(struct c2_dev *c2dev, struct c2_qp *qp, u32 mq_index)
-{
-       struct c2_cq *cq;
-       struct c2_mq *q;
-
-       cq = c2_cq_get(c2dev, mq_index);
-       if (!cq)
-               return;
-
-       spin_lock_irq(&cq->lock);
-       q = &cq->mq;
-       if (q && !c2_mq_empty(q)) {
-               u16 priv = q->priv;
-               struct c2wr_ce *msg;
-
-               while (priv != be16_to_cpu(*q->shared)) {
-                       msg = (struct c2wr_ce *)
-                               (q->msg_pool.host + priv * q->msg_size);
-                       if (msg->qp_user_context == (u64) (unsigned long) qp) {
-                               msg->qp_user_context = (u64) 0;
-                       }
-                       priv = (priv + 1) % q->q_size;
-               }
-       }
-       spin_unlock_irq(&cq->lock);
-       c2_cq_put(cq);
-}
-
-static inline enum ib_wc_status c2_cqe_status_to_openib(u8 status)
-{
-       switch (status) {
-       case C2_OK:
-               return IB_WC_SUCCESS;
-       case CCERR_FLUSHED:
-               return IB_WC_WR_FLUSH_ERR;
-       case CCERR_BASE_AND_BOUNDS_VIOLATION:
-               return IB_WC_LOC_PROT_ERR;
-       case CCERR_ACCESS_VIOLATION:
-               return IB_WC_LOC_ACCESS_ERR;
-       case CCERR_TOTAL_LENGTH_TOO_BIG:
-               return IB_WC_LOC_LEN_ERR;
-       case CCERR_INVALID_WINDOW:
-               return IB_WC_MW_BIND_ERR;
-       default:
-               return IB_WC_GENERAL_ERR;
-       }
-}
-
-
-static inline int c2_poll_one(struct c2_dev *c2dev,
-                             struct c2_cq *cq, struct ib_wc *entry)
-{
-       struct c2wr_ce *ce;
-       struct c2_qp *qp;
-       int is_recv = 0;
-
-       ce = c2_mq_consume(&cq->mq);
-       if (!ce) {
-               return -EAGAIN;
-       }
-
-       /*
-        * if the qp returned is null then this qp has already
-        * been freed and we are unable process the completion.
-        * try pulling the next message
-        */
-       while ((qp =
-               (struct c2_qp *) (unsigned long) ce->qp_user_context) == NULL) {
-               c2_mq_free(&cq->mq);
-               ce = c2_mq_consume(&cq->mq);
-               if (!ce)
-                       return -EAGAIN;
-       }
-
-       entry->status = c2_cqe_status_to_openib(c2_wr_get_result(ce));
-       entry->wr_id = ce->hdr.context;
-       entry->qp = &qp->ibqp;
-       entry->wc_flags = 0;
-       entry->slid = 0;
-       entry->sl = 0;
-       entry->src_qp = 0;
-       entry->dlid_path_bits = 0;
-       entry->pkey_index = 0;
-
-       switch (c2_wr_get_id(ce)) {
-       case C2_WR_TYPE_SEND:
-               entry->opcode = IB_WC_SEND;
-               break;
-       case C2_WR_TYPE_RDMA_WRITE:
-               entry->opcode = IB_WC_RDMA_WRITE;
-               break;
-       case C2_WR_TYPE_RDMA_READ:
-               entry->opcode = IB_WC_RDMA_READ;
-               break;
-       case C2_WR_TYPE_BIND_MW:
-               entry->opcode = IB_WC_BIND_MW;
-               break;
-       case C2_WR_TYPE_RECV:
-               entry->byte_len = be32_to_cpu(ce->bytes_rcvd);
-               entry->opcode = IB_WC_RECV;
-               is_recv = 1;
-               break;
-       default:
-               break;
-       }
-
-       /* consume the WQEs */
-       if (is_recv)
-               c2_mq_lconsume(&qp->rq_mq, 1);
-       else
-               c2_mq_lconsume(&qp->sq_mq,
-                              be32_to_cpu(c2_wr_get_wqe_count(ce)) + 1);
-
-       /* free the message */
-       c2_mq_free(&cq->mq);
-
-       return 0;
-}
-
-int c2_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
-{
-       struct c2_dev *c2dev = to_c2dev(ibcq->device);
-       struct c2_cq *cq = to_c2cq(ibcq);
-       unsigned long flags;
-       int npolled, err;
-
-       spin_lock_irqsave(&cq->lock, flags);
-
-       for (npolled = 0; npolled < num_entries; ++npolled) {
-
-               err = c2_poll_one(c2dev, cq, entry + npolled);
-               if (err)
-                       break;
-       }
-
-       spin_unlock_irqrestore(&cq->lock, flags);
-
-       return npolled;
-}
-
-int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
-{
-       struct c2_mq_shared __iomem *shared;
-       struct c2_cq *cq;
-       unsigned long flags;
-       int ret = 0;
-
-       cq = to_c2cq(ibcq);
-       shared = cq->mq.peer;
-
-       if ((notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_NEXT_COMP)
-               writeb(C2_CQ_NOTIFICATION_TYPE_NEXT, &shared->notification_type);
-       else if ((notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED)
-               writeb(C2_CQ_NOTIFICATION_TYPE_NEXT_SE, &shared->notification_type);
-       else
-               return -EINVAL;
-
-       writeb(CQ_WAIT_FOR_DMA | CQ_ARMED, &shared->armed);
-
-       /*
-        * Now read back shared->armed to make the PCI
-        * write synchronous.  This is necessary for
-        * correct cq notification semantics.
-        */
-       readb(&shared->armed);
-
-       if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) {
-               spin_lock_irqsave(&cq->lock, flags);
-               ret = !c2_mq_empty(&cq->mq);
-               spin_unlock_irqrestore(&cq->lock, flags);
-       }
-
-       return ret;
-}
-
-static void c2_free_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq)
-{
-       dma_free_coherent(&c2dev->pcidev->dev, mq->q_size * mq->msg_size,
-                         mq->msg_pool.host, dma_unmap_addr(mq, mapping));
-}
-
-static int c2_alloc_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq,
-                          size_t q_size, size_t msg_size)
-{
-       u8 *pool_start;
-
-       if (q_size > SIZE_MAX / msg_size)
-               return -EINVAL;
-
-       pool_start = dma_alloc_coherent(&c2dev->pcidev->dev, q_size * msg_size,
-                                       &mq->host_dma, GFP_KERNEL);
-       if (!pool_start)
-               return -ENOMEM;
-
-       c2_mq_rep_init(mq,
-                      0,               /* index (currently unknown) */
-                      q_size,
-                      msg_size,
-                      pool_start,
-                      NULL,    /* peer (currently unknown) */
-                      C2_MQ_HOST_TARGET);
-
-       dma_unmap_addr_set(mq, mapping, mq->host_dma);
-
-       return 0;
-}
-
-int c2_init_cq(struct c2_dev *c2dev, int entries,
-              struct c2_ucontext *ctx, struct c2_cq *cq)
-{
-       struct c2wr_cq_create_req wr;
-       struct c2wr_cq_create_rep *reply;
-       unsigned long peer_pa;
-       struct c2_vq_req *vq_req;
-       int err;
-
-       might_sleep();
-
-       cq->ibcq.cqe = entries - 1;
-       cq->is_kernel = !ctx;
-
-       /* Allocate a shared pointer */
-       cq->mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
-                                     &cq->mq.shared_dma, GFP_KERNEL);
-       if (!cq->mq.shared)
-               return -ENOMEM;
-
-       /* Allocate pages for the message pool */
-       err = c2_alloc_cq_buf(c2dev, &cq->mq, entries + 1, C2_CQ_MSG_SIZE);
-       if (err)
-               goto bail0;
-
-       vq_req = vq_req_alloc(c2dev);
-       if (!vq_req) {
-               err = -ENOMEM;
-               goto bail1;
-       }
-
-       memset(&wr, 0, sizeof(wr));
-       c2_wr_set_id(&wr, CCWR_CQ_CREATE);
-       wr.hdr.context = (unsigned long) vq_req;
-       wr.rnic_handle = c2dev->adapter_handle;
-       wr.msg_size = cpu_to_be32(cq->mq.msg_size);
-       wr.depth = cpu_to_be32(cq->mq.q_size);
-       wr.shared_ht = cpu_to_be64(cq->mq.shared_dma);
-       wr.msg_pool = cpu_to_be64(cq->mq.host_dma);
-       wr.user_context = (u64) (unsigned long) (cq);
-
-       vq_req_get(c2dev, vq_req);
-
-       err = vq_send_wr(c2dev, (union c2wr *) & wr);
-       if (err) {
-               vq_req_put(c2dev, vq_req);
-               goto bail2;
-       }
-
-       err = vq_wait_for_reply(c2dev, vq_req);
-       if (err)
-               goto bail2;
-
-       reply = (struct c2wr_cq_create_rep *) (unsigned long) (vq_req->reply_msg);
-       if (!reply) {
-               err = -ENOMEM;
-               goto bail2;
-       }
-
-       if ((err = c2_errno(reply)) != 0)
-               goto bail3;
-
-       cq->adapter_handle = reply->cq_handle;
-       cq->mq.index = be32_to_cpu(reply->mq_index);
-
-       peer_pa = c2dev->pa + be32_to_cpu(reply->adapter_shared);
-       cq->mq.peer = ioremap_nocache(peer_pa, PAGE_SIZE);
-       if (!cq->mq.peer) {
-               err = -ENOMEM;
-               goto bail3;
-       }
-
-       vq_repbuf_free(c2dev, reply);
-       vq_req_free(c2dev, vq_req);
-
-       spin_lock_init(&cq->lock);
-       atomic_set(&cq->refcount, 1);
-       init_waitqueue_head(&cq->wait);
-
-       /*
-        * Use the MQ index allocated by the adapter to
-        * store the CQ in the qptr_array
-        */
-       cq->cqn = cq->mq.index;
-       c2dev->qptr_array[cq->cqn] = cq;
-
-       return 0;
-
-bail3:
-       vq_repbuf_free(c2dev, reply);
-bail2:
-       vq_req_free(c2dev, vq_req);
-bail1:
-       c2_free_cq_buf(c2dev, &cq->mq);
-bail0:
-       c2_free_mqsp(cq->mq.shared);
-
-       return err;
-}
-
-void c2_free_cq(struct c2_dev *c2dev, struct c2_cq *cq)
-{
-       int err;
-       struct c2_vq_req *vq_req;
-       struct c2wr_cq_destroy_req wr;
-       struct c2wr_cq_destroy_rep *reply;
-
-       might_sleep();
-
-       /* Clear CQ from the qptr array */
-       spin_lock_irq(&c2dev->lock);
-       c2dev->qptr_array[cq->mq.index] = NULL;
-       atomic_dec(&cq->refcount);
-       spin_unlock_irq(&c2dev->lock);
-
-       wait_event(cq->wait, !atomic_read(&cq->refcount));
-
-       vq_req = vq_req_alloc(c2dev);
-       if (!vq_req) {
-               goto bail0;
-       }
-
-       memset(&wr, 0, sizeof(wr));
-       c2_wr_set_id(&wr, CCWR_CQ_DESTROY);
-       wr.hdr.context = (unsigned long) vq_req;
-       wr.rnic_handle = c2dev->adapter_handle;
-       wr.cq_handle = cq->adapter_handle;
-
-       vq_req_get(c2dev, vq_req);
-
-       err = vq_send_wr(c2dev, (union c2wr *) & wr);
-       if (err) {
-               vq_req_put(c2dev, vq_req);
-               goto bail1;
-       }
-
-       err = vq_wait_for_reply(c2dev, vq_req);
-       if (err)
-               goto bail1;
-
-       reply = (struct c2wr_cq_destroy_rep *) (unsigned long) (vq_req->reply_msg);
-       if (reply)
-               vq_repbuf_free(c2dev, reply);
-bail1:
-       vq_req_free(c2dev, vq_req);
-bail0:
-       if (cq->is_kernel) {
-               c2_free_cq_buf(c2dev, &cq->mq);
-       }
-
-       return;
-}
diff --git a/drivers/staging/rdma/amso1100/c2_intr.c b/drivers/staging/rdma/amso1100/c2_intr.c
deleted file mode 100644 (file)
index 74b32a9..0000000
+++ /dev/null
@@ -1,219 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "c2.h"
-#include <rdma/iw_cm.h>
-#include "c2_vq.h"
-
-static void handle_mq(struct c2_dev *c2dev, u32 index);
-static void handle_vq(struct c2_dev *c2dev, u32 mq_index);
-
-/*
- * Handle RNIC interrupts
- */
-void c2_rnic_interrupt(struct c2_dev *c2dev)
-{
-       unsigned int mq_index;
-
-       while (c2dev->hints_read != be16_to_cpu(*c2dev->hint_count)) {
-               mq_index = readl(c2dev->regs + PCI_BAR0_HOST_HINT);
-               if (mq_index & 0x80000000) {
-                       break;
-               }
-
-               c2dev->hints_read++;
-               handle_mq(c2dev, mq_index);
-       }
-
-}
-
-/*
- * Top level MQ handler
- */
-static void handle_mq(struct c2_dev *c2dev, u32 mq_index)
-{
-       if (c2dev->qptr_array[mq_index] == NULL) {
-               pr_debug("handle_mq: stray activity for mq_index=%d\n",
-                        mq_index);
-               return;
-       }
-
-       switch (mq_index) {
-       case (0):
-               /*
-                * An index of 0 in the activity queue
-                * indicates the req vq now has messages
-                * available...
-                *
-                * Wake up any waiters waiting on req VQ
-                * message availability.
-                */
-               wake_up(&c2dev->req_vq_wo);
-               break;
-       case (1):
-               handle_vq(c2dev, mq_index);
-               break;
-       case (2):
-               /* We have to purge the VQ in case there are pending
-                * accept reply requests that would result in the
-                * generation of an ESTABLISHED event. If we don't
-                * generate these first, a CLOSE event could end up
-                * being delivered before the ESTABLISHED event.
-                */
-               handle_vq(c2dev, 1);
-
-               c2_ae_event(c2dev, mq_index);
-               break;
-       default:
-               /* There is no event synchronization between CQ events
-                * and AE or CM events. In fact, CQE could be
-                * delivered for all of the I/O up to and including the
-                * FLUSH for a peer disconenct prior to the ESTABLISHED
-                * event being delivered to the app. The reason for this
-                * is that CM events are delivered on a thread, while AE
-                * and CM events are delivered on interrupt context.
-                */
-               c2_cq_event(c2dev, mq_index);
-               break;
-       }
-
-       return;
-}
-
-/*
- * Handles verbs WR replies.
- */
-static void handle_vq(struct c2_dev *c2dev, u32 mq_index)
-{
-       void *adapter_msg, *reply_msg;
-       struct c2wr_hdr *host_msg;
-       struct c2wr_hdr tmp;
-       struct c2_mq *reply_vq;
-       struct c2_vq_req *req;
-       struct iw_cm_event cm_event;
-       int err;
-
-       reply_vq = c2dev->qptr_array[mq_index];
-
-       /*
-        * get next msg from mq_index into adapter_msg.
-        * don't free it yet.
-        */
-       adapter_msg = c2_mq_consume(reply_vq);
-       if (adapter_msg == NULL) {
-               return;
-       }
-
-       host_msg = vq_repbuf_alloc(c2dev);
-
-       /*
-        * If we can't get a host buffer, then we'll still
-        * wakeup the waiter, we just won't give him the msg.
-        * It is assumed the waiter will deal with this...
-        */
-       if (!host_msg) {
-               pr_debug("handle_vq: no repbufs!\n");
-
-               /*
-                * just copy the WR header into a local variable.
-                * this allows us to still demux on the context
-                */
-               host_msg = &tmp;
-               memcpy(host_msg, adapter_msg, sizeof(tmp));
-               reply_msg = NULL;
-       } else {
-               memcpy(host_msg, adapter_msg, reply_vq->msg_size);
-               reply_msg = host_msg;
-       }
-
-       /*
-        * consume the msg from the MQ
-        */
-       c2_mq_free(reply_vq);
-
-       /*
-        * wakeup the waiter.
-        */
-       req = (struct c2_vq_req *) (unsigned long) host_msg->context;
-       if (req == NULL) {
-               /*
-                * We should never get here, as the adapter should
-                * never send us a reply that we're not expecting.
-                */
-               if (reply_msg != NULL)
-                       vq_repbuf_free(c2dev, host_msg);
-               pr_debug("handle_vq: UNEXPECTEDLY got NULL req\n");
-               return;
-       }
-
-       if (reply_msg)
-               err = c2_errno(reply_msg);
-       else
-               err = -ENOMEM;
-
-       if (!err) switch (req->event) {
-       case IW_CM_EVENT_ESTABLISHED:
-               c2_set_qp_state(req->qp,
-                               C2_QP_STATE_RTS);
-               /*
-                * Until ird/ord negotiation via MPAv2 support is added, send
-                * max supported values
-                */
-               cm_event.ird = cm_event.ord = 128;
-       case IW_CM_EVENT_CLOSE:
-
-               /*
-                * Move the QP to RTS if this is
-                * the established event
-                */
-               cm_event.event = req->event;
-               cm_event.status = 0;
-               cm_event.local_addr = req->cm_id->local_addr;
-               cm_event.remote_addr = req->cm_id->remote_addr;
-               cm_event.private_data = NULL;
-               cm_event.private_data_len = 0;
-               req->cm_id->event_handler(req->cm_id, &cm_event);
-               break;
-       default:
-               break;
-       }
-
-       req->reply_msg = (u64) (unsigned long) (reply_msg);
-       atomic_set(&req->reply_ready, 1);
-       wake_up(&req->wait_object);
-
-       /*
-        * If the request was cancelled, then this put will
-        * free the vq_req memory...and reply_msg!!!
-        */
-       vq_req_put(c2dev, req);
-}
diff --git a/drivers/staging/rdma/amso1100/c2_mm.c b/drivers/staging/rdma/amso1100/c2_mm.c
deleted file mode 100644 (file)
index 25081e2..0000000
+++ /dev/null
@@ -1,377 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <linux/slab.h>
-
-#include "c2.h"
-#include "c2_vq.h"
-
-#define PBL_VIRT 1
-#define PBL_PHYS 2
-
-/*
- * Send all the PBL messages to convey the remainder of the PBL
- * Wait for the adapter's reply on the last one.
- * This is indicated by setting the MEM_PBL_COMPLETE in the flags.
- *
- * NOTE:  vq_req is _not_ freed by this function.  The VQ Host
- *       Reply buffer _is_ freed by this function.
- */
-static int
-send_pbl_messages(struct c2_dev *c2dev, __be32 stag_index,
-                 unsigned long va, u32 pbl_depth,
-                 struct c2_vq_req *vq_req, int pbl_type)
-{
-       u32 pbe_count;          /* amt that fits in a PBL msg */
-       u32 count;              /* amt in this PBL MSG. */
-       struct c2wr_nsmr_pbl_req *wr;   /* PBL WR ptr */
-       struct c2wr_nsmr_pbl_rep *reply;        /* reply ptr */
-       int err, pbl_virt, pbl_index, i;
-
-       switch (pbl_type) {
-       case PBL_VIRT:
-               pbl_virt = 1;
-               break;
-       case PBL_PHYS:
-               pbl_virt = 0;
-               break;
-       default:
-               return -EINVAL;
-               break;
-       }
-
-       pbe_count = (c2dev->req_vq.msg_size -
-                    sizeof(struct c2wr_nsmr_pbl_req)) / sizeof(u64);
-       wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
-       if (!wr) {
-               return -ENOMEM;
-       }
-       c2_wr_set_id(wr, CCWR_NSMR_PBL);
-
-       /*
-        * Only the last PBL message will generate a reply from the verbs,
-        * so we set the context to 0 indicating there is no kernel verbs
-        * handler blocked awaiting this reply.
-        */
-       wr->hdr.context = 0;
-       wr->rnic_handle = c2dev->adapter_handle;
-       wr->stag_index = stag_index;    /* already swapped */
-       wr->flags = 0;
-       pbl_index = 0;
-       while (pbl_depth) {
-               count = min(pbe_count, pbl_depth);
-               wr->addrs_length = cpu_to_be32(count);
-
-               /*
-                *  If this is the last message, then reference the
-                *  vq request struct cuz we're gonna wait for a reply.
-                *  also make this PBL msg as the last one.
-                */
-               if (count == pbl_depth) {
-                       /*
-                        * reference the request struct.  dereferenced in the
-                        * int handler.
-                        */
-                       vq_req_get(c2dev, vq_req);
-                       wr->flags = cpu_to_be32(MEM_PBL_COMPLETE);
-
-                       /*
-                        * This is the last PBL message.
-                        * Set the context to our VQ Request Object so we can
-                        * wait for the reply.
-                        */
-                       wr->hdr.context = (unsigned long) vq_req;
-               }
-
-               /*
-                * If pbl_virt is set then va is a virtual address
-                * that describes a virtually contiguous memory
-                * allocation. The wr needs the start of each virtual page
-                * to be converted to the corresponding physical address
-                * of the page. If pbl_virt is not set then va is an array
-                * of physical addresses and there is no conversion to do.
-                * Just fill in the wr with what is in the array.
-                */
-               for (i = 0; i < count; i++) {
-                       if (pbl_virt) {
-                               va += PAGE_SIZE;
-                       } else {
-                               wr->paddrs[i] =
-                                   cpu_to_be64(((u64 *)va)[pbl_index + i]);
-                       }
-               }
-
-               /*
-                * Send WR to adapter
-                */
-               err = vq_send_wr(c2dev, (union c2wr *) wr);
-               if (err) {
-                       if (count <= pbe_count) {
-                               vq_req_put(c2dev, vq_req);
-                       }
-                       goto bail0;
-               }
-               pbl_depth -= count;
-               pbl_index += count;
-       }
-
-       /*
-        *  Now wait for the reply...
-        */
-       err = vq_wait_for_reply(c2dev, vq_req);
-       if (err) {
-               goto bail0;
-       }
-
-       /*
-        * Process reply
-        */
-       reply = (struct c2wr_nsmr_pbl_rep *) (unsigned long) vq_req->reply_msg;
-       if (!reply) {
-               err = -ENOMEM;
-               goto bail0;
-       }
-
-       err = c2_errno(reply);
-
-       vq_repbuf_free(c2dev, reply);
-bail0:
-       kfree(wr);
-       return err;
-}
-
-#define C2_PBL_MAX_DEPTH 131072
-int
-c2_nsmr_register_phys_kern(struct c2_dev *c2dev, u64 *addr_list,
-                          int page_size, int pbl_depth, u32 length,
-                          u32 offset, u64 *va, enum c2_acf acf,
-                          struct c2_mr *mr)
-{
-       struct c2_vq_req *vq_req;
-       struct c2wr_nsmr_register_req *wr;
-       struct c2wr_nsmr_register_rep *reply;
-       u16 flags;
-       int i, pbe_count, count;
-       int err;
-
-       if (!va || !length || !addr_list || !pbl_depth)
-               return -EINTR;
-
-       /*
-        * Verify PBL depth is within rnic max
-        */
-       if (pbl_depth > C2_PBL_MAX_DEPTH) {
-               return -EINTR;
-       }
-
-       /*
-        * allocate verbs request object
-        */
-       vq_req = vq_req_alloc(c2dev);
-       if (!vq_req)
-               return -ENOMEM;
-
-       wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
-       if (!wr) {
-               err = -ENOMEM;
-               goto bail0;
-       }
-
-       /*
-        * build the WR
-        */
-       c2_wr_set_id(wr, CCWR_NSMR_REGISTER);
-       wr->hdr.context = (unsigned long) vq_req;
-       wr->rnic_handle = c2dev->adapter_handle;
-
-       flags = (acf | MEM_VA_BASED | MEM_REMOTE);
-
-       /*
-        * compute how many pbes can fit in the message
-        */
-       pbe_count = (c2dev->req_vq.msg_size -
-                    sizeof(struct c2wr_nsmr_register_req)) / sizeof(u64);
-
-       if (pbl_depth <= pbe_count) {
-               flags |= MEM_PBL_COMPLETE;
-       }
-       wr->flags = cpu_to_be16(flags);
-       wr->stag_key = 0;       //stag_key;
-       wr->va = cpu_to_be64(*va);
-       wr->pd_id = mr->pd->pd_id;
-       wr->pbe_size = cpu_to_be32(page_size);
-       wr->length = cpu_to_be32(length);
-       wr->pbl_depth = cpu_to_be32(pbl_depth);
-       wr->fbo = cpu_to_be32(offset);
-       count = min(pbl_depth, pbe_count);
-       wr->addrs_length = cpu_to_be32(count);
-
-       /*
-        * fill out the PBL for this message
-        */
-       for (i = 0; i < count; i++) {
-               wr->paddrs[i] = cpu_to_be64(addr_list[i]);
-       }
-
-       /*
-        * regerence the request struct
-        */
-       vq_req_get(c2dev, vq_req);
-
-       /*
-        * send the WR to the adapter
-        */
-       err = vq_send_wr(c2dev, (union c2wr *) wr);
-       if (err) {
-               vq_req_put(c2dev, vq_req);
-               goto bail1;
-       }
-
-       /*
-        * wait for reply from adapter
-        */
-       err = vq_wait_for_reply(c2dev, vq_req);
-       if (err) {
-               goto bail1;
-       }
-
-       /*
-        * process reply
-        */
-       reply =
-           (struct c2wr_nsmr_register_rep *) (unsigned long) (vq_req->reply_msg);
-       if (!reply) {
-               err = -ENOMEM;
-               goto bail1;
-       }
-       if ((err = c2_errno(reply))) {
-               goto bail2;
-       }
-       //*p_pb_entries = be32_to_cpu(reply->pbl_depth);
-       mr->ibmr.lkey = mr->ibmr.rkey = be32_to_cpu(reply->stag_index);
-       vq_repbuf_free(c2dev, reply);
-
-       /*
-        * if there are still more PBEs we need to send them to
-        * the adapter and wait for a reply on the final one.
-        * reuse vq_req for this purpose.
-        */
-       pbl_depth -= count;
-       if (pbl_depth) {
-
-               vq_req->reply_msg = (unsigned long) NULL;
-               atomic_set(&vq_req->reply_ready, 0);
-               err = send_pbl_messages(c2dev,
-                                       cpu_to_be32(mr->ibmr.lkey),
-                                       (unsigned long) &addr_list[i],
-                                       pbl_depth, vq_req, PBL_PHYS);
-               if (err) {
-                       goto bail1;
-               }
-       }
-
-       vq_req_free(c2dev, vq_req);
-       kfree(wr);
-
-       return err;
-
-bail2:
-       vq_repbuf_free(c2dev, reply);
-bail1:
-       kfree(wr);
-bail0:
-       vq_req_free(c2dev, vq_req);
-       return err;
-}
-
-int c2_stag_dealloc(struct c2_dev *c2dev, u32 stag_index)
-{
-       struct c2_vq_req *vq_req;       /* verbs request object */
-       struct c2wr_stag_dealloc_req wr;        /* work request */
-       struct c2wr_stag_dealloc_rep *reply;    /* WR reply  */
-       int err;
-
-
-       /*
-        * allocate verbs request object
-        */
-       vq_req = vq_req_alloc(c2dev);
-       if (!vq_req) {
-               return -ENOMEM;
-       }
-
-       /*
-        * Build the WR
-        */
-       c2_wr_set_id(&wr, CCWR_STAG_DEALLOC);
-       wr.hdr.context = (u64) (unsigned long) vq_req;
-       wr.rnic_handle = c2dev->adapter_handle;
-       wr.stag_index = cpu_to_be32(stag_index);
-
-       /*
-        * reference the request struct.  dereferenced in the int handler.
-        */
-       vq_req_get(c2dev, vq_req);
-
-       /*
-        * Send WR to adapter
-        */
-       err = vq_send_wr(c2dev, (union c2wr *) & wr);
-       if (err) {
-               vq_req_put(c2dev, vq_req);
-               goto bail0;
-       }
-
-       /*
-        * Wait for reply from adapter
-        */
-       err = vq_wait_for_reply(c2dev, vq_req);
-       if (err) {
-               goto bail0;
-       }
-
-       /*
-        * Process reply
-        */
-       reply = (struct c2wr_stag_dealloc_rep *) (unsigned long) vq_req->reply_msg;
-       if (!reply) {
-               err = -ENOMEM;
-               goto bail0;
-       }
-
-       err = c2_errno(reply);
-
-       vq_repbuf_free(c2dev, reply);
-bail0:
-       vq_req_free(c2dev, vq_req);
-       return err;
-}
diff --git a/drivers/staging/rdma/amso1100/c2_mq.c b/drivers/staging/rdma/amso1100/c2_mq.c
deleted file mode 100644 (file)
index 7827fb8..0000000
+++ /dev/null
@@ -1,175 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "c2.h"
-#include "c2_mq.h"
-
-void *c2_mq_alloc(struct c2_mq *q)
-{
-       BUG_ON(q->magic != C2_MQ_MAGIC);
-       BUG_ON(q->type != C2_MQ_ADAPTER_TARGET);
-
-       if (c2_mq_full(q)) {
-               return NULL;
-       } else {
-#ifdef DEBUG
-               struct c2wr_hdr *m =
-                   (struct c2wr_hdr *) (q->msg_pool.host + q->priv * q->msg_size);
-#ifdef CCMSGMAGIC
-               BUG_ON(m->magic != be32_to_cpu(~CCWR_MAGIC));
-               m->magic = cpu_to_be32(CCWR_MAGIC);
-#endif
-               return m;
-#else
-               return q->msg_pool.host + q->priv * q->msg_size;
-#endif
-       }
-}
-
-void c2_mq_produce(struct c2_mq *q)
-{
-       BUG_ON(q->magic != C2_MQ_MAGIC);
-       BUG_ON(q->type != C2_MQ_ADAPTER_TARGET);
-
-       if (!c2_mq_full(q)) {
-               q->priv = (q->priv + 1) % q->q_size;
-               q->hint_count++;
-               /* Update peer's offset. */
-               __raw_writew((__force u16) cpu_to_be16(q->priv), &q->peer->shared);
-       }
-}
-
-void *c2_mq_consume(struct c2_mq *q)
-{
-       BUG_ON(q->magic != C2_MQ_MAGIC);
-       BUG_ON(q->type != C2_MQ_HOST_TARGET);
-
-       if (c2_mq_empty(q)) {
-               return NULL;
-       } else {
-#ifdef DEBUG
-               struct c2wr_hdr *m = (struct c2wr_hdr *)
-                   (q->msg_pool.host + q->priv * q->msg_size);
-#ifdef CCMSGMAGIC
-               BUG_ON(m->magic != be32_to_cpu(CCWR_MAGIC));
-#endif
-               return m;
-#else
-               return q->msg_pool.host + q->priv * q->msg_size;
-#endif
-       }
-}
-
-void c2_mq_free(struct c2_mq *q)
-{
-       BUG_ON(q->magic != C2_MQ_MAGIC);
-       BUG_ON(q->type != C2_MQ_HOST_TARGET);
-
-       if (!c2_mq_empty(q)) {
-
-#ifdef CCMSGMAGIC
-               {
-                       struct c2wr_hdr __iomem *m = (struct c2wr_hdr __iomem *)
-                           (q->msg_pool.adapter + q->priv * q->msg_size);
-                       __raw_writel(cpu_to_be32(~CCWR_MAGIC), &m->magic);
-               }
-#endif
-               q->priv = (q->priv + 1) % q->q_size;
-               /* Update peer's offset. */
-               __raw_writew((__force u16) cpu_to_be16(q->priv), &q->peer->shared);
-       }
-}
-
-
-void c2_mq_lconsume(struct c2_mq *q, u32 wqe_count)
-{
-       BUG_ON(q->magic != C2_MQ_MAGIC);
-       BUG_ON(q->type != C2_MQ_ADAPTER_TARGET);
-
-       while (wqe_count--) {
-               BUG_ON(c2_mq_empty(q));
-               *q->shared = cpu_to_be16((be16_to_cpu(*q->shared)+1) % q->q_size);
-       }
-}
-
-#if 0
-u32 c2_mq_count(struct c2_mq *q)
-{
-       s32 count;
-
-       if (q->type == C2_MQ_HOST_TARGET)
-               count = be16_to_cpu(*q->shared) - q->priv;
-       else
-               count = q->priv - be16_to_cpu(*q->shared);
-
-       if (count < 0)
-               count += q->q_size;
-
-       return (u32) count;
-}
-#endif  /*  0  */
-
-void c2_mq_req_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
-                   u8 __iomem *pool_start, u16 __iomem *peer, u32 type)
-{
-       BUG_ON(!q->shared);
-
-       /* This code assumes the byte swapping has already been done! */
-       q->index = index;
-       q->q_size = q_size;
-       q->msg_size = msg_size;
-       q->msg_pool.adapter = pool_start;
-       q->peer = (struct c2_mq_shared __iomem *) peer;
-       q->magic = C2_MQ_MAGIC;
-       q->type = type;
-       q->priv = 0;
-       q->hint_count = 0;
-       return;
-}
-
-void c2_mq_rep_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
-                   u8 *pool_start, u16 __iomem *peer, u32 type)
-{
-       BUG_ON(!q->shared);
-
-       /* This code assumes the byte swapping has already been done! */
-       q->index = index;
-       q->q_size = q_size;
-       q->msg_size = msg_size;
-       q->msg_pool.host = pool_start;
-       q->peer = (struct c2_mq_shared __iomem *) peer;
-       q->magic = C2_MQ_MAGIC;
-       q->type = type;
-       q->priv = 0;
-       q->hint_count = 0;
-       return;
-}
diff --git a/drivers/staging/rdma/amso1100/c2_mq.h b/drivers/staging/rdma/amso1100/c2_mq.h
deleted file mode 100644 (file)
index 8e1b4d1..0000000
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef _C2_MQ_H_
-#define _C2_MQ_H_
-#include <linux/kernel.h>
-#include <linux/dma-mapping.h>
-#include "c2_wr.h"
-
-enum c2_shared_regs {
-
-       C2_SHARED_ARMED = 0x10,
-       C2_SHARED_NOTIFY = 0x18,
-       C2_SHARED_SHARED = 0x40,
-};
-
-struct c2_mq_shared {
-       u16 unused1;
-       u8 armed;
-       u8 notification_type;
-       u32 unused2;
-       u16 shared;
-       /* Pad to 64 bytes. */
-       u8 pad[64 - sizeof(u16) - 2 * sizeof(u8) - sizeof(u32) - sizeof(u16)];
-};
-
-enum c2_mq_type {
-       C2_MQ_HOST_TARGET = 1,
-       C2_MQ_ADAPTER_TARGET = 2,
-};
-
-/*
- * c2_mq_t is for kernel-mode MQs like the VQs Cand the AEQ.
- * c2_user_mq_t (which is the same format) is for user-mode MQs...
- */
-#define C2_MQ_MAGIC 0x4d512020 /* 'MQ  ' */
-struct c2_mq {
-       u32 magic;
-       union {
-               u8 *host;
-               u8 __iomem *adapter;
-       } msg_pool;
-       dma_addr_t host_dma;
-       DEFINE_DMA_UNMAP_ADDR(mapping);
-       u16 hint_count;
-       u16 priv;
-       struct c2_mq_shared __iomem *peer;
-       __be16 *shared;
-       dma_addr_t shared_dma;
-       u32 q_size;
-       u32 msg_size;
-       u32 index;
-       enum c2_mq_type type;
-};
-
-static __inline__ int c2_mq_empty(struct c2_mq *q)
-{
-       return q->priv == be16_to_cpu(*q->shared);
-}
-
-static __inline__ int c2_mq_full(struct c2_mq *q)
-{
-       return q->priv == (be16_to_cpu(*q->shared) + q->q_size - 1) % q->q_size;
-}
-
-void c2_mq_lconsume(struct c2_mq *q, u32 wqe_count);
-void *c2_mq_alloc(struct c2_mq *q);
-void c2_mq_produce(struct c2_mq *q);
-void *c2_mq_consume(struct c2_mq *q);
-void c2_mq_free(struct c2_mq *q);
-void c2_mq_req_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
-                      u8 __iomem *pool_start, u16 __iomem *peer, u32 type);
-void c2_mq_rep_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
-                          u8 *pool_start, u16 __iomem *peer, u32 type);
-
-#endif                         /* _C2_MQ_H_ */
diff --git a/drivers/staging/rdma/amso1100/c2_pd.c b/drivers/staging/rdma/amso1100/c2_pd.c
deleted file mode 100644 (file)
index f3e81dc..0000000
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2004 Topspin Communications.  All rights reserved.
- * Copyright (c) 2005 Cisco Systems.  All rights reserved.
- * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/errno.h>
-
-#include "c2.h"
-#include "c2_provider.h"
-
-int c2_pd_alloc(struct c2_dev *c2dev, int privileged, struct c2_pd *pd)
-{
-       u32 obj;
-       int ret = 0;
-
-       spin_lock(&c2dev->pd_table.lock);
-       obj = find_next_zero_bit(c2dev->pd_table.table, c2dev->pd_table.max,
-                                c2dev->pd_table.last);
-       if (obj >= c2dev->pd_table.max)
-               obj = find_first_zero_bit(c2dev->pd_table.table,
-                                         c2dev->pd_table.max);
-       if (obj < c2dev->pd_table.max) {
-               pd->pd_id = obj;
-               __set_bit(obj, c2dev->pd_table.table);
-               c2dev->pd_table.last = obj+1;
-               if (c2dev->pd_table.last >= c2dev->pd_table.max)
-                       c2dev->pd_table.last = 0;
-       } else
-               ret = -ENOMEM;
-       spin_unlock(&c2dev->pd_table.lock);
-       return ret;
-}
-
-void c2_pd_free(struct c2_dev *c2dev, struct c2_pd *pd)
-{
-       spin_lock(&c2dev->pd_table.lock);
-       __clear_bit(pd->pd_id, c2dev->pd_table.table);
-       spin_unlock(&c2dev->pd_table.lock);
-}
-
-int c2_init_pd_table(struct c2_dev *c2dev)
-{
-
-       c2dev->pd_table.last = 0;
-       c2dev->pd_table.max = c2dev->props.max_pd;
-       spin_lock_init(&c2dev->pd_table.lock);
-       c2dev->pd_table.table = kmalloc(BITS_TO_LONGS(c2dev->props.max_pd) *
-                                       sizeof(long), GFP_KERNEL);
-       if (!c2dev->pd_table.table)
-               return -ENOMEM;
-       bitmap_zero(c2dev->pd_table.table, c2dev->props.max_pd);
-       return 0;
-}
-
-void c2_cleanup_pd_table(struct c2_dev *c2dev)
-{
-       kfree(c2dev->pd_table.table);
-}
diff --git a/drivers/staging/rdma/amso1100/c2_provider.c b/drivers/staging/rdma/amso1100/c2_provider.c
deleted file mode 100644 (file)
index a092ac7..0000000
+++ /dev/null
@@ -1,906 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/pci.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/inetdevice.h>
-#include <linux/delay.h>
-#include <linux/ethtool.h>
-#include <linux/mii.h>
-#include <linux/if_vlan.h>
-#include <linux/crc32.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/init.h>
-#include <linux/dma-mapping.h>
-#include <linux/if_arp.h>
-#include <linux/vmalloc.h>
-#include <linux/slab.h>
-
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/byteorder.h>
-
-#include <rdma/ib_smi.h>
-#include <rdma/ib_umem.h>
-#include <rdma/ib_user_verbs.h>
-#include "c2.h"
-#include "c2_provider.h"
-#include "c2_user.h"
-
-static int c2_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
-                          struct ib_udata *uhw)
-{
-       struct c2_dev *c2dev = to_c2dev(ibdev);
-
-       pr_debug("%s:%u\n", __func__, __LINE__);
-
-       if (uhw->inlen || uhw->outlen)
-               return -EINVAL;
-
-       *props = c2dev->props;
-       return 0;
-}
-
-static int c2_query_port(struct ib_device *ibdev,
-                        u8 port, struct ib_port_attr *props)
-{
-       pr_debug("%s:%u\n", __func__, __LINE__);
-
-       props->max_mtu = IB_MTU_4096;
-       props->lid = 0;
-       props->lmc = 0;
-       props->sm_lid = 0;
-       props->sm_sl = 0;
-       props->state = IB_PORT_ACTIVE;
-       props->phys_state = 0;
-       props->port_cap_flags =
-           IB_PORT_CM_SUP |
-           IB_PORT_REINIT_SUP |
-           IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
-       props->gid_tbl_len = 1;
-       props->pkey_tbl_len = 1;
-       props->qkey_viol_cntr = 0;
-       props->active_width = 1;
-       props->active_speed = IB_SPEED_SDR;
-
-       return 0;
-}
-
-static int c2_query_pkey(struct ib_device *ibdev,
-                        u8 port, u16 index, u16 * pkey)
-{
-       pr_debug("%s:%u\n", __func__, __LINE__);
-       *pkey = 0;
-       return 0;
-}
-
-static int c2_query_gid(struct ib_device *ibdev, u8 port,
-                       int index, union ib_gid *gid)
-{
-       struct c2_dev *c2dev = to_c2dev(ibdev);
-
-       pr_debug("%s:%u\n", __func__, __LINE__);
-       memset(&(gid->raw[0]), 0, sizeof(gid->raw));
-       memcpy(&(gid->raw[0]), c2dev->pseudo_netdev->dev_addr, 6);
-
-       return 0;
-}
-
-/* Allocate the user context data structure. This keeps track
- * of all objects associated with a particular user-mode client.
- */
-static struct ib_ucontext *c2_alloc_ucontext(struct ib_device *ibdev,
-                                            struct ib_udata *udata)
-{
-       struct c2_ucontext *context;
-
-       pr_debug("%s:%u\n", __func__, __LINE__);
-       context = kmalloc(sizeof(*context), GFP_KERNEL);
-       if (!context)
-               return ERR_PTR(-ENOMEM);
-
-       return &context->ibucontext;
-}
-
-static int c2_dealloc_ucontext(struct ib_ucontext *context)
-{
-       pr_debug("%s:%u\n", __func__, __LINE__);
-       kfree(context);
-       return 0;
-}
-
-static int c2_mmap_uar(struct ib_ucontext *context, struct vm_area_struct *vma)
-{
-       pr_debug("%s:%u\n", __func__, __LINE__);
-       return -ENOSYS;
-}
-
-static struct ib_pd *c2_alloc_pd(struct ib_device *ibdev,
-                                struct ib_ucontext *context,
-                                struct ib_udata *udata)
-{
-       struct c2_pd *pd;
-       int err;
-
-       pr_debug("%s:%u\n", __func__, __LINE__);
-
-       pd = kmalloc(sizeof(*pd), GFP_KERNEL);
-       if (!pd)
-               return ERR_PTR(-ENOMEM);
-
-       err = c2_pd_alloc(to_c2dev(ibdev), !context, pd);
-       if (err) {
-               kfree(pd);
-               return ERR_PTR(err);
-       }
-
-       if (context) {
-               if (ib_copy_to_udata(udata, &pd->pd_id, sizeof(__u32))) {
-                       c2_pd_free(to_c2dev(ibdev), pd);
-                       kfree(pd);
-                       return ERR_PTR(-EFAULT);
-               }
-       }
-
-       return &pd->ibpd;
-}
-
-static int c2_dealloc_pd(struct ib_pd *pd)
-{
-       pr_debug("%s:%u\n", __func__, __LINE__);
-       c2_pd_free(to_c2dev(pd->device), to_c2pd(pd));
-       kfree(pd);
-
-       return 0;
-}
-
-static struct ib_ah *c2_ah_create(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
-{
-       pr_debug("%s:%u\n", __func__, __LINE__);
-       return ERR_PTR(-ENOSYS);
-}
-
-static int c2_ah_destroy(struct ib_ah *ah)
-{
-       pr_debug("%s:%u\n", __func__, __LINE__);
-       return -ENOSYS;
-}
-
-static void c2_add_ref(struct ib_qp *ibqp)
-{
-       struct c2_qp *qp;
-       BUG_ON(!ibqp);
-       qp = to_c2qp(ibqp);
-       atomic_inc(&qp->refcount);
-}
-
-static void c2_rem_ref(struct ib_qp *ibqp)
-{
-       struct c2_qp *qp;
-       BUG_ON(!ibqp);
-       qp = to_c2qp(ibqp);
-       if (atomic_dec_and_test(&qp->refcount))
-               wake_up(&qp->wait);
-}
-
-struct ib_qp *c2_get_qp(struct ib_device *device, int qpn)
-{
-       struct c2_dev* c2dev = to_c2dev(device);
-       struct c2_qp *qp;
-
-       qp = c2_find_qpn(c2dev, qpn);
-       pr_debug("%s Returning QP=%p for QPN=%d, device=%p, refcount=%d\n",
-               __func__, qp, qpn, device,
-               (qp?atomic_read(&qp->refcount):0));
-
-       return (qp?&qp->ibqp:NULL);
-}
-
-static struct ib_qp *c2_create_qp(struct ib_pd *pd,
-                                 struct ib_qp_init_attr *init_attr,
-                                 struct ib_udata *udata)
-{
-       struct c2_qp *qp;
-       int err;
-
-       pr_debug("%s:%u\n", __func__, __LINE__);
-
-       if (init_attr->create_flags)
-               return ERR_PTR(-EINVAL);
-
-       switch (init_attr->qp_type) {
-       case IB_QPT_RC:
-               qp = kzalloc(sizeof(*qp), GFP_KERNEL);
-               if (!qp) {
-                       pr_debug("%s: Unable to allocate QP\n", __func__);
-                       return ERR_PTR(-ENOMEM);
-               }
-               spin_lock_init(&qp->lock);
-               if (pd->uobject) {
-                       /* userspace specific */
-               }
-
-               err = c2_alloc_qp(to_c2dev(pd->device),
-                                 to_c2pd(pd), init_attr, qp);
-
-               if (err && pd->uobject) {
-                       /* userspace specific */
-               }
-
-               break;
-       default:
-               pr_debug("%s: Invalid QP type: %d\n", __func__,
-                       init_attr->qp_type);
-               return ERR_PTR(-EINVAL);
-       }
-
-       if (err) {
-               kfree(qp);
-               return ERR_PTR(err);
-       }
-
-       return &qp->ibqp;
-}
-
-static int c2_destroy_qp(struct ib_qp *ib_qp)
-{
-       struct c2_qp *qp = to_c2qp(ib_qp);
-
-       pr_debug("%s:%u qp=%p,qp->state=%d\n",
-               __func__, __LINE__, ib_qp, qp->state);
-       c2_free_qp(to_c2dev(ib_qp->device), qp);
-       kfree(qp);
-       return 0;
-}
-
-static struct ib_cq *c2_create_cq(struct ib_device *ibdev,
-                                 const struct ib_cq_init_attr *attr,
-                                 struct ib_ucontext *context,
-                                 struct ib_udata *udata)
-{
-       int entries = attr->cqe;
-       struct c2_cq *cq;
-       int err;
-
-       if (attr->flags)
-               return ERR_PTR(-EINVAL);
-
-       cq = kmalloc(sizeof(*cq), GFP_KERNEL);
-       if (!cq) {
-               pr_debug("%s: Unable to allocate CQ\n", __func__);
-               return ERR_PTR(-ENOMEM);
-       }
-
-       err = c2_init_cq(to_c2dev(ibdev), entries, NULL, cq);
-       if (err) {
-               pr_debug("%s: error initializing CQ\n", __func__);
-               kfree(cq);
-               return ERR_PTR(err);
-       }
-
-       return &cq->ibcq;
-}
-
-static int c2_destroy_cq(struct ib_cq *ib_cq)
-{
-       struct c2_cq *cq = to_c2cq(ib_cq);
-
-       pr_debug("%s:%u\n", __func__, __LINE__);
-
-       c2_free_cq(to_c2dev(ib_cq->device), cq);
-       kfree(cq);
-
-       return 0;
-}
-
-static inline u32 c2_convert_access(int acc)
-{
-       return (acc & IB_ACCESS_REMOTE_WRITE ? C2_ACF_REMOTE_WRITE : 0) |
-           (acc & IB_ACCESS_REMOTE_READ ? C2_ACF_REMOTE_READ : 0) |
-           (acc & IB_ACCESS_LOCAL_WRITE ? C2_ACF_LOCAL_WRITE : 0) |
-           C2_ACF_LOCAL_READ | C2_ACF_WINDOW_BIND;
-}
-
-static struct ib_mr *c2_reg_phys_mr(struct ib_pd *ib_pd,
-                                   struct ib_phys_buf *buffer_list,
-                                   int num_phys_buf, int acc, u64 * iova_start)
-{
-       struct c2_mr *mr;
-       u64 *page_list;
-       u32 total_len;
-       int err, i, j, k, page_shift, pbl_depth;
-
-       pbl_depth = 0;
-       total_len = 0;
-
-       page_shift = PAGE_SHIFT;
-       /*
-        * If there is only 1 buffer we assume this could
-        * be a map of all phy mem...use a 32k page_shift.
-        */
-       if (num_phys_buf == 1)
-               page_shift += 3;
-
-       for (i = 0; i < num_phys_buf; i++) {
-
-               if (offset_in_page(buffer_list[i].addr)) {
-                       pr_debug("Unaligned Memory Buffer: 0x%x\n",
-                               (unsigned int) buffer_list[i].addr);
-                       return ERR_PTR(-EINVAL);
-               }
-
-               if (!buffer_list[i].size) {
-                       pr_debug("Invalid Buffer Size\n");
-                       return ERR_PTR(-EINVAL);
-               }
-
-               total_len += buffer_list[i].size;
-               pbl_depth += ALIGN(buffer_list[i].size,
-                                  BIT(page_shift)) >> page_shift;
-       }
-
-       page_list = vmalloc(sizeof(u64) * pbl_depth);
-       if (!page_list) {
-               pr_debug("couldn't vmalloc page_list of size %zd\n",
-                       (sizeof(u64) * pbl_depth));
-               return ERR_PTR(-ENOMEM);
-       }
-
-       for (i = 0, j = 0; i < num_phys_buf; i++) {
-
-               int naddrs;
-
-               naddrs = ALIGN(buffer_list[i].size,
-                              BIT(page_shift)) >> page_shift;
-               for (k = 0; k < naddrs; k++)
-                       page_list[j++] = (buffer_list[i].addr +
-                                                    (k << page_shift));
-       }
-
-       mr = kmalloc(sizeof(*mr), GFP_KERNEL);
-       if (!mr) {
-               vfree(page_list);
-               return ERR_PTR(-ENOMEM);
-       }
-
-       mr->pd = to_c2pd(ib_pd);
-       mr->umem = NULL;
-       pr_debug("%s - page shift %d, pbl_depth %d, total_len %u, "
-               "*iova_start %llx, first pa %llx, last pa %llx\n",
-               __func__, page_shift, pbl_depth, total_len,
-               (unsigned long long) *iova_start,
-               (unsigned long long) page_list[0],
-               (unsigned long long) page_list[pbl_depth-1]);
-       err = c2_nsmr_register_phys_kern(to_c2dev(ib_pd->device), page_list,
-                                        BIT(page_shift), pbl_depth,
-                                        total_len, 0, iova_start,
-                                        c2_convert_access(acc), mr);
-       vfree(page_list);
-       if (err) {
-               kfree(mr);
-               return ERR_PTR(err);
-       }
-
-       return &mr->ibmr;
-}
-
-static struct ib_mr *c2_get_dma_mr(struct ib_pd *pd, int acc)
-{
-       struct ib_phys_buf bl;
-       u64 kva = 0;
-
-       pr_debug("%s:%u\n", __func__, __LINE__);
-
-       /* AMSO1100 limit */
-       bl.size = 0xffffffff;
-       bl.addr = 0;
-       return c2_reg_phys_mr(pd, &bl, 1, acc, &kva);
-}
-
-static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
-                                   u64 virt, int acc, struct ib_udata *udata)
-{
-       u64 *pages;
-       u64 kva = 0;
-       int shift, n, len;
-       int i, k, entry;
-       int err = 0;
-       struct scatterlist *sg;
-       struct c2_pd *c2pd = to_c2pd(pd);
-       struct c2_mr *c2mr;
-
-       pr_debug("%s:%u\n", __func__, __LINE__);
-
-       c2mr = kmalloc(sizeof(*c2mr), GFP_KERNEL);
-       if (!c2mr)
-               return ERR_PTR(-ENOMEM);
-       c2mr->pd = c2pd;
-
-       c2mr->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0);
-       if (IS_ERR(c2mr->umem)) {
-               err = PTR_ERR(c2mr->umem);
-               kfree(c2mr);
-               return ERR_PTR(err);
-       }
-
-       shift = ffs(c2mr->umem->page_size) - 1;
-       n = c2mr->umem->nmap;
-
-       pages = kmalloc_array(n, sizeof(u64), GFP_KERNEL);
-       if (!pages) {
-               err = -ENOMEM;
-               goto err;
-       }
-
-       i = 0;
-       for_each_sg(c2mr->umem->sg_head.sgl, sg, c2mr->umem->nmap, entry) {
-               len = sg_dma_len(sg) >> shift;
-               for (k = 0; k < len; ++k) {
-                       pages[i++] =
-                               sg_dma_address(sg) +
-                               (c2mr->umem->page_size * k);
-               }
-       }
-
-       kva = virt;
-       err = c2_nsmr_register_phys_kern(to_c2dev(pd->device),
-                                        pages,
-                                        c2mr->umem->page_size,
-                                        i,
-                                        length,
-                                        ib_umem_offset(c2mr->umem),
-                                        &kva,
-                                        c2_convert_access(acc),
-                                        c2mr);
-       kfree(pages);
-       if (err)
-               goto err;
-       return &c2mr->ibmr;
-
-err:
-       ib_umem_release(c2mr->umem);
-       kfree(c2mr);
-       return ERR_PTR(err);
-}
-
-static int c2_dereg_mr(struct ib_mr *ib_mr)
-{
-       struct c2_mr *mr = to_c2mr(ib_mr);
-       int err;
-
-       pr_debug("%s:%u\n", __func__, __LINE__);
-
-       err = c2_stag_dealloc(to_c2dev(ib_mr->device), ib_mr->lkey);
-       if (err)
-               pr_debug("c2_stag_dealloc failed: %d\n", err);
-       else {
-               if (mr->umem)
-                       ib_umem_release(mr->umem);
-               kfree(mr);
-       }
-
-       return err;
-}
-
-static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
-                       char *buf)
-{
-       struct c2_dev *c2dev = container_of(dev, struct c2_dev, ibdev.dev);
-       pr_debug("%s:%u\n", __func__, __LINE__);
-       return sprintf(buf, "%x\n", c2dev->props.hw_ver);
-}
-
-static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr,
-                          char *buf)
-{
-       struct c2_dev *c2dev = container_of(dev, struct c2_dev, ibdev.dev);
-       pr_debug("%s:%u\n", __func__, __LINE__);
-       return sprintf(buf, "%x.%x.%x\n",
-                      (int) (c2dev->props.fw_ver >> 32),
-                      (int) (c2dev->props.fw_ver >> 16) & 0xffff,
-                      (int) (c2dev->props.fw_ver & 0xffff));
-}
-
-static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
-                       char *buf)
-{
-       pr_debug("%s:%u\n", __func__, __LINE__);
-       return sprintf(buf, "AMSO1100\n");
-}
-
-static ssize_t show_board(struct device *dev, struct device_attribute *attr,
-                         char *buf)
-{
-       pr_debug("%s:%u\n", __func__, __LINE__);
-       return sprintf(buf, "%.*s\n", 32, "AMSO1100 Board ID");
-}
-
-static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
-static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
-static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
-
-static struct device_attribute *c2_dev_attributes[] = {
-       &dev_attr_hw_rev,
-       &dev_attr_fw_ver,
-       &dev_attr_hca_type,
-       &dev_attr_board_id
-};
-
-static int c2_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-                       int attr_mask, struct ib_udata *udata)
-{
-       int err;
-
-       err =
-           c2_qp_modify(to_c2dev(ibqp->device), to_c2qp(ibqp), attr,
-                        attr_mask);
-
-       return err;
-}
-
-static int c2_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
-{
-       pr_debug("%s:%u\n", __func__, __LINE__);
-       return -ENOSYS;
-}
-
-static int c2_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
-{
-       pr_debug("%s:%u\n", __func__, __LINE__);
-       return -ENOSYS;
-}
-
-static int c2_process_mad(struct ib_device *ibdev,
-                         int mad_flags,
-                         u8 port_num,
-                         const struct ib_wc *in_wc,
-                         const struct ib_grh *in_grh,
-                         const struct ib_mad_hdr *in_mad,
-                         size_t in_mad_size,
-                         struct ib_mad_hdr *out_mad,
-                         size_t *out_mad_size,
-                         u16 *out_mad_pkey_index)
-{
-       pr_debug("%s:%u\n", __func__, __LINE__);
-       return -ENOSYS;
-}
-
-static int c2_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
-{
-       pr_debug("%s:%u\n", __func__, __LINE__);
-
-       /* Request a connection */
-       return c2_llp_connect(cm_id, iw_param);
-}
-
-static int c2_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
-{
-       pr_debug("%s:%u\n", __func__, __LINE__);
-
-       /* Accept the new connection */
-       return c2_llp_accept(cm_id, iw_param);
-}
-
-static int c2_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
-{
-       pr_debug("%s:%u\n", __func__, __LINE__);
-
-       return c2_llp_reject(cm_id, pdata, pdata_len);
-}
-
-static int c2_service_create(struct iw_cm_id *cm_id, int backlog)
-{
-       int err;
-
-       pr_debug("%s:%u\n", __func__, __LINE__);
-       err = c2_llp_service_create(cm_id, backlog);
-       pr_debug("%s:%u err=%d\n",
-               __func__, __LINE__,
-               err);
-       return err;
-}
-
-static int c2_service_destroy(struct iw_cm_id *cm_id)
-{
-       pr_debug("%s:%u\n", __func__, __LINE__);
-
-       return c2_llp_service_destroy(cm_id);
-}
-
-static int c2_pseudo_up(struct net_device *netdev)
-{
-       struct in_device *ind;
-       struct c2_dev *c2dev = netdev->ml_priv;
-
-       ind = in_dev_get(netdev);
-       if (!ind)
-               return 0;
-
-       pr_debug("adding...\n");
-       for_ifa(ind) {
-#ifdef DEBUG
-               u8 *ip = (u8 *) & ifa->ifa_address;
-
-               pr_debug("%s: %d.%d.%d.%d\n",
-                      ifa->ifa_label, ip[0], ip[1], ip[2], ip[3]);
-#endif
-               c2_add_addr(c2dev, ifa->ifa_address, ifa->ifa_mask);
-       }
-       endfor_ifa(ind);
-       in_dev_put(ind);
-
-       return 0;
-}
-
-static int c2_pseudo_down(struct net_device *netdev)
-{
-       struct in_device *ind;
-       struct c2_dev *c2dev = netdev->ml_priv;
-
-       ind = in_dev_get(netdev);
-       if (!ind)
-               return 0;
-
-       pr_debug("deleting...\n");
-       for_ifa(ind) {
-#ifdef DEBUG
-               u8 *ip = (u8 *) & ifa->ifa_address;
-
-               pr_debug("%s: %d.%d.%d.%d\n",
-                      ifa->ifa_label, ip[0], ip[1], ip[2], ip[3]);
-#endif
-               c2_del_addr(c2dev, ifa->ifa_address, ifa->ifa_mask);
-       }
-       endfor_ifa(ind);
-       in_dev_put(ind);
-
-       return 0;
-}
-
-static int c2_pseudo_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
-{
-       kfree_skb(skb);
-       return NETDEV_TX_OK;
-}
-
-static int c2_pseudo_change_mtu(struct net_device *netdev, int new_mtu)
-{
-       if (new_mtu < ETH_ZLEN || new_mtu > ETH_JUMBO_MTU)
-               return -EINVAL;
-
-       netdev->mtu = new_mtu;
-
-       /* TODO: Tell rnic about new rmda interface mtu */
-       return 0;
-}
-
-static const struct net_device_ops c2_pseudo_netdev_ops = {
-       .ndo_open               = c2_pseudo_up,
-       .ndo_stop               = c2_pseudo_down,
-       .ndo_start_xmit         = c2_pseudo_xmit_frame,
-       .ndo_change_mtu         = c2_pseudo_change_mtu,
-       .ndo_validate_addr      = eth_validate_addr,
-};
-
-static void setup(struct net_device *netdev)
-{
-       netdev->netdev_ops = &c2_pseudo_netdev_ops;
-
-       netdev->watchdog_timeo = 0;
-       netdev->type = ARPHRD_ETHER;
-       netdev->mtu = 1500;
-       netdev->hard_header_len = ETH_HLEN;
-       netdev->addr_len = ETH_ALEN;
-       netdev->tx_queue_len = 0;
-       netdev->flags |= IFF_NOARP;
-}
-
-static struct net_device *c2_pseudo_netdev_init(struct c2_dev *c2dev)
-{
-       char name[IFNAMSIZ];
-       struct net_device *netdev;
-
-       /* change ethxxx to iwxxx */
-       strcpy(name, "iw");
-       strcat(name, &c2dev->netdev->name[3]);
-       netdev = alloc_netdev(0, name, NET_NAME_UNKNOWN, setup);
-       if (!netdev) {
-               printk(KERN_ERR PFX "%s -  etherdev alloc failed",
-                       __func__);
-               return NULL;
-       }
-
-       netdev->ml_priv = c2dev;
-
-       SET_NETDEV_DEV(netdev, &c2dev->pcidev->dev);
-
-       memcpy_fromio(netdev->dev_addr, c2dev->kva + C2_REGS_RDMA_ENADDR, 6);
-
-       /* Print out the MAC address */
-       pr_debug("%s: MAC %pM\n", netdev->name, netdev->dev_addr);
-
-#if 0
-       /* Disable network packets */
-       netif_stop_queue(netdev);
-#endif
-       return netdev;
-}
-
-static int c2_port_immutable(struct ib_device *ibdev, u8 port_num,
-                            struct ib_port_immutable *immutable)
-{
-       struct ib_port_attr attr;
-       int err;
-
-       err = c2_query_port(ibdev, port_num, &attr);
-       if (err)
-               return err;
-
-       immutable->pkey_tbl_len = attr.pkey_tbl_len;
-       immutable->gid_tbl_len = attr.gid_tbl_len;
-       immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
-
-       return 0;
-}
-
-int c2_register_device(struct c2_dev *dev)
-{
-       int ret = -ENOMEM;
-       int i;
-
-       /* Register pseudo network device */
-       dev->pseudo_netdev = c2_pseudo_netdev_init(dev);
-       if (!dev->pseudo_netdev)
-               goto out;
-
-       ret = register_netdev(dev->pseudo_netdev);
-       if (ret)
-               goto out_free_netdev;
-
-       pr_debug("%s:%u\n", __func__, __LINE__);
-       strlcpy(dev->ibdev.name, "amso%d", IB_DEVICE_NAME_MAX);
-       dev->ibdev.owner = THIS_MODULE;
-       dev->ibdev.uverbs_cmd_mask =
-           (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
-           (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
-           (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
-           (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
-           (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
-           (1ull << IB_USER_VERBS_CMD_REG_MR) |
-           (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
-           (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
-           (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
-           (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
-           (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
-           (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
-           (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
-           (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
-           (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
-           (1ull << IB_USER_VERBS_CMD_POST_SEND) |
-           (1ull << IB_USER_VERBS_CMD_POST_RECV);
-
-       dev->ibdev.node_type = RDMA_NODE_RNIC;
-       memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
-       memcpy(&dev->ibdev.node_guid, dev->pseudo_netdev->dev_addr, 6);
-       dev->ibdev.phys_port_cnt = 1;
-       dev->ibdev.num_comp_vectors = 1;
-       dev->ibdev.dma_device = &dev->pcidev->dev;
-       dev->ibdev.query_device = c2_query_device;
-       dev->ibdev.query_port = c2_query_port;
-       dev->ibdev.query_pkey = c2_query_pkey;
-       dev->ibdev.query_gid = c2_query_gid;
-       dev->ibdev.alloc_ucontext = c2_alloc_ucontext;
-       dev->ibdev.dealloc_ucontext = c2_dealloc_ucontext;
-       dev->ibdev.mmap = c2_mmap_uar;
-       dev->ibdev.alloc_pd = c2_alloc_pd;
-       dev->ibdev.dealloc_pd = c2_dealloc_pd;
-       dev->ibdev.create_ah = c2_ah_create;
-       dev->ibdev.destroy_ah = c2_ah_destroy;
-       dev->ibdev.create_qp = c2_create_qp;
-       dev->ibdev.modify_qp = c2_modify_qp;
-       dev->ibdev.destroy_qp = c2_destroy_qp;
-       dev->ibdev.create_cq = c2_create_cq;
-       dev->ibdev.destroy_cq = c2_destroy_cq;
-       dev->ibdev.poll_cq = c2_poll_cq;
-       dev->ibdev.get_dma_mr = c2_get_dma_mr;
-       dev->ibdev.reg_phys_mr = c2_reg_phys_mr;
-       dev->ibdev.reg_user_mr = c2_reg_user_mr;
-       dev->ibdev.dereg_mr = c2_dereg_mr;
-       dev->ibdev.get_port_immutable = c2_port_immutable;
-
-       dev->ibdev.alloc_fmr = NULL;
-       dev->ibdev.unmap_fmr = NULL;
-       dev->ibdev.dealloc_fmr = NULL;
-       dev->ibdev.map_phys_fmr = NULL;
-
-       dev->ibdev.attach_mcast = c2_multicast_attach;
-       dev->ibdev.detach_mcast = c2_multicast_detach;
-       dev->ibdev.process_mad = c2_process_mad;
-
-       dev->ibdev.req_notify_cq = c2_arm_cq;
-       dev->ibdev.post_send = c2_post_send;
-       dev->ibdev.post_recv = c2_post_receive;
-
-       dev->ibdev.iwcm = kmalloc(sizeof(*dev->ibdev.iwcm), GFP_KERNEL);
-       if (dev->ibdev.iwcm == NULL) {
-               ret = -ENOMEM;
-               goto out_unregister_netdev;
-       }
-       dev->ibdev.iwcm->add_ref = c2_add_ref;
-       dev->ibdev.iwcm->rem_ref = c2_rem_ref;
-       dev->ibdev.iwcm->get_qp = c2_get_qp;
-       dev->ibdev.iwcm->connect = c2_connect;
-       dev->ibdev.iwcm->accept = c2_accept;
-       dev->ibdev.iwcm->reject = c2_reject;
-       dev->ibdev.iwcm->create_listen = c2_service_create;
-       dev->ibdev.iwcm->destroy_listen = c2_service_destroy;
-
-       ret = ib_register_device(&dev->ibdev, NULL);
-       if (ret)
-               goto out_free_iwcm;
-
-       for (i = 0; i < ARRAY_SIZE(c2_dev_attributes); ++i) {
-               ret = device_create_file(&dev->ibdev.dev,
-                                              c2_dev_attributes[i]);
-               if (ret)
-                       goto out_unregister_ibdev;
-       }
-       goto out;
-
-out_unregister_ibdev:
-       ib_unregister_device(&dev->ibdev);
-out_free_iwcm:
-       kfree(dev->ibdev.iwcm);
-out_unregister_netdev:
-       unregister_netdev(dev->pseudo_netdev);
-out_free_netdev:
-       free_netdev(dev->pseudo_netdev);
-out:
-       pr_debug("%s:%u ret=%d\n", __func__, __LINE__, ret);
-       return ret;
-}
-
-void c2_unregister_device(struct c2_dev *dev)
-{
-       pr_debug("%s:%u\n", __func__, __LINE__);
-       unregister_netdev(dev->pseudo_netdev);
-       free_netdev(dev->pseudo_netdev);
-       ib_unregister_device(&dev->ibdev);
-}
diff --git a/drivers/staging/rdma/amso1100/c2_provider.h b/drivers/staging/rdma/amso1100/c2_provider.h
deleted file mode 100644 (file)
index bf18998..0000000
+++ /dev/null
@@ -1,182 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#ifndef C2_PROVIDER_H
-#define C2_PROVIDER_H
-#include <linux/inetdevice.h>
-
-#include <rdma/ib_verbs.h>
-#include <rdma/ib_pack.h>
-
-#include "c2_mq.h"
-#include <rdma/iw_cm.h>
-
-#define C2_MPT_FLAG_ATOMIC        (1 << 14)
-#define C2_MPT_FLAG_REMOTE_WRITE  (1 << 13)
-#define C2_MPT_FLAG_REMOTE_READ   (1 << 12)
-#define C2_MPT_FLAG_LOCAL_WRITE   (1 << 11)
-#define C2_MPT_FLAG_LOCAL_READ    (1 << 10)
-
-struct c2_buf_list {
-       void *buf;
-       DEFINE_DMA_UNMAP_ADDR(mapping);
-};
-
-
-/* The user context keeps track of objects allocated for a
- * particular user-mode client. */
-struct c2_ucontext {
-       struct ib_ucontext ibucontext;
-};
-
-struct c2_mtt;
-
-/* All objects associated with a PD are kept in the
- * associated user context if present.
- */
-struct c2_pd {
-       struct ib_pd ibpd;
-       u32 pd_id;
-};
-
-struct c2_mr {
-       struct ib_mr ibmr;
-       struct c2_pd *pd;
-       struct ib_umem *umem;
-};
-
-struct c2_av;
-
-enum c2_ah_type {
-       C2_AH_ON_HCA,
-       C2_AH_PCI_POOL,
-       C2_AH_KMALLOC
-};
-
-struct c2_ah {
-       struct ib_ah ibah;
-};
-
-struct c2_cq {
-       struct ib_cq ibcq;
-       spinlock_t lock;
-       atomic_t refcount;
-       int cqn;
-       int is_kernel;
-       wait_queue_head_t wait;
-
-       u32 adapter_handle;
-       struct c2_mq mq;
-};
-
-struct c2_wq {
-       spinlock_t lock;
-};
-struct iw_cm_id;
-struct c2_qp {
-       struct ib_qp ibqp;
-       struct iw_cm_id *cm_id;
-       spinlock_t lock;
-       atomic_t refcount;
-       wait_queue_head_t wait;
-       int qpn;
-
-       u32 adapter_handle;
-       u32 send_sgl_depth;
-       u32 recv_sgl_depth;
-       u32 rdma_write_sgl_depth;
-       u8 state;
-
-       struct c2_mq sq_mq;
-       struct c2_mq rq_mq;
-};
-
-struct c2_cr_query_attrs {
-       u32 local_addr;
-       u32 remote_addr;
-       u16 local_port;
-       u16 remote_port;
-};
-
-static inline struct c2_pd *to_c2pd(struct ib_pd *ibpd)
-{
-       return container_of(ibpd, struct c2_pd, ibpd);
-}
-
-static inline struct c2_ucontext *to_c2ucontext(struct ib_ucontext *ibucontext)
-{
-       return container_of(ibucontext, struct c2_ucontext, ibucontext);
-}
-
-static inline struct c2_mr *to_c2mr(struct ib_mr *ibmr)
-{
-       return container_of(ibmr, struct c2_mr, ibmr);
-}
-
-
-static inline struct c2_ah *to_c2ah(struct ib_ah *ibah)
-{
-       return container_of(ibah, struct c2_ah, ibah);
-}
-
-static inline struct c2_cq *to_c2cq(struct ib_cq *ibcq)
-{
-       return container_of(ibcq, struct c2_cq, ibcq);
-}
-
-static inline struct c2_qp *to_c2qp(struct ib_qp *ibqp)
-{
-       return container_of(ibqp, struct c2_qp, ibqp);
-}
-
-static inline int is_rnic_addr(struct net_device *netdev, u32 addr)
-{
-       struct in_device *ind;
-       int ret = 0;
-
-       ind = in_dev_get(netdev);
-       if (!ind)
-               return 0;
-
-       for_ifa(ind) {
-               if (ifa->ifa_address == addr) {
-                       ret = 1;
-                       break;
-               }
-       }
-       endfor_ifa(ind);
-       in_dev_put(ind);
-       return ret;
-}
-#endif                         /* C2_PROVIDER_H */
diff --git a/drivers/staging/rdma/amso1100/c2_qp.c b/drivers/staging/rdma/amso1100/c2_qp.c
deleted file mode 100644 (file)
index ca364db..0000000
+++ /dev/null
@@ -1,1024 +0,0 @@
-/*
- * Copyright (c) 2004 Topspin Communications.  All rights reserved.
- * Copyright (c) 2005 Cisco Systems. All rights reserved.
- * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#include <linux/delay.h>
-#include <linux/gfp.h>
-
-#include "c2.h"
-#include "c2_vq.h"
-#include "c2_status.h"
-
-#define C2_MAX_ORD_PER_QP 128
-#define C2_MAX_IRD_PER_QP 128
-
-#define C2_HINT_MAKE(q_index, hint_count) (((q_index) << 16) | hint_count)
-#define C2_HINT_GET_INDEX(hint) (((hint) & 0x7FFF0000) >> 16)
-#define C2_HINT_GET_COUNT(hint) ((hint) & 0x0000FFFF)
-
-#define NO_SUPPORT -1
-static const u8 c2_opcode[] = {
-       [IB_WR_SEND] = C2_WR_TYPE_SEND,
-       [IB_WR_SEND_WITH_IMM] = NO_SUPPORT,
-       [IB_WR_RDMA_WRITE] = C2_WR_TYPE_RDMA_WRITE,
-       [IB_WR_RDMA_WRITE_WITH_IMM] = NO_SUPPORT,
-       [IB_WR_RDMA_READ] = C2_WR_TYPE_RDMA_READ,
-       [IB_WR_ATOMIC_CMP_AND_SWP] = NO_SUPPORT,
-       [IB_WR_ATOMIC_FETCH_AND_ADD] = NO_SUPPORT,
-};
-
-static int to_c2_state(enum ib_qp_state ib_state)
-{
-       switch (ib_state) {
-       case IB_QPS_RESET:
-               return C2_QP_STATE_IDLE;
-       case IB_QPS_RTS:
-               return C2_QP_STATE_RTS;
-       case IB_QPS_SQD:
-               return C2_QP_STATE_CLOSING;
-       case IB_QPS_SQE:
-               return C2_QP_STATE_CLOSING;
-       case IB_QPS_ERR:
-               return C2_QP_STATE_ERROR;
-       default:
-               return -1;
-       }
-}
-
-static int to_ib_state(enum c2_qp_state c2_state)
-{
-       switch (c2_state) {
-       case C2_QP_STATE_IDLE:
-               return IB_QPS_RESET;
-       case C2_QP_STATE_CONNECTING:
-               return IB_QPS_RTR;
-       case C2_QP_STATE_RTS:
-               return IB_QPS_RTS;
-       case C2_QP_STATE_CLOSING:
-               return IB_QPS_SQD;
-       case C2_QP_STATE_ERROR:
-               return IB_QPS_ERR;
-       case C2_QP_STATE_TERMINATE:
-               return IB_QPS_SQE;
-       default:
-               return -1;
-       }
-}
-
-static const char *to_ib_state_str(int ib_state)
-{
-       static const char *state_str[] = {
-               "IB_QPS_RESET",
-               "IB_QPS_INIT",
-               "IB_QPS_RTR",
-               "IB_QPS_RTS",
-               "IB_QPS_SQD",
-               "IB_QPS_SQE",
-               "IB_QPS_ERR"
-       };
-       if (ib_state < IB_QPS_RESET ||
-           ib_state > IB_QPS_ERR)
-               return "<invalid IB QP state>";
-
-       ib_state -= IB_QPS_RESET;
-       return state_str[ib_state];
-}
-
-void c2_set_qp_state(struct c2_qp *qp, int c2_state)
-{
-       int new_state = to_ib_state(c2_state);
-
-       pr_debug("%s: qp[%p] state modify %s --> %s\n",
-              __func__,
-               qp,
-               to_ib_state_str(qp->state),
-               to_ib_state_str(new_state));
-       qp->state = new_state;
-}
-
-#define C2_QP_NO_ATTR_CHANGE 0xFFFFFFFF
-
-int c2_qp_modify(struct c2_dev *c2dev, struct c2_qp *qp,
-                struct ib_qp_attr *attr, int attr_mask)
-{
-       struct c2wr_qp_modify_req wr;
-       struct c2wr_qp_modify_rep *reply;
-       struct c2_vq_req *vq_req;
-       unsigned long flags;
-       u8 next_state;
-       int err;
-
-       pr_debug("%s:%d qp=%p, %s --> %s\n",
-               __func__, __LINE__,
-               qp,
-               to_ib_state_str(qp->state),
-               to_ib_state_str(attr->qp_state));
-
-       vq_req = vq_req_alloc(c2dev);
-       if (!vq_req)
-               return -ENOMEM;
-
-       c2_wr_set_id(&wr, CCWR_QP_MODIFY);
-       wr.hdr.context = (unsigned long) vq_req;
-       wr.rnic_handle = c2dev->adapter_handle;
-       wr.qp_handle = qp->adapter_handle;
-       wr.ord = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
-       wr.ird = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
-       wr.sq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
-       wr.rq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
-
-       if (attr_mask & IB_QP_STATE) {
-               /* Ensure the state is valid */
-               if (attr->qp_state < 0 || attr->qp_state > IB_QPS_ERR) {
-                       err = -EINVAL;
-                       goto bail0;
-               }
-
-               wr.next_qp_state = cpu_to_be32(to_c2_state(attr->qp_state));
-
-               if (attr->qp_state == IB_QPS_ERR) {
-                       spin_lock_irqsave(&qp->lock, flags);
-                       if (qp->cm_id && qp->state == IB_QPS_RTS) {
-                               pr_debug("Generating CLOSE event for QP-->ERR, "
-                                       "qp=%p, cm_id=%p\n",qp,qp->cm_id);
-                               /* Generate an CLOSE event */
-                               vq_req->cm_id = qp->cm_id;
-                               vq_req->event = IW_CM_EVENT_CLOSE;
-                       }
-                       spin_unlock_irqrestore(&qp->lock, flags);
-               }
-               next_state =  attr->qp_state;
-
-       } else if (attr_mask & IB_QP_CUR_STATE) {
-
-               if (attr->cur_qp_state != IB_QPS_RTR &&
-                   attr->cur_qp_state != IB_QPS_RTS &&
-                   attr->cur_qp_state != IB_QPS_SQD &&
-                   attr->cur_qp_state != IB_QPS_SQE) {
-                       err = -EINVAL;
-                       goto bail0;
-               } else
-                       wr.next_qp_state =
-                           cpu_to_be32(to_c2_state(attr->cur_qp_state));
-
-               next_state = attr->cur_qp_state;
-
-       } else {
-               err = 0;
-               goto bail0;
-       }
-
-       /* reference the request struct */
-       vq_req_get(c2dev, vq_req);
-
-       err = vq_send_wr(c2dev, (union c2wr *) & wr);
-       if (err) {
-               vq_req_put(c2dev, vq_req);
-               goto bail0;
-       }
-
-       err = vq_wait_for_reply(c2dev, vq_req);
-       if (err)
-               goto bail0;
-
-       reply = (struct c2wr_qp_modify_rep *) (unsigned long) vq_req->reply_msg;
-       if (!reply) {
-               err = -ENOMEM;
-               goto bail0;
-       }
-
-       err = c2_errno(reply);
-       if (!err)
-               qp->state = next_state;
-#ifdef DEBUG
-       else
-               pr_debug("%s: c2_errno=%d\n", __func__, err);
-#endif
-       /*
-        * If we're going to error and generating the event here, then
-        * we need to remove the reference because there will be no
-        * close event generated by the adapter
-       */
-       spin_lock_irqsave(&qp->lock, flags);
-       if (vq_req->event==IW_CM_EVENT_CLOSE && qp->cm_id) {
-               qp->cm_id->rem_ref(qp->cm_id);
-               qp->cm_id = NULL;
-       }
-       spin_unlock_irqrestore(&qp->lock, flags);
-
-       vq_repbuf_free(c2dev, reply);
-bail0:
-       vq_req_free(c2dev, vq_req);
-
-       pr_debug("%s:%d qp=%p, cur_state=%s\n",
-               __func__, __LINE__,
-               qp,
-               to_ib_state_str(qp->state));
-       return err;
-}
-
-int c2_qp_set_read_limits(struct c2_dev *c2dev, struct c2_qp *qp,
-                         int ord, int ird)
-{
-       struct c2wr_qp_modify_req wr;
-       struct c2wr_qp_modify_rep *reply;
-       struct c2_vq_req *vq_req;
-       int err;
-
-       vq_req = vq_req_alloc(c2dev);
-       if (!vq_req)
-               return -ENOMEM;
-
-       c2_wr_set_id(&wr, CCWR_QP_MODIFY);
-       wr.hdr.context = (unsigned long) vq_req;
-       wr.rnic_handle = c2dev->adapter_handle;
-       wr.qp_handle = qp->adapter_handle;
-       wr.ord = cpu_to_be32(ord);
-       wr.ird = cpu_to_be32(ird);
-       wr.sq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
-       wr.rq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
-       wr.next_qp_state = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
-
-       /* reference the request struct */
-       vq_req_get(c2dev, vq_req);
-
-       err = vq_send_wr(c2dev, (union c2wr *) & wr);
-       if (err) {
-               vq_req_put(c2dev, vq_req);
-               goto bail0;
-       }
-
-       err = vq_wait_for_reply(c2dev, vq_req);
-       if (err)
-               goto bail0;
-
-       reply = (struct c2wr_qp_modify_rep *) (unsigned long)
-               vq_req->reply_msg;
-       if (!reply) {
-               err = -ENOMEM;
-               goto bail0;
-       }
-
-       err = c2_errno(reply);
-       vq_repbuf_free(c2dev, reply);
-bail0:
-       vq_req_free(c2dev, vq_req);
-       return err;
-}
-
-static int destroy_qp(struct c2_dev *c2dev, struct c2_qp *qp)
-{
-       struct c2_vq_req *vq_req;
-       struct c2wr_qp_destroy_req wr;
-       struct c2wr_qp_destroy_rep *reply;
-       unsigned long flags;
-       int err;
-
-       /*
-        * Allocate a verb request message
-        */
-       vq_req = vq_req_alloc(c2dev);
-       if (!vq_req) {
-               return -ENOMEM;
-       }
-
-       /*
-        * Initialize the WR
-        */
-       c2_wr_set_id(&wr, CCWR_QP_DESTROY);
-       wr.hdr.context = (unsigned long) vq_req;
-       wr.rnic_handle = c2dev->adapter_handle;
-       wr.qp_handle = qp->adapter_handle;
-
-       /*
-        * reference the request struct.  dereferenced in the int handler.
-        */
-       vq_req_get(c2dev, vq_req);
-
-       spin_lock_irqsave(&qp->lock, flags);
-       if (qp->cm_id && qp->state == IB_QPS_RTS) {
-               pr_debug("destroy_qp: generating CLOSE event for QP-->ERR, "
-                       "qp=%p, cm_id=%p\n",qp,qp->cm_id);
-               /* Generate an CLOSE event */
-               vq_req->qp = qp;
-               vq_req->cm_id = qp->cm_id;
-               vq_req->event = IW_CM_EVENT_CLOSE;
-       }
-       spin_unlock_irqrestore(&qp->lock, flags);
-
-       /*
-        * Send WR to adapter
-        */
-       err = vq_send_wr(c2dev, (union c2wr *) & wr);
-       if (err) {
-               vq_req_put(c2dev, vq_req);
-               goto bail0;
-       }
-
-       /*
-        * Wait for reply from adapter
-        */
-       err = vq_wait_for_reply(c2dev, vq_req);
-       if (err) {
-               goto bail0;
-       }
-
-       /*
-        * Process reply
-        */
-       reply = (struct c2wr_qp_destroy_rep *) (unsigned long) (vq_req->reply_msg);
-       if (!reply) {
-               err = -ENOMEM;
-               goto bail0;
-       }
-
-       spin_lock_irqsave(&qp->lock, flags);
-       if (qp->cm_id) {
-               qp->cm_id->rem_ref(qp->cm_id);
-               qp->cm_id = NULL;
-       }
-       spin_unlock_irqrestore(&qp->lock, flags);
-
-       vq_repbuf_free(c2dev, reply);
-bail0:
-       vq_req_free(c2dev, vq_req);
-       return err;
-}
-
-static int c2_alloc_qpn(struct c2_dev *c2dev, struct c2_qp *qp)
-{
-       int ret;
-
-       idr_preload(GFP_KERNEL);
-       spin_lock_irq(&c2dev->qp_table.lock);
-
-       ret = idr_alloc_cyclic(&c2dev->qp_table.idr, qp, 0, 0, GFP_NOWAIT);
-       if (ret >= 0)
-               qp->qpn = ret;
-
-       spin_unlock_irq(&c2dev->qp_table.lock);
-       idr_preload_end();
-       return ret < 0 ? ret : 0;
-}
-
-static void c2_free_qpn(struct c2_dev *c2dev, int qpn)
-{
-       spin_lock_irq(&c2dev->qp_table.lock);
-       idr_remove(&c2dev->qp_table.idr, qpn);
-       spin_unlock_irq(&c2dev->qp_table.lock);
-}
-
-struct c2_qp *c2_find_qpn(struct c2_dev *c2dev, int qpn)
-{
-       unsigned long flags;
-       struct c2_qp *qp;
-
-       spin_lock_irqsave(&c2dev->qp_table.lock, flags);
-       qp = idr_find(&c2dev->qp_table.idr, qpn);
-       spin_unlock_irqrestore(&c2dev->qp_table.lock, flags);
-       return qp;
-}
-
-int c2_alloc_qp(struct c2_dev *c2dev,
-               struct c2_pd *pd,
-               struct ib_qp_init_attr *qp_attrs, struct c2_qp *qp)
-{
-       struct c2wr_qp_create_req wr;
-       struct c2wr_qp_create_rep *reply;
-       struct c2_vq_req *vq_req;
-       struct c2_cq *send_cq = to_c2cq(qp_attrs->send_cq);
-       struct c2_cq *recv_cq = to_c2cq(qp_attrs->recv_cq);
-       unsigned long peer_pa;
-       u32 q_size, msg_size, mmap_size;
-       void __iomem *mmap;
-       int err;
-
-       err = c2_alloc_qpn(c2dev, qp);
-       if (err)
-               return err;
-       qp->ibqp.qp_num = qp->qpn;
-       qp->ibqp.qp_type = IB_QPT_RC;
-
-       /* Allocate the SQ and RQ shared pointers */
-       qp->sq_mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
-                                        &qp->sq_mq.shared_dma, GFP_KERNEL);
-       if (!qp->sq_mq.shared) {
-               err = -ENOMEM;
-               goto bail0;
-       }
-
-       qp->rq_mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
-                                        &qp->rq_mq.shared_dma, GFP_KERNEL);
-       if (!qp->rq_mq.shared) {
-               err = -ENOMEM;
-               goto bail1;
-       }
-
-       /* Allocate the verbs request */
-       vq_req = vq_req_alloc(c2dev);
-       if (vq_req == NULL) {
-               err = -ENOMEM;
-               goto bail2;
-       }
-
-       /* Initialize the work request */
-       memset(&wr, 0, sizeof(wr));
-       c2_wr_set_id(&wr, CCWR_QP_CREATE);
-       wr.hdr.context = (unsigned long) vq_req;
-       wr.rnic_handle = c2dev->adapter_handle;
-       wr.sq_cq_handle = send_cq->adapter_handle;
-       wr.rq_cq_handle = recv_cq->adapter_handle;
-       wr.sq_depth = cpu_to_be32(qp_attrs->cap.max_send_wr + 1);
-       wr.rq_depth = cpu_to_be32(qp_attrs->cap.max_recv_wr + 1);
-       wr.srq_handle = 0;
-       wr.flags = cpu_to_be32(QP_RDMA_READ | QP_RDMA_WRITE | QP_MW_BIND |
-                              QP_ZERO_STAG | QP_RDMA_READ_RESPONSE);
-       wr.send_sgl_depth = cpu_to_be32(qp_attrs->cap.max_send_sge);
-       wr.recv_sgl_depth = cpu_to_be32(qp_attrs->cap.max_recv_sge);
-       wr.rdma_write_sgl_depth = cpu_to_be32(qp_attrs->cap.max_send_sge);
-       wr.shared_sq_ht = cpu_to_be64(qp->sq_mq.shared_dma);
-       wr.shared_rq_ht = cpu_to_be64(qp->rq_mq.shared_dma);
-       wr.ord = cpu_to_be32(C2_MAX_ORD_PER_QP);
-       wr.ird = cpu_to_be32(C2_MAX_IRD_PER_QP);
-       wr.pd_id = pd->pd_id;
-       wr.user_context = (unsigned long) qp;
-
-       vq_req_get(c2dev, vq_req);
-
-       /* Send the WR to the adapter */
-       err = vq_send_wr(c2dev, (union c2wr *) & wr);
-       if (err) {
-               vq_req_put(c2dev, vq_req);
-               goto bail3;
-       }
-
-       /* Wait for the verb reply  */
-       err = vq_wait_for_reply(c2dev, vq_req);
-       if (err) {
-               goto bail3;
-       }
-
-       /* Process the reply */
-       reply = (struct c2wr_qp_create_rep *) (unsigned long) (vq_req->reply_msg);
-       if (!reply) {
-               err = -ENOMEM;
-               goto bail3;
-       }
-
-       if ((err = c2_wr_get_result(reply)) != 0) {
-               goto bail4;
-       }
-
-       /* Fill in the kernel QP struct */
-       atomic_set(&qp->refcount, 1);
-       qp->adapter_handle = reply->qp_handle;
-       qp->state = IB_QPS_RESET;
-       qp->send_sgl_depth = qp_attrs->cap.max_send_sge;
-       qp->rdma_write_sgl_depth = qp_attrs->cap.max_send_sge;
-       qp->recv_sgl_depth = qp_attrs->cap.max_recv_sge;
-       init_waitqueue_head(&qp->wait);
-
-       /* Initialize the SQ MQ */
-       q_size = be32_to_cpu(reply->sq_depth);
-       msg_size = be32_to_cpu(reply->sq_msg_size);
-       peer_pa = c2dev->pa + be32_to_cpu(reply->sq_mq_start);
-       mmap_size = PAGE_ALIGN(sizeof(struct c2_mq_shared) + msg_size * q_size);
-       mmap = ioremap_nocache(peer_pa, mmap_size);
-       if (!mmap) {
-               err = -ENOMEM;
-               goto bail5;
-       }
-
-       c2_mq_req_init(&qp->sq_mq,
-                      be32_to_cpu(reply->sq_mq_index),
-                      q_size,
-                      msg_size,
-                      mmap + sizeof(struct c2_mq_shared),      /* pool start */
-                      mmap,                            /* peer */
-                      C2_MQ_ADAPTER_TARGET);
-
-       /* Initialize the RQ mq */
-       q_size = be32_to_cpu(reply->rq_depth);
-       msg_size = be32_to_cpu(reply->rq_msg_size);
-       peer_pa = c2dev->pa + be32_to_cpu(reply->rq_mq_start);
-       mmap_size = PAGE_ALIGN(sizeof(struct c2_mq_shared) + msg_size * q_size);
-       mmap = ioremap_nocache(peer_pa, mmap_size);
-       if (!mmap) {
-               err = -ENOMEM;
-               goto bail6;
-       }
-
-       c2_mq_req_init(&qp->rq_mq,
-                      be32_to_cpu(reply->rq_mq_index),
-                      q_size,
-                      msg_size,
-                      mmap + sizeof(struct c2_mq_shared),      /* pool start */
-                      mmap,                            /* peer */
-                      C2_MQ_ADAPTER_TARGET);
-
-       vq_repbuf_free(c2dev, reply);
-       vq_req_free(c2dev, vq_req);
-
-       return 0;
-
-bail6:
-       iounmap(qp->sq_mq.peer);
-bail5:
-       destroy_qp(c2dev, qp);
-bail4:
-       vq_repbuf_free(c2dev, reply);
-bail3:
-       vq_req_free(c2dev, vq_req);
-bail2:
-       c2_free_mqsp(qp->rq_mq.shared);
-bail1:
-       c2_free_mqsp(qp->sq_mq.shared);
-bail0:
-       c2_free_qpn(c2dev, qp->qpn);
-       return err;
-}
-
-static inline void c2_lock_cqs(struct c2_cq *send_cq, struct c2_cq *recv_cq)
-{
-       if (send_cq == recv_cq)
-               spin_lock_irq(&send_cq->lock);
-       else if (send_cq > recv_cq) {
-               spin_lock_irq(&send_cq->lock);
-               spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
-       } else {
-               spin_lock_irq(&recv_cq->lock);
-               spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING);
-       }
-}
-
-static inline void c2_unlock_cqs(struct c2_cq *send_cq, struct c2_cq *recv_cq)
-{
-       if (send_cq == recv_cq)
-               spin_unlock_irq(&send_cq->lock);
-       else if (send_cq > recv_cq) {
-               spin_unlock(&recv_cq->lock);
-               spin_unlock_irq(&send_cq->lock);
-       } else {
-               spin_unlock(&send_cq->lock);
-               spin_unlock_irq(&recv_cq->lock);
-       }
-}
-
-void c2_free_qp(struct c2_dev *c2dev, struct c2_qp *qp)
-{
-       struct c2_cq *send_cq;
-       struct c2_cq *recv_cq;
-
-       send_cq = to_c2cq(qp->ibqp.send_cq);
-       recv_cq = to_c2cq(qp->ibqp.recv_cq);
-
-       /*
-        * Lock CQs here, so that CQ polling code can do QP lookup
-        * without taking a lock.
-        */
-       c2_lock_cqs(send_cq, recv_cq);
-       c2_free_qpn(c2dev, qp->qpn);
-       c2_unlock_cqs(send_cq, recv_cq);
-
-       /*
-        * Destroy qp in the rnic...
-        */
-       destroy_qp(c2dev, qp);
-
-       /*
-        * Mark any unreaped CQEs as null and void.
-        */
-       c2_cq_clean(c2dev, qp, send_cq->cqn);
-       if (send_cq != recv_cq)
-               c2_cq_clean(c2dev, qp, recv_cq->cqn);
-       /*
-        * Unmap the MQs and return the shared pointers
-        * to the message pool.
-        */
-       iounmap(qp->sq_mq.peer);
-       iounmap(qp->rq_mq.peer);
-       c2_free_mqsp(qp->sq_mq.shared);
-       c2_free_mqsp(qp->rq_mq.shared);
-
-       atomic_dec(&qp->refcount);
-       wait_event(qp->wait, !atomic_read(&qp->refcount));
-}
-
-/*
- * Function: move_sgl
- *
- * Description:
- * Move an SGL from the user's work request struct into a CCIL Work Request
- * message, swapping to WR byte order and ensure the total length doesn't
- * overflow.
- *
- * IN:
- * dst         - ptr to CCIL Work Request message SGL memory.
- * src         - ptr to the consumers SGL memory.
- *
- * OUT: none
- *
- * Return:
- * CCIL status codes.
- */
-static int
-move_sgl(struct c2_data_addr * dst, struct ib_sge *src, int count, u32 * p_len,
-        u8 * actual_count)
-{
-       u32 tot = 0;            /* running total */
-       u8 acount = 0;          /* running total non-0 len sge's */
-
-       while (count > 0) {
-               /*
-                * If the addition of this SGE causes the
-                * total SGL length to exceed 2^32-1, then
-                * fail-n-bail.
-                *
-                * If the current total plus the next element length
-                * wraps, then it will go negative and be less than the
-                * current total...
-                */
-               if ((tot + src->length) < tot) {
-                       return -EINVAL;
-               }
-               /*
-                * Bug: 1456 (as well as 1498 & 1643)
-                * Skip over any sge's supplied with len=0
-                */
-               if (src->length) {
-                       tot += src->length;
-                       dst->stag = cpu_to_be32(src->lkey);
-                       dst->to = cpu_to_be64(src->addr);
-                       dst->length = cpu_to_be32(src->length);
-                       dst++;
-                       acount++;
-               }
-               src++;
-               count--;
-       }
-
-       if (acount == 0) {
-               /*
-                * Bug: 1476 (as well as 1498, 1456 and 1643)
-                * Setup the SGL in the WR to make it easier for the RNIC.
-                * This way, the FW doesn't have to deal with special cases.
-                * Setting length=0 should be sufficient.
-                */
-               dst->stag = 0;
-               dst->to = 0;
-               dst->length = 0;
-       }
-
-       *p_len = tot;
-       *actual_count = acount;
-       return 0;
-}
-
-/*
- * Function: c2_activity (private function)
- *
- * Description:
- * Post an mq index to the host->adapter activity fifo.
- *
- * IN:
- * c2dev       - ptr to c2dev structure
- * mq_index    - mq index to post
- * shared      - value most recently written to shared
- *
- * OUT:
- *
- * Return:
- * none
- */
-static inline void c2_activity(struct c2_dev *c2dev, u32 mq_index, u16 shared)
-{
-       /*
-        * First read the register to see if the FIFO is full, and if so,
-        * spin until it's not.  This isn't perfect -- there is no
-        * synchronization among the clients of the register, but in
-        * practice it prevents multiple CPU from hammering the bus
-        * with PCI RETRY. Note that when this does happen, the card
-        * cannot get on the bus and the card and system hang in a
-        * deadlock -- thus the need for this code. [TOT]
-        */
-       while (readl(c2dev->regs + PCI_BAR0_ADAPTER_HINT) & 0x80000000)
-               udelay(10);
-
-       __raw_writel(C2_HINT_MAKE(mq_index, shared),
-                    c2dev->regs + PCI_BAR0_ADAPTER_HINT);
-}
-
-/*
- * Function: qp_wr_post
- *
- * Description:
- * This in-line function allocates a MQ msg, then moves the host-copy of
- * the completed WR into msg.  Then it posts the message.
- *
- * IN:
- * q           - ptr to user MQ.
- * wr          - ptr to host-copy of the WR.
- * qp          - ptr to user qp
- * size                - Number of bytes to post.  Assumed to be divisible by 4.
- *
- * OUT: none
- *
- * Return:
- * CCIL status codes.
- */
-static int qp_wr_post(struct c2_mq *q, union c2wr * wr, struct c2_qp *qp, u32 size)
-{
-       union c2wr *msg;
-
-       msg = c2_mq_alloc(q);
-       if (msg == NULL) {
-               return -EINVAL;
-       }
-#ifdef CCMSGMAGIC
-       ((c2wr_hdr_t *) wr)->magic = cpu_to_be32(CCWR_MAGIC);
-#endif
-
-       /*
-        * Since all header fields in the WR are the same as the
-        * CQE, set the following so the adapter need not.
-        */
-       c2_wr_set_result(wr, CCERR_PENDING);
-
-       /*
-        * Copy the wr down to the adapter
-        */
-       memcpy((void *) msg, (void *) wr, size);
-
-       c2_mq_produce(q);
-       return 0;
-}
-
-
-int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
-                struct ib_send_wr **bad_wr)
-{
-       struct c2_dev *c2dev = to_c2dev(ibqp->device);
-       struct c2_qp *qp = to_c2qp(ibqp);
-       union c2wr wr;
-       unsigned long lock_flags;
-       int err = 0;
-
-       u32 flags;
-       u32 tot_len;
-       u8 actual_sge_count;
-       u32 msg_size;
-
-       if (qp->state > IB_QPS_RTS) {
-               err = -EINVAL;
-               goto out;
-       }
-
-       while (ib_wr) {
-
-               flags = 0;
-               wr.sqwr.sq_hdr.user_hdr.hdr.context = ib_wr->wr_id;
-               if (ib_wr->send_flags & IB_SEND_SIGNALED) {
-                       flags |= SQ_SIGNALED;
-               }
-
-               switch (ib_wr->opcode) {
-               case IB_WR_SEND:
-               case IB_WR_SEND_WITH_INV:
-                       if (ib_wr->opcode == IB_WR_SEND) {
-                               if (ib_wr->send_flags & IB_SEND_SOLICITED)
-                                       c2_wr_set_id(&wr, C2_WR_TYPE_SEND_SE);
-                               else
-                                       c2_wr_set_id(&wr, C2_WR_TYPE_SEND);
-                               wr.sqwr.send.remote_stag = 0;
-                       } else {
-                               if (ib_wr->send_flags & IB_SEND_SOLICITED)
-                                       c2_wr_set_id(&wr, C2_WR_TYPE_SEND_SE_INV);
-                               else
-                                       c2_wr_set_id(&wr, C2_WR_TYPE_SEND_INV);
-                               wr.sqwr.send.remote_stag =
-                                       cpu_to_be32(ib_wr->ex.invalidate_rkey);
-                       }
-
-                       msg_size = sizeof(struct c2wr_send_req) +
-                               sizeof(struct c2_data_addr) * ib_wr->num_sge;
-                       if (ib_wr->num_sge > qp->send_sgl_depth) {
-                               err = -EINVAL;
-                               break;
-                       }
-                       if (ib_wr->send_flags & IB_SEND_FENCE) {
-                               flags |= SQ_READ_FENCE;
-                       }
-                       err = move_sgl((struct c2_data_addr *) & (wr.sqwr.send.data),
-                                      ib_wr->sg_list,
-                                      ib_wr->num_sge,
-                                      &tot_len, &actual_sge_count);
-                       wr.sqwr.send.sge_len = cpu_to_be32(tot_len);
-                       c2_wr_set_sge_count(&wr, actual_sge_count);
-                       break;
-               case IB_WR_RDMA_WRITE:
-                       c2_wr_set_id(&wr, C2_WR_TYPE_RDMA_WRITE);
-                       msg_size = sizeof(struct c2wr_rdma_write_req) +
-                           (sizeof(struct c2_data_addr) * ib_wr->num_sge);
-                       if (ib_wr->num_sge > qp->rdma_write_sgl_depth) {
-                               err = -EINVAL;
-                               break;
-                       }
-                       if (ib_wr->send_flags & IB_SEND_FENCE) {
-                               flags |= SQ_READ_FENCE;
-                       }
-                       wr.sqwr.rdma_write.remote_stag =
-                           cpu_to_be32(rdma_wr(ib_wr)->rkey);
-                       wr.sqwr.rdma_write.remote_to =
-                           cpu_to_be64(rdma_wr(ib_wr)->remote_addr);
-                       err = move_sgl((struct c2_data_addr *)
-                                      & (wr.sqwr.rdma_write.data),
-                                      ib_wr->sg_list,
-                                      ib_wr->num_sge,
-                                      &tot_len, &actual_sge_count);
-                       wr.sqwr.rdma_write.sge_len = cpu_to_be32(tot_len);
-                       c2_wr_set_sge_count(&wr, actual_sge_count);
-                       break;
-               case IB_WR_RDMA_READ:
-                       c2_wr_set_id(&wr, C2_WR_TYPE_RDMA_READ);
-                       msg_size = sizeof(struct c2wr_rdma_read_req);
-
-                       /* IWarp only suppots 1 sge for RDMA reads */
-                       if (ib_wr->num_sge > 1) {
-                               err = -EINVAL;
-                               break;
-                       }
-
-                       /*
-                        * Move the local and remote stag/to/len into the WR.
-                        */
-                       wr.sqwr.rdma_read.local_stag =
-                           cpu_to_be32(ib_wr->sg_list->lkey);
-                       wr.sqwr.rdma_read.local_to =
-                           cpu_to_be64(ib_wr->sg_list->addr);
-                       wr.sqwr.rdma_read.remote_stag =
-                           cpu_to_be32(rdma_wr(ib_wr)->rkey);
-                       wr.sqwr.rdma_read.remote_to =
-                           cpu_to_be64(rdma_wr(ib_wr)->remote_addr);
-                       wr.sqwr.rdma_read.length =
-                           cpu_to_be32(ib_wr->sg_list->length);
-                       break;
-               default:
-                       /* error */
-                       msg_size = 0;
-                       err = -EINVAL;
-                       break;
-               }
-
-               /*
-                * If we had an error on the last wr build, then
-                * break out.  Possible errors include bogus WR
-                * type, and a bogus SGL length...
-                */
-               if (err) {
-                       break;
-               }
-
-               /*
-                * Store flags
-                */
-               c2_wr_set_flags(&wr, flags);
-
-               /*
-                * Post the puppy!
-                */
-               spin_lock_irqsave(&qp->lock, lock_flags);
-               err = qp_wr_post(&qp->sq_mq, &wr, qp, msg_size);
-               if (err) {
-                       spin_unlock_irqrestore(&qp->lock, lock_flags);
-                       break;
-               }
-
-               /*
-                * Enqueue mq index to activity FIFO.
-                */
-               c2_activity(c2dev, qp->sq_mq.index, qp->sq_mq.hint_count);
-               spin_unlock_irqrestore(&qp->lock, lock_flags);
-
-               ib_wr = ib_wr->next;
-       }
-
-out:
-       if (err)
-               *bad_wr = ib_wr;
-       return err;
-}
-
-int c2_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr,
-                   struct ib_recv_wr **bad_wr)
-{
-       struct c2_dev *c2dev = to_c2dev(ibqp->device);
-       struct c2_qp *qp = to_c2qp(ibqp);
-       union c2wr wr;
-       unsigned long lock_flags;
-       int err = 0;
-
-       if (qp->state > IB_QPS_RTS) {
-               err = -EINVAL;
-               goto out;
-       }
-
-       /*
-        * Try and post each work request
-        */
-       while (ib_wr) {
-               u32 tot_len;
-               u8 actual_sge_count;
-
-               if (ib_wr->num_sge > qp->recv_sgl_depth) {
-                       err = -EINVAL;
-                       break;
-               }
-
-               /*
-                * Create local host-copy of the WR
-                */
-               wr.rqwr.rq_hdr.user_hdr.hdr.context = ib_wr->wr_id;
-               c2_wr_set_id(&wr, CCWR_RECV);
-               c2_wr_set_flags(&wr, 0);
-
-               /* sge_count is limited to eight bits. */
-               BUG_ON(ib_wr->num_sge >= 256);
-               err = move_sgl((struct c2_data_addr *) & (wr.rqwr.data),
-                              ib_wr->sg_list,
-                              ib_wr->num_sge, &tot_len, &actual_sge_count);
-               c2_wr_set_sge_count(&wr, actual_sge_count);
-
-               /*
-                * If we had an error on the last wr build, then
-                * break out.  Possible errors include bogus WR
-                * type, and a bogus SGL length...
-                */
-               if (err) {
-                       break;
-               }
-
-               spin_lock_irqsave(&qp->lock, lock_flags);
-               err = qp_wr_post(&qp->rq_mq, &wr, qp, qp->rq_mq.msg_size);
-               if (err) {
-                       spin_unlock_irqrestore(&qp->lock, lock_flags);
-                       break;
-               }
-
-               /*
-                * Enqueue mq index to activity FIFO
-                */
-               c2_activity(c2dev, qp->rq_mq.index, qp->rq_mq.hint_count);
-               spin_unlock_irqrestore(&qp->lock, lock_flags);
-
-               ib_wr = ib_wr->next;
-       }
-
-out:
-       if (err)
-               *bad_wr = ib_wr;
-       return err;
-}
-
-void c2_init_qp_table(struct c2_dev *c2dev)
-{
-       spin_lock_init(&c2dev->qp_table.lock);
-       idr_init(&c2dev->qp_table.idr);
-}
-
-void c2_cleanup_qp_table(struct c2_dev *c2dev)
-{
-       idr_destroy(&c2dev->qp_table.idr);
-}
diff --git a/drivers/staging/rdma/amso1100/c2_rnic.c b/drivers/staging/rdma/amso1100/c2_rnic.c
deleted file mode 100644 (file)
index 5e65c6d..0000000
+++ /dev/null
@@ -1,652 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/pci.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/delay.h>
-#include <linux/ethtool.h>
-#include <linux/mii.h>
-#include <linux/if_vlan.h>
-#include <linux/crc32.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/init.h>
-#include <linux/dma-mapping.h>
-#include <linux/mm.h>
-#include <linux/inet.h>
-#include <linux/vmalloc.h>
-#include <linux/slab.h>
-
-#include <linux/route.h>
-
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/byteorder.h>
-#include <rdma/ib_smi.h>
-#include "c2.h"
-#include "c2_vq.h"
-
-/* Device capabilities */
-#define C2_MIN_PAGESIZE  1024
-
-#define C2_MAX_MRS       32768
-#define C2_MAX_QPS       16000
-#define C2_MAX_WQE_SZ    256
-#define C2_MAX_QP_WR     ((128*1024)/C2_MAX_WQE_SZ)
-#define C2_MAX_SGES      4
-#define C2_MAX_SGE_RD    1
-#define C2_MAX_CQS       32768
-#define C2_MAX_CQES      4096
-#define C2_MAX_PDS       16384
-
-/*
- * Send the adapter INIT message to the amso1100
- */
-static int c2_adapter_init(struct c2_dev *c2dev)
-{
-       struct c2wr_init_req wr;
-
-       memset(&wr, 0, sizeof(wr));
-       c2_wr_set_id(&wr, CCWR_INIT);
-       wr.hdr.context = 0;
-       wr.hint_count = cpu_to_be64(c2dev->hint_count_dma);
-       wr.q0_host_shared = cpu_to_be64(c2dev->req_vq.shared_dma);
-       wr.q1_host_shared = cpu_to_be64(c2dev->rep_vq.shared_dma);
-       wr.q1_host_msg_pool = cpu_to_be64(c2dev->rep_vq.host_dma);
-       wr.q2_host_shared = cpu_to_be64(c2dev->aeq.shared_dma);
-       wr.q2_host_msg_pool = cpu_to_be64(c2dev->aeq.host_dma);
-
-       /* Post the init message */
-       return vq_send_wr(c2dev, (union c2wr *) & wr);
-}
-
-/*
- * Send the adapter TERM message to the amso1100
- */
-static void c2_adapter_term(struct c2_dev *c2dev)
-{
-       struct c2wr_init_req wr;
-
-       memset(&wr, 0, sizeof(wr));
-       c2_wr_set_id(&wr, CCWR_TERM);
-       wr.hdr.context = 0;
-
-       /* Post the init message */
-       vq_send_wr(c2dev, (union c2wr *) & wr);
-       c2dev->init = 0;
-
-       return;
-}
-
-/*
- * Query the adapter
- */
-static int c2_rnic_query(struct c2_dev *c2dev, struct ib_device_attr *props)
-{
-       struct c2_vq_req *vq_req;
-       struct c2wr_rnic_query_req wr;
-       struct c2wr_rnic_query_rep *reply;
-       int err;
-
-       vq_req = vq_req_alloc(c2dev);
-       if (!vq_req)
-               return -ENOMEM;
-
-       c2_wr_set_id(&wr, CCWR_RNIC_QUERY);
-       wr.hdr.context = (unsigned long) vq_req;
-       wr.rnic_handle = c2dev->adapter_handle;
-
-       vq_req_get(c2dev, vq_req);
-
-       err = vq_send_wr(c2dev, (union c2wr *) &wr);
-       if (err) {
-               vq_req_put(c2dev, vq_req);
-               goto bail1;
-       }
-
-       err = vq_wait_for_reply(c2dev, vq_req);
-       if (err)
-               goto bail1;
-
-       reply =
-           (struct c2wr_rnic_query_rep *) (unsigned long) (vq_req->reply_msg);
-       if (!reply)
-               err = -ENOMEM;
-       else
-               err = c2_errno(reply);
-       if (err)
-               goto bail2;
-
-       props->fw_ver =
-               ((u64)be32_to_cpu(reply->fw_ver_major) << 32) |
-               ((be32_to_cpu(reply->fw_ver_minor) & 0xFFFF) << 16) |
-               (be32_to_cpu(reply->fw_ver_patch) & 0xFFFF);
-       memcpy(&props->sys_image_guid, c2dev->netdev->dev_addr, 6);
-       props->max_mr_size         = 0xFFFFFFFF;
-       props->page_size_cap       = ~(C2_MIN_PAGESIZE-1);
-       props->vendor_id           = be32_to_cpu(reply->vendor_id);
-       props->vendor_part_id      = be32_to_cpu(reply->part_number);
-       props->hw_ver              = be32_to_cpu(reply->hw_version);
-       props->max_qp              = be32_to_cpu(reply->max_qps);
-       props->max_qp_wr           = be32_to_cpu(reply->max_qp_depth);
-       props->device_cap_flags    = c2dev->device_cap_flags;
-       props->max_sge             = C2_MAX_SGES;
-       props->max_sge_rd          = C2_MAX_SGE_RD;
-       props->max_cq              = be32_to_cpu(reply->max_cqs);
-       props->max_cqe             = be32_to_cpu(reply->max_cq_depth);
-       props->max_mr              = be32_to_cpu(reply->max_mrs);
-       props->max_pd              = be32_to_cpu(reply->max_pds);
-       props->max_qp_rd_atom      = be32_to_cpu(reply->max_qp_ird);
-       props->max_ee_rd_atom      = 0;
-       props->max_res_rd_atom     = be32_to_cpu(reply->max_global_ird);
-       props->max_qp_init_rd_atom = be32_to_cpu(reply->max_qp_ord);
-       props->max_ee_init_rd_atom = 0;
-       props->atomic_cap          = IB_ATOMIC_NONE;
-       props->max_ee              = 0;
-       props->max_rdd             = 0;
-       props->max_mw              = be32_to_cpu(reply->max_mws);
-       props->max_raw_ipv6_qp     = 0;
-       props->max_raw_ethy_qp     = 0;
-       props->max_mcast_grp       = 0;
-       props->max_mcast_qp_attach = 0;
-       props->max_total_mcast_qp_attach = 0;
-       props->max_ah              = 0;
-       props->max_fmr             = 0;
-       props->max_map_per_fmr     = 0;
-       props->max_srq             = 0;
-       props->max_srq_wr          = 0;
-       props->max_srq_sge         = 0;
-       props->max_pkeys           = 0;
-       props->local_ca_ack_delay  = 0;
-
- bail2:
-       vq_repbuf_free(c2dev, reply);
-
- bail1:
-       vq_req_free(c2dev, vq_req);
-       return err;
-}
-
-/*
- * Add an IP address to the RNIC interface
- */
-int c2_add_addr(struct c2_dev *c2dev, __be32 inaddr, __be32 inmask)
-{
-       struct c2_vq_req *vq_req;
-       struct c2wr_rnic_setconfig_req *wr;
-       struct c2wr_rnic_setconfig_rep *reply;
-       struct c2_netaddr netaddr;
-       int err, len;
-
-       vq_req = vq_req_alloc(c2dev);
-       if (!vq_req)
-               return -ENOMEM;
-
-       len = sizeof(struct c2_netaddr);
-       wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
-       if (!wr) {
-               err = -ENOMEM;
-               goto bail0;
-       }
-
-       c2_wr_set_id(wr, CCWR_RNIC_SETCONFIG);
-       wr->hdr.context = (unsigned long) vq_req;
-       wr->rnic_handle = c2dev->adapter_handle;
-       wr->option = cpu_to_be32(C2_CFG_ADD_ADDR);
-
-       netaddr.ip_addr = inaddr;
-       netaddr.netmask = inmask;
-       netaddr.mtu = 0;
-
-       memcpy(wr->data, &netaddr, len);
-
-       vq_req_get(c2dev, vq_req);
-
-       err = vq_send_wr(c2dev, (union c2wr *) wr);
-       if (err) {
-               vq_req_put(c2dev, vq_req);
-               goto bail1;
-       }
-
-       err = vq_wait_for_reply(c2dev, vq_req);
-       if (err)
-               goto bail1;
-
-       reply =
-           (struct c2wr_rnic_setconfig_rep *) (unsigned long) (vq_req->reply_msg);
-       if (!reply) {
-               err = -ENOMEM;
-               goto bail1;
-       }
-
-       err = c2_errno(reply);
-       vq_repbuf_free(c2dev, reply);
-
-bail1:
-       kfree(wr);
-bail0:
-       vq_req_free(c2dev, vq_req);
-       return err;
-}
-
-/*
- * Delete an IP address from the RNIC interface
- */
-int c2_del_addr(struct c2_dev *c2dev, __be32 inaddr, __be32 inmask)
-{
-       struct c2_vq_req *vq_req;
-       struct c2wr_rnic_setconfig_req *wr;
-       struct c2wr_rnic_setconfig_rep *reply;
-       struct c2_netaddr netaddr;
-       int err, len;
-
-       vq_req = vq_req_alloc(c2dev);
-       if (!vq_req)
-               return -ENOMEM;
-
-       len = sizeof(struct c2_netaddr);
-       wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
-       if (!wr) {
-               err = -ENOMEM;
-               goto bail0;
-       }
-
-       c2_wr_set_id(wr, CCWR_RNIC_SETCONFIG);
-       wr->hdr.context = (unsigned long) vq_req;
-       wr->rnic_handle = c2dev->adapter_handle;
-       wr->option = cpu_to_be32(C2_CFG_DEL_ADDR);
-
-       netaddr.ip_addr = inaddr;
-       netaddr.netmask = inmask;
-       netaddr.mtu = 0;
-
-       memcpy(wr->data, &netaddr, len);
-
-       vq_req_get(c2dev, vq_req);
-
-       err = vq_send_wr(c2dev, (union c2wr *) wr);
-       if (err) {
-               vq_req_put(c2dev, vq_req);
-               goto bail1;
-       }
-
-       err = vq_wait_for_reply(c2dev, vq_req);
-       if (err)
-               goto bail1;
-
-       reply =
-           (struct c2wr_rnic_setconfig_rep *) (unsigned long) (vq_req->reply_msg);
-       if (!reply) {
-               err = -ENOMEM;
-               goto bail1;
-       }
-
-       err = c2_errno(reply);
-       vq_repbuf_free(c2dev, reply);
-
-bail1:
-       kfree(wr);
-bail0:
-       vq_req_free(c2dev, vq_req);
-       return err;
-}
-
-/*
- * Open a single RNIC instance to use with all
- * low level openib calls
- */
-static int c2_rnic_open(struct c2_dev *c2dev)
-{
-       struct c2_vq_req *vq_req;
-       union c2wr wr;
-       struct c2wr_rnic_open_rep *reply;
-       int err;
-
-       vq_req = vq_req_alloc(c2dev);
-       if (vq_req == NULL) {
-               return -ENOMEM;
-       }
-
-       memset(&wr, 0, sizeof(wr));
-       c2_wr_set_id(&wr, CCWR_RNIC_OPEN);
-       wr.rnic_open.req.hdr.context = (unsigned long) (vq_req);
-       wr.rnic_open.req.flags = cpu_to_be16(RNIC_PRIV_MODE);
-       wr.rnic_open.req.port_num = cpu_to_be16(0);
-       wr.rnic_open.req.user_context = (unsigned long) c2dev;
-
-       vq_req_get(c2dev, vq_req);
-
-       err = vq_send_wr(c2dev, &wr);
-       if (err) {
-               vq_req_put(c2dev, vq_req);
-               goto bail0;
-       }
-
-       err = vq_wait_for_reply(c2dev, vq_req);
-       if (err) {
-               goto bail0;
-       }
-
-       reply = (struct c2wr_rnic_open_rep *) (unsigned long) (vq_req->reply_msg);
-       if (!reply) {
-               err = -ENOMEM;
-               goto bail0;
-       }
-
-       if ((err = c2_errno(reply)) != 0) {
-               goto bail1;
-       }
-
-       c2dev->adapter_handle = reply->rnic_handle;
-
-bail1:
-       vq_repbuf_free(c2dev, reply);
-bail0:
-       vq_req_free(c2dev, vq_req);
-       return err;
-}
-
-/*
- * Close the RNIC instance
- */
-static int c2_rnic_close(struct c2_dev *c2dev)
-{
-       struct c2_vq_req *vq_req;
-       union c2wr wr;
-       struct c2wr_rnic_close_rep *reply;
-       int err;
-
-       vq_req = vq_req_alloc(c2dev);
-       if (vq_req == NULL) {
-               return -ENOMEM;
-       }
-
-       memset(&wr, 0, sizeof(wr));
-       c2_wr_set_id(&wr, CCWR_RNIC_CLOSE);
-       wr.rnic_close.req.hdr.context = (unsigned long) vq_req;
-       wr.rnic_close.req.rnic_handle = c2dev->adapter_handle;
-
-       vq_req_get(c2dev, vq_req);
-
-       err = vq_send_wr(c2dev, &wr);
-       if (err) {
-               vq_req_put(c2dev, vq_req);
-               goto bail0;
-       }
-
-       err = vq_wait_for_reply(c2dev, vq_req);
-       if (err) {
-               goto bail0;
-       }
-
-       reply = (struct c2wr_rnic_close_rep *) (unsigned long) (vq_req->reply_msg);
-       if (!reply) {
-               err = -ENOMEM;
-               goto bail0;
-       }
-
-       if ((err = c2_errno(reply)) != 0) {
-               goto bail1;
-       }
-
-       c2dev->adapter_handle = 0;
-
-bail1:
-       vq_repbuf_free(c2dev, reply);
-bail0:
-       vq_req_free(c2dev, vq_req);
-       return err;
-}
-
-/*
- * Called by c2_probe to initialize the RNIC. This principally
- * involves initializing the various limits and resource pools that
- * comprise the RNIC instance.
- */
-int c2_rnic_init(struct c2_dev *c2dev)
-{
-       int err;
-       u32 qsize, msgsize;
-       void *q1_pages;
-       void *q2_pages;
-       void __iomem *mmio_regs;
-
-       /* Device capabilities */
-       c2dev->device_cap_flags =
-           (IB_DEVICE_RESIZE_MAX_WR |
-            IB_DEVICE_CURR_QP_STATE_MOD |
-            IB_DEVICE_SYS_IMAGE_GUID |
-            IB_DEVICE_LOCAL_DMA_LKEY |
-            IB_DEVICE_MEM_WINDOW);
-
-       /* Allocate the qptr_array */
-       c2dev->qptr_array = vzalloc(C2_MAX_CQS * sizeof(void *));
-       if (!c2dev->qptr_array) {
-               return -ENOMEM;
-       }
-
-       /* Initialize the qptr_array */
-       c2dev->qptr_array[0] = (void *) &c2dev->req_vq;
-       c2dev->qptr_array[1] = (void *) &c2dev->rep_vq;
-       c2dev->qptr_array[2] = (void *) &c2dev->aeq;
-
-       /* Initialize data structures */
-       init_waitqueue_head(&c2dev->req_vq_wo);
-       spin_lock_init(&c2dev->vqlock);
-       spin_lock_init(&c2dev->lock);
-
-       /* Allocate MQ shared pointer pool for kernel clients. User
-        * mode client pools are hung off the user context
-        */
-       err = c2_init_mqsp_pool(c2dev, GFP_KERNEL, &c2dev->kern_mqsp_pool);
-       if (err) {
-               goto bail0;
-       }
-
-       /* Allocate shared pointers for Q0, Q1, and Q2 from
-        * the shared pointer pool.
-        */
-
-       c2dev->hint_count = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
-                                            &c2dev->hint_count_dma,
-                                            GFP_KERNEL);
-       c2dev->req_vq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
-                                            &c2dev->req_vq.shared_dma,
-                                            GFP_KERNEL);
-       c2dev->rep_vq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
-                                            &c2dev->rep_vq.shared_dma,
-                                            GFP_KERNEL);
-       c2dev->aeq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
-                                         &c2dev->aeq.shared_dma, GFP_KERNEL);
-       if (!c2dev->hint_count || !c2dev->req_vq.shared ||
-           !c2dev->rep_vq.shared || !c2dev->aeq.shared) {
-               err = -ENOMEM;
-               goto bail1;
-       }
-
-       mmio_regs = c2dev->kva;
-       /* Initialize the Verbs Request Queue */
-       c2_mq_req_init(&c2dev->req_vq, 0,
-                      be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q0_QSIZE)),
-                      be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q0_MSGSIZE)),
-                      mmio_regs +
-                      be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q0_POOLSTART)),
-                      mmio_regs +
-                      be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q0_SHARED)),
-                      C2_MQ_ADAPTER_TARGET);
-
-       /* Initialize the Verbs Reply Queue */
-       qsize = be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q1_QSIZE));
-       msgsize = be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q1_MSGSIZE));
-       q1_pages = dma_alloc_coherent(&c2dev->pcidev->dev, qsize * msgsize,
-                                     &c2dev->rep_vq.host_dma, GFP_KERNEL);
-       if (!q1_pages) {
-               err = -ENOMEM;
-               goto bail1;
-       }
-       dma_unmap_addr_set(&c2dev->rep_vq, mapping, c2dev->rep_vq.host_dma);
-       pr_debug("%s rep_vq va %p dma %llx\n", __func__, q1_pages,
-                (unsigned long long) c2dev->rep_vq.host_dma);
-       c2_mq_rep_init(&c2dev->rep_vq,
-                  1,
-                  qsize,
-                  msgsize,
-                  q1_pages,
-                  mmio_regs +
-                  be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q1_SHARED)),
-                  C2_MQ_HOST_TARGET);
-
-       /* Initialize the Asynchronus Event Queue */
-       qsize = be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q2_QSIZE));
-       msgsize = be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q2_MSGSIZE));
-       q2_pages = dma_alloc_coherent(&c2dev->pcidev->dev, qsize * msgsize,
-                                     &c2dev->aeq.host_dma, GFP_KERNEL);
-       if (!q2_pages) {
-               err = -ENOMEM;
-               goto bail2;
-       }
-       dma_unmap_addr_set(&c2dev->aeq, mapping, c2dev->aeq.host_dma);
-       pr_debug("%s aeq va %p dma %llx\n", __func__, q2_pages,
-                (unsigned long long) c2dev->aeq.host_dma);
-       c2_mq_rep_init(&c2dev->aeq,
-                      2,
-                      qsize,
-                      msgsize,
-                      q2_pages,
-                      mmio_regs +
-                      be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q2_SHARED)),
-                      C2_MQ_HOST_TARGET);
-
-       /* Initialize the verbs request allocator */
-       err = vq_init(c2dev);
-       if (err)
-               goto bail3;
-
-       /* Enable interrupts on the adapter */
-       writel(0, c2dev->regs + C2_IDIS);
-
-       /* create the WR init message */
-       err = c2_adapter_init(c2dev);
-       if (err)
-               goto bail4;
-       c2dev->init++;
-
-       /* open an adapter instance */
-       err = c2_rnic_open(c2dev);
-       if (err)
-               goto bail4;
-
-       /* Initialize cached the adapter limits */
-       err = c2_rnic_query(c2dev, &c2dev->props);
-       if (err)
-               goto bail5;
-
-       /* Initialize the PD pool */
-       err = c2_init_pd_table(c2dev);
-       if (err)
-               goto bail5;
-
-       /* Initialize the QP pool */
-       c2_init_qp_table(c2dev);
-       return 0;
-
-bail5:
-       c2_rnic_close(c2dev);
-bail4:
-       vq_term(c2dev);
-bail3:
-       dma_free_coherent(&c2dev->pcidev->dev,
-                         c2dev->aeq.q_size * c2dev->aeq.msg_size,
-                         q2_pages, dma_unmap_addr(&c2dev->aeq, mapping));
-bail2:
-       dma_free_coherent(&c2dev->pcidev->dev,
-                         c2dev->rep_vq.q_size * c2dev->rep_vq.msg_size,
-                         q1_pages, dma_unmap_addr(&c2dev->rep_vq, mapping));
-bail1:
-       c2_free_mqsp_pool(c2dev, c2dev->kern_mqsp_pool);
-bail0:
-       vfree(c2dev->qptr_array);
-
-       return err;
-}
-
-/*
- * Called by c2_remove to cleanup the RNIC resources.
- */
-void c2_rnic_term(struct c2_dev *c2dev)
-{
-
-       /* Close the open adapter instance */
-       c2_rnic_close(c2dev);
-
-       /* Send the TERM message to the adapter */
-       c2_adapter_term(c2dev);
-
-       /* Disable interrupts on the adapter */
-       writel(1, c2dev->regs + C2_IDIS);
-
-       /* Free the QP pool */
-       c2_cleanup_qp_table(c2dev);
-
-       /* Free the PD pool */
-       c2_cleanup_pd_table(c2dev);
-
-       /* Free the verbs request allocator */
-       vq_term(c2dev);
-
-       /* Free the asynchronus event queue */
-       dma_free_coherent(&c2dev->pcidev->dev,
-                         c2dev->aeq.q_size * c2dev->aeq.msg_size,
-                         c2dev->aeq.msg_pool.host,
-                         dma_unmap_addr(&c2dev->aeq, mapping));
-
-       /* Free the verbs reply queue */
-       dma_free_coherent(&c2dev->pcidev->dev,
-                         c2dev->rep_vq.q_size * c2dev->rep_vq.msg_size,
-                         c2dev->rep_vq.msg_pool.host,
-                         dma_unmap_addr(&c2dev->rep_vq, mapping));
-
-       /* Free the MQ shared pointer pool */
-       c2_free_mqsp_pool(c2dev, c2dev->kern_mqsp_pool);
-
-       /* Free the qptr_array */
-       vfree(c2dev->qptr_array);
-
-       return;
-}
diff --git a/drivers/staging/rdma/amso1100/c2_status.h b/drivers/staging/rdma/amso1100/c2_status.h
deleted file mode 100644 (file)
index 6ee4aa9..0000000
+++ /dev/null
@@ -1,158 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef        _C2_STATUS_H_
-#define _C2_STATUS_H_
-
-/*
- * Verbs Status Codes
- */
-enum c2_status {
-       C2_OK = 0,              /* This must be zero */
-       CCERR_INSUFFICIENT_RESOURCES = 1,
-       CCERR_INVALID_MODIFIER = 2,
-       CCERR_INVALID_MODE = 3,
-       CCERR_IN_USE = 4,
-       CCERR_INVALID_RNIC = 5,
-       CCERR_INTERRUPTED_OPERATION = 6,
-       CCERR_INVALID_EH = 7,
-       CCERR_INVALID_CQ = 8,
-       CCERR_CQ_EMPTY = 9,
-       CCERR_NOT_IMPLEMENTED = 10,
-       CCERR_CQ_DEPTH_TOO_SMALL = 11,
-       CCERR_PD_IN_USE = 12,
-       CCERR_INVALID_PD = 13,
-       CCERR_INVALID_SRQ = 14,
-       CCERR_INVALID_ADDRESS = 15,
-       CCERR_INVALID_NETMASK = 16,
-       CCERR_INVALID_QP = 17,
-       CCERR_INVALID_QP_STATE = 18,
-       CCERR_TOO_MANY_WRS_POSTED = 19,
-       CCERR_INVALID_WR_TYPE = 20,
-       CCERR_INVALID_SGL_LENGTH = 21,
-       CCERR_INVALID_SQ_DEPTH = 22,
-       CCERR_INVALID_RQ_DEPTH = 23,
-       CCERR_INVALID_ORD = 24,
-       CCERR_INVALID_IRD = 25,
-       CCERR_QP_ATTR_CANNOT_CHANGE = 26,
-       CCERR_INVALID_STAG = 27,
-       CCERR_QP_IN_USE = 28,
-       CCERR_OUTSTANDING_WRS = 29,
-       CCERR_STAG_IN_USE = 30,
-       CCERR_INVALID_STAG_INDEX = 31,
-       CCERR_INVALID_SGL_FORMAT = 32,
-       CCERR_ADAPTER_TIMEOUT = 33,
-       CCERR_INVALID_CQ_DEPTH = 34,
-       CCERR_INVALID_PRIVATE_DATA_LENGTH = 35,
-       CCERR_INVALID_EP = 36,
-       CCERR_MR_IN_USE = CCERR_STAG_IN_USE,
-       CCERR_FLUSHED = 38,
-       CCERR_INVALID_WQE = 39,
-       CCERR_LOCAL_QP_CATASTROPHIC_ERROR = 40,
-       CCERR_REMOTE_TERMINATION_ERROR = 41,
-       CCERR_BASE_AND_BOUNDS_VIOLATION = 42,
-       CCERR_ACCESS_VIOLATION = 43,
-       CCERR_INVALID_PD_ID = 44,
-       CCERR_WRAP_ERROR = 45,
-       CCERR_INV_STAG_ACCESS_ERROR = 46,
-       CCERR_ZERO_RDMA_READ_RESOURCES = 47,
-       CCERR_QP_NOT_PRIVILEGED = 48,
-       CCERR_STAG_STATE_NOT_INVALID = 49,
-       CCERR_INVALID_PAGE_SIZE = 50,
-       CCERR_INVALID_BUFFER_SIZE = 51,
-       CCERR_INVALID_PBE = 52,
-       CCERR_INVALID_FBO = 53,
-       CCERR_INVALID_LENGTH = 54,
-       CCERR_INVALID_ACCESS_RIGHTS = 55,
-       CCERR_PBL_TOO_BIG = 56,
-       CCERR_INVALID_VA = 57,
-       CCERR_INVALID_REGION = 58,
-       CCERR_INVALID_WINDOW = 59,
-       CCERR_TOTAL_LENGTH_TOO_BIG = 60,
-       CCERR_INVALID_QP_ID = 61,
-       CCERR_ADDR_IN_USE = 62,
-       CCERR_ADDR_NOT_AVAIL = 63,
-       CCERR_NET_DOWN = 64,
-       CCERR_NET_UNREACHABLE = 65,
-       CCERR_CONN_ABORTED = 66,
-       CCERR_CONN_RESET = 67,
-       CCERR_NO_BUFS = 68,
-       CCERR_CONN_TIMEDOUT = 69,
-       CCERR_CONN_REFUSED = 70,
-       CCERR_HOST_UNREACHABLE = 71,
-       CCERR_INVALID_SEND_SGL_DEPTH = 72,
-       CCERR_INVALID_RECV_SGL_DEPTH = 73,
-       CCERR_INVALID_RDMA_WRITE_SGL_DEPTH = 74,
-       CCERR_INSUFFICIENT_PRIVILEGES = 75,
-       CCERR_STACK_ERROR = 76,
-       CCERR_INVALID_VERSION = 77,
-       CCERR_INVALID_MTU = 78,
-       CCERR_INVALID_IMAGE = 79,
-       CCERR_PENDING = 98,     /* not an error; user internally by adapter */
-       CCERR_DEFER = 99,       /* not an error; used internally by adapter */
-       CCERR_FAILED_WRITE = 100,
-       CCERR_FAILED_ERASE = 101,
-       CCERR_FAILED_VERIFICATION = 102,
-       CCERR_NOT_FOUND = 103,
-
-};
-
-/*
- * CCAE_ACTIVE_CONNECT_RESULTS status result codes.
- */
-enum c2_connect_status {
-       C2_CONN_STATUS_SUCCESS = C2_OK,
-       C2_CONN_STATUS_NO_MEM = CCERR_INSUFFICIENT_RESOURCES,
-       C2_CONN_STATUS_TIMEDOUT = CCERR_CONN_TIMEDOUT,
-       C2_CONN_STATUS_REFUSED = CCERR_CONN_REFUSED,
-       C2_CONN_STATUS_NETUNREACH = CCERR_NET_UNREACHABLE,
-       C2_CONN_STATUS_HOSTUNREACH = CCERR_HOST_UNREACHABLE,
-       C2_CONN_STATUS_INVALID_RNIC = CCERR_INVALID_RNIC,
-       C2_CONN_STATUS_INVALID_QP = CCERR_INVALID_QP,
-       C2_CONN_STATUS_INVALID_QP_STATE = CCERR_INVALID_QP_STATE,
-       C2_CONN_STATUS_REJECTED = CCERR_CONN_RESET,
-       C2_CONN_STATUS_ADDR_NOT_AVAIL = CCERR_ADDR_NOT_AVAIL,
-};
-
-/*
- * Flash programming status codes.
- */
-enum c2_flash_status {
-       C2_FLASH_STATUS_SUCCESS = 0x0000,
-       C2_FLASH_STATUS_VERIFY_ERR = 0x0002,
-       C2_FLASH_STATUS_IMAGE_ERR = 0x0004,
-       C2_FLASH_STATUS_ECLBS = 0x0400,
-       C2_FLASH_STATUS_PSLBS = 0x0800,
-       C2_FLASH_STATUS_VPENS = 0x1000,
-};
-
-#endif                         /* _C2_STATUS_H_ */
diff --git a/drivers/staging/rdma/amso1100/c2_user.h b/drivers/staging/rdma/amso1100/c2_user.h
deleted file mode 100644 (file)
index 7e9e7ad..0000000
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Copyright (c) 2005 Topspin Communications.  All rights reserved.
- * Copyright (c) 2005 Cisco Systems.  All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#ifndef C2_USER_H
-#define C2_USER_H
-
-#include <linux/types.h>
-
-/*
- * Make sure that all structs defined in this file remain laid out so
- * that they pack the same way on 32-bit and 64-bit architectures (to
- * avoid incompatibility between 32-bit userspace and 64-bit kernels).
- * In particular do not use pointer types -- pass pointers in __u64
- * instead.
- */
-
-struct c2_alloc_ucontext_resp {
-       __u32 qp_tab_size;
-       __u32 uarc_size;
-};
-
-struct c2_alloc_pd_resp {
-       __u32 pdn;
-       __u32 reserved;
-};
-
-struct c2_create_cq {
-       __u32 lkey;
-       __u32 pdn;
-       __u64 arm_db_page;
-       __u64 set_db_page;
-       __u32 arm_db_index;
-       __u32 set_db_index;
-};
-
-struct c2_create_cq_resp {
-       __u32 cqn;
-       __u32 reserved;
-};
-
-struct c2_create_qp {
-       __u32 lkey;
-       __u32 reserved;
-       __u64 sq_db_page;
-       __u64 rq_db_page;
-       __u32 sq_db_index;
-       __u32 rq_db_index;
-};
-
-#endif                         /* C2_USER_H */
diff --git a/drivers/staging/rdma/amso1100/c2_vq.c b/drivers/staging/rdma/amso1100/c2_vq.c
deleted file mode 100644 (file)
index 2ec716f..0000000
+++ /dev/null
@@ -1,260 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-
-#include "c2_vq.h"
-#include "c2_provider.h"
-
-/*
- * Verbs Request Objects:
- *
- * VQ Request Objects are allocated by the kernel verbs handlers.
- * They contain a wait object, a refcnt, an atomic bool indicating that the
- * adapter has replied, and a copy of the verb reply work request.
- * A pointer to the VQ Request Object is passed down in the context
- * field of the work request message, and reflected back by the adapter
- * in the verbs reply message.  The function handle_vq() in the interrupt
- * path will use this pointer to:
- *     1) append a copy of the verbs reply message
- *     2) mark that the reply is ready
- *     3) wake up the kernel verbs handler blocked awaiting the reply.
- *
- *
- * The kernel verbs handlers do a "get" to put a 2nd reference on the
- * VQ Request object.  If the kernel verbs handler exits before the adapter
- * can respond, this extra reference will keep the VQ Request object around
- * until the adapter's reply can be processed.  The reason we need this is
- * because a pointer to this object is stuffed into the context field of
- * the verbs work request message, and reflected back in the reply message.
- * It is used in the interrupt handler (handle_vq()) to wake up the appropriate
- * kernel verb handler that is blocked awaiting the verb reply.
- * So handle_vq() will do a "put" on the object when it's done accessing it.
- * NOTE:  If we guarantee that the kernel verb handler will never bail before
- *        getting the reply, then we don't need these refcnts.
- *
- *
- * VQ Request objects are freed by the kernel verbs handlers only
- * after the verb has been processed, or when the adapter fails and
- * does not reply.
- *
- *
- * Verbs Reply Buffers:
- *
- * VQ Reply bufs are local host memory copies of a
- * outstanding Verb Request reply
- * message.  The are always allocated by the kernel verbs handlers, and _may_ be
- * freed by either the kernel verbs handler -or- the interrupt handler.  The
- * kernel verbs handler _must_ free the repbuf, then free the vq request object
- * in that order.
- */
-
-int vq_init(struct c2_dev *c2dev)
-{
-       sprintf(c2dev->vq_cache_name, "c2-vq:dev%c",
-               (char) ('0' + c2dev->devnum));
-       c2dev->host_msg_cache =
-           kmem_cache_create(c2dev->vq_cache_name, c2dev->rep_vq.msg_size, 0,
-                             SLAB_HWCACHE_ALIGN, NULL);
-       if (c2dev->host_msg_cache == NULL) {
-               return -ENOMEM;
-       }
-       return 0;
-}
-
-void vq_term(struct c2_dev *c2dev)
-{
-       kmem_cache_destroy(c2dev->host_msg_cache);
-}
-
-/* vq_req_alloc - allocate a VQ Request Object and initialize it.
- * The refcnt is set to 1.
- */
-struct c2_vq_req *vq_req_alloc(struct c2_dev *c2dev)
-{
-       struct c2_vq_req *r;
-
-       r = kmalloc(sizeof(struct c2_vq_req), GFP_KERNEL);
-       if (r) {
-               init_waitqueue_head(&r->wait_object);
-               r->reply_msg = 0;
-               r->event = 0;
-               r->cm_id = NULL;
-               r->qp = NULL;
-               atomic_set(&r->refcnt, 1);
-               atomic_set(&r->reply_ready, 0);
-       }
-       return r;
-}
-
-
-/* vq_req_free - free the VQ Request Object.  It is assumed the verbs handler
- * has already free the VQ Reply Buffer if it existed.
- */
-void vq_req_free(struct c2_dev *c2dev, struct c2_vq_req *r)
-{
-       r->reply_msg = 0;
-       if (atomic_dec_and_test(&r->refcnt)) {
-               kfree(r);
-       }
-}
-
-/* vq_req_get - reference a VQ Request Object.  Done
- * only in the kernel verbs handlers.
- */
-void vq_req_get(struct c2_dev *c2dev, struct c2_vq_req *r)
-{
-       atomic_inc(&r->refcnt);
-}
-
-
-/* vq_req_put - dereference and potentially free a VQ Request Object.
- *
- * This is only called by handle_vq() on the
- * interrupt when it is done processing
- * a verb reply message.  If the associated
- * kernel verbs handler has already bailed,
- * then this put will actually free the VQ
- * Request object _and_ the VQ Reply Buffer
- * if it exists.
- */
-void vq_req_put(struct c2_dev *c2dev, struct c2_vq_req *r)
-{
-       if (atomic_dec_and_test(&r->refcnt)) {
-               if (r->reply_msg != 0)
-                       vq_repbuf_free(c2dev,
-                                      (void *) (unsigned long) r->reply_msg);
-               kfree(r);
-       }
-}
-
-
-/*
- * vq_repbuf_alloc - allocate a VQ Reply Buffer.
- */
-void *vq_repbuf_alloc(struct c2_dev *c2dev)
-{
-       return kmem_cache_alloc(c2dev->host_msg_cache, GFP_ATOMIC);
-}
-
-/*
- * vq_send_wr - post a verbs request message to the Verbs Request Queue.
- * If a message is not available in the MQ, then block until one is available.
- * NOTE: handle_mq() on the interrupt context will wake up threads blocked here.
- * When the adapter drains the Verbs Request Queue,
- * it inserts MQ index 0 in to the
- * adapter->host activity fifo and interrupts the host.
- */
-int vq_send_wr(struct c2_dev *c2dev, union c2wr *wr)
-{
-       void *msg;
-       wait_queue_t __wait;
-
-       /*
-        * grab adapter vq lock
-        */
-       spin_lock(&c2dev->vqlock);
-
-       /*
-        * allocate msg
-        */
-       msg = c2_mq_alloc(&c2dev->req_vq);
-
-       /*
-        * If we cannot get a msg, then we'll wait
-        * When a messages are available, the int handler will wake_up()
-        * any waiters.
-        */
-       while (msg == NULL) {
-               pr_debug("%s:%d no available msg in VQ, waiting...\n",
-                      __func__, __LINE__);
-               init_waitqueue_entry(&__wait, current);
-               add_wait_queue(&c2dev->req_vq_wo, &__wait);
-               spin_unlock(&c2dev->vqlock);
-               for (;;) {
-                       set_current_state(TASK_INTERRUPTIBLE);
-                       if (!c2_mq_full(&c2dev->req_vq)) {
-                               break;
-                       }
-                       if (!signal_pending(current)) {
-                               schedule_timeout(1 * HZ);       /* 1 second... */
-                               continue;
-                       }
-                       set_current_state(TASK_RUNNING);
-                       remove_wait_queue(&c2dev->req_vq_wo, &__wait);
-                       return -EINTR;
-               }
-               set_current_state(TASK_RUNNING);
-               remove_wait_queue(&c2dev->req_vq_wo, &__wait);
-               spin_lock(&c2dev->vqlock);
-               msg = c2_mq_alloc(&c2dev->req_vq);
-       }
-
-       /*
-        * copy wr into adapter msg
-        */
-       memcpy(msg, wr, c2dev->req_vq.msg_size);
-
-       /*
-        * post msg
-        */
-       c2_mq_produce(&c2dev->req_vq);
-
-       /*
-        * release adapter vq lock
-        */
-       spin_unlock(&c2dev->vqlock);
-       return 0;
-}
-
-
-/*
- * vq_wait_for_reply - block until the adapter posts a Verb Reply Message.
- */
-int vq_wait_for_reply(struct c2_dev *c2dev, struct c2_vq_req *req)
-{
-       if (!wait_event_timeout(req->wait_object,
-                               atomic_read(&req->reply_ready),
-                               60*HZ))
-               return -ETIMEDOUT;
-
-       return 0;
-}
-
-/*
- * vq_repbuf_free - Free a Verbs Reply Buffer.
- */
-void vq_repbuf_free(struct c2_dev *c2dev, void *reply)
-{
-       kmem_cache_free(c2dev->host_msg_cache, reply);
-}
diff --git a/drivers/staging/rdma/amso1100/c2_vq.h b/drivers/staging/rdma/amso1100/c2_vq.h
deleted file mode 100644 (file)
index c1f6cef..0000000
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef _C2_VQ_H_
-#define _C2_VQ_H_
-#include <linux/sched.h>
-#include "c2.h"
-#include "c2_wr.h"
-#include "c2_provider.h"
-
-struct c2_vq_req {
-       u64 reply_msg;          /* ptr to reply msg */
-       wait_queue_head_t wait_object;  /* wait object for vq reqs */
-       atomic_t reply_ready;   /* set when reply is ready */
-       atomic_t refcnt;        /* used to cancel WRs... */
-       int event;
-       struct iw_cm_id *cm_id;
-       struct c2_qp *qp;
-};
-
-int vq_init(struct c2_dev *c2dev);
-void vq_term(struct c2_dev *c2dev);
-
-struct c2_vq_req *vq_req_alloc(struct c2_dev *c2dev);
-void vq_req_free(struct c2_dev *c2dev, struct c2_vq_req *req);
-void vq_req_get(struct c2_dev *c2dev, struct c2_vq_req *req);
-void vq_req_put(struct c2_dev *c2dev, struct c2_vq_req *req);
-int vq_send_wr(struct c2_dev *c2dev, union c2wr * wr);
-
-void *vq_repbuf_alloc(struct c2_dev *c2dev);
-void vq_repbuf_free(struct c2_dev *c2dev, void *reply);
-
-int vq_wait_for_reply(struct c2_dev *c2dev, struct c2_vq_req *req);
-#endif                         /* _C2_VQ_H_ */
diff --git a/drivers/staging/rdma/amso1100/c2_wr.h b/drivers/staging/rdma/amso1100/c2_wr.h
deleted file mode 100644 (file)
index 8d4b4ca..0000000
+++ /dev/null
@@ -1,1520 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef _C2_WR_H_
-#define _C2_WR_H_
-
-#ifdef CCDEBUG
-#define CCWR_MAGIC             0xb07700b0
-#endif
-
-#define C2_QP_NO_ATTR_CHANGE 0xFFFFFFFF
-
-/* Maximum allowed size in bytes of private_data exchange
- * on connect.
- */
-#define C2_MAX_PRIVATE_DATA_SIZE 200
-
-/*
- * These types are shared among the adapter, host, and CCIL consumer.
- */
-enum c2_cq_notification_type {
-       C2_CQ_NOTIFICATION_TYPE_NONE = 1,
-       C2_CQ_NOTIFICATION_TYPE_NEXT,
-       C2_CQ_NOTIFICATION_TYPE_NEXT_SE
-};
-
-enum c2_setconfig_cmd {
-       C2_CFG_ADD_ADDR = 1,
-       C2_CFG_DEL_ADDR = 2,
-       C2_CFG_ADD_ROUTE = 3,
-       C2_CFG_DEL_ROUTE = 4
-};
-
-enum c2_getconfig_cmd {
-       C2_GETCONFIG_ROUTES = 1,
-       C2_GETCONFIG_ADDRS
-};
-
-/*
- *  CCIL Work Request Identifiers
- */
-enum c2wr_ids {
-       CCWR_RNIC_OPEN = 1,
-       CCWR_RNIC_QUERY,
-       CCWR_RNIC_SETCONFIG,
-       CCWR_RNIC_GETCONFIG,
-       CCWR_RNIC_CLOSE,
-       CCWR_CQ_CREATE,
-       CCWR_CQ_QUERY,
-       CCWR_CQ_MODIFY,
-       CCWR_CQ_DESTROY,
-       CCWR_QP_CONNECT,
-       CCWR_PD_ALLOC,
-       CCWR_PD_DEALLOC,
-       CCWR_SRQ_CREATE,
-       CCWR_SRQ_QUERY,
-       CCWR_SRQ_MODIFY,
-       CCWR_SRQ_DESTROY,
-       CCWR_QP_CREATE,
-       CCWR_QP_QUERY,
-       CCWR_QP_MODIFY,
-       CCWR_QP_DESTROY,
-       CCWR_NSMR_STAG_ALLOC,
-       CCWR_NSMR_REGISTER,
-       CCWR_NSMR_PBL,
-       CCWR_STAG_DEALLOC,
-       CCWR_NSMR_REREGISTER,
-       CCWR_SMR_REGISTER,
-       CCWR_MR_QUERY,
-       CCWR_MW_ALLOC,
-       CCWR_MW_QUERY,
-       CCWR_EP_CREATE,
-       CCWR_EP_GETOPT,
-       CCWR_EP_SETOPT,
-       CCWR_EP_DESTROY,
-       CCWR_EP_BIND,
-       CCWR_EP_CONNECT,
-       CCWR_EP_LISTEN,
-       CCWR_EP_SHUTDOWN,
-       CCWR_EP_LISTEN_CREATE,
-       CCWR_EP_LISTEN_DESTROY,
-       CCWR_EP_QUERY,
-       CCWR_CR_ACCEPT,
-       CCWR_CR_REJECT,
-       CCWR_CONSOLE,
-       CCWR_TERM,
-       CCWR_FLASH_INIT,
-       CCWR_FLASH,
-       CCWR_BUF_ALLOC,
-       CCWR_BUF_FREE,
-       CCWR_FLASH_WRITE,
-       CCWR_INIT,              /* WARNING: Don't move this ever again! */
-
-
-
-       /* Add new IDs here */
-
-
-
-       /*
-        * WARNING: CCWR_LAST must always be the last verbs id defined!
-        *          All the preceding IDs are fixed, and must not change.
-        *          You can add new IDs, but must not remove or reorder
-        *          any IDs. If you do, YOU will ruin any hope of
-        *          compatibility between versions.
-        */
-       CCWR_LAST,
-
-       /*
-        * Start over at 1 so that arrays indexed by user wr id's
-        * begin at 1.  This is OK since the verbs and user wr id's
-        * are always used on disjoint sets of queues.
-        */
-       /*
-        * The order of the CCWR_SEND_XX verbs must
-        * match the order of the RDMA_OPs
-        */
-       CCWR_SEND = 1,
-       CCWR_SEND_INV,
-       CCWR_SEND_SE,
-       CCWR_SEND_SE_INV,
-       CCWR_RDMA_WRITE,
-       CCWR_RDMA_READ,
-       CCWR_RDMA_READ_INV,
-       CCWR_MW_BIND,
-       CCWR_NSMR_FASTREG,
-       CCWR_STAG_INVALIDATE,
-       CCWR_RECV,
-       CCWR_NOP,
-       CCWR_UNIMPL,
-/* WARNING: This must always be the last user wr id defined! */
-};
-#define RDMA_SEND_OPCODE_FROM_WR_ID(x)   (x+2)
-
-/*
- * SQ/RQ Work Request Types
- */
-enum c2_wr_type {
-       C2_WR_TYPE_SEND = CCWR_SEND,
-       C2_WR_TYPE_SEND_SE = CCWR_SEND_SE,
-       C2_WR_TYPE_SEND_INV = CCWR_SEND_INV,
-       C2_WR_TYPE_SEND_SE_INV = CCWR_SEND_SE_INV,
-       C2_WR_TYPE_RDMA_WRITE = CCWR_RDMA_WRITE,
-       C2_WR_TYPE_RDMA_READ = CCWR_RDMA_READ,
-       C2_WR_TYPE_RDMA_READ_INV_STAG = CCWR_RDMA_READ_INV,
-       C2_WR_TYPE_BIND_MW = CCWR_MW_BIND,
-       C2_WR_TYPE_FASTREG_NSMR = CCWR_NSMR_FASTREG,
-       C2_WR_TYPE_INV_STAG = CCWR_STAG_INVALIDATE,
-       C2_WR_TYPE_RECV = CCWR_RECV,
-       C2_WR_TYPE_NOP = CCWR_NOP,
-};
-
-struct c2_netaddr {
-       __be32 ip_addr;
-       __be32 netmask;
-       u32 mtu;
-};
-
-struct c2_route {
-       u32 ip_addr;            /* 0 indicates the default route */
-       u32 netmask;            /* netmask associated with dst */
-       u32 flags;
-       union {
-               u32 ipaddr;     /* address of the nexthop interface */
-               u8 enaddr[6];
-       } nexthop;
-};
-
-/*
- * A Scatter Gather Entry.
- */
-struct c2_data_addr {
-       __be32 stag;
-       __be32 length;
-       __be64 to;
-};
-
-/*
- * MR and MW flags used by the consumer, RI, and RNIC.
- */
-enum c2_mm_flags {
-       MEM_REMOTE = 0x0001,    /* allow mw binds with remote access. */
-       MEM_VA_BASED = 0x0002,  /* Not Zero-based */
-       MEM_PBL_COMPLETE = 0x0004,      /* PBL array is complete in this msg */
-       MEM_LOCAL_READ = 0x0008,        /* allow local reads */
-       MEM_LOCAL_WRITE = 0x0010,       /* allow local writes */
-       MEM_REMOTE_READ = 0x0020,       /* allow remote reads */
-       MEM_REMOTE_WRITE = 0x0040,      /* allow remote writes */
-       MEM_WINDOW_BIND = 0x0080,       /* binds allowed */
-       MEM_SHARED = 0x0100,    /* set if MR is shared */
-       MEM_STAG_VALID = 0x0200 /* set if STAG is in valid state */
-};
-
-/*
- * CCIL API ACF flags defined in terms of the low level mem flags.
- * This minimizes translation needed in the user API
- */
-enum c2_acf {
-       C2_ACF_LOCAL_READ = MEM_LOCAL_READ,
-       C2_ACF_LOCAL_WRITE = MEM_LOCAL_WRITE,
-       C2_ACF_REMOTE_READ = MEM_REMOTE_READ,
-       C2_ACF_REMOTE_WRITE = MEM_REMOTE_WRITE,
-       C2_ACF_WINDOW_BIND = MEM_WINDOW_BIND
-};
-
-/*
- * Image types of objects written to flash
- */
-#define C2_FLASH_IMG_BITFILE 1
-#define C2_FLASH_IMG_OPTION_ROM 2
-#define C2_FLASH_IMG_VPD 3
-
-/*
- *  to fix bug 1815 we define the max size allowable of the
- *  terminate message (per the IETF spec).Refer to the IETF
- *  protocol specification, section 12.1.6, page 64)
- *  The message is prefixed by 20 types of DDP info.
- *
- *  Then the message has 6 bytes for the terminate control
- *  and DDP segment length info plus a DDP header (either
- *  14 or 18 byts) plus 28 bytes for the RDMA header.
- *  Thus the max size in:
- *  20 + (6 + 18 + 28) = 72
- */
-#define C2_MAX_TERMINATE_MESSAGE_SIZE (72)
-
-/*
- * Build String Length.  It must be the same as C2_BUILD_STR_LEN in ccil_api.h
- */
-#define WR_BUILD_STR_LEN 64
-
-/*
- * WARNING:  All of these structs need to align any 64bit types on
- * 64 bit boundaries!  64bit types include u64 and u64.
- */
-
-/*
- * Clustercore Work Request Header.  Be sensitive to field layout
- * and alignment.
- */
-struct c2wr_hdr {
-       /* wqe_count is part of the cqe.  It is put here so the
-        * adapter can write to it while the wr is pending without
-        * clobbering part of the wr.  This word need not be dma'd
-        * from the host to adapter by libccil, but we copy it anyway
-        * to make the memcpy to the adapter better aligned.
-        */
-       __be32 wqe_count;
-
-       /* Put these fields next so that later 32- and 64-bit
-        * quantities are naturally aligned.
-        */
-       u8 id;
-       u8 result;              /* adapter -> host */
-       u8 sge_count;           /* host -> adapter */
-       u8 flags;               /* host -> adapter */
-
-       u64 context;
-#ifdef CCMSGMAGIC
-       u32 magic;
-       u32 pad;
-#endif
-} __attribute__((packed));
-
-/*
- *------------------------ RNIC ------------------------
- */
-
-/*
- * WR_RNIC_OPEN
- */
-
-/*
- * Flags for the RNIC WRs
- */
-enum c2_rnic_flags {
-       RNIC_IRD_STATIC = 0x0001,
-       RNIC_ORD_STATIC = 0x0002,
-       RNIC_QP_STATIC = 0x0004,
-       RNIC_SRQ_SUPPORTED = 0x0008,
-       RNIC_PBL_BLOCK_MODE = 0x0010,
-       RNIC_SRQ_MODEL_ARRIVAL = 0x0020,
-       RNIC_CQ_OVF_DETECTED = 0x0040,
-       RNIC_PRIV_MODE = 0x0080
-};
-
-struct c2wr_rnic_open_req {
-       struct c2wr_hdr hdr;
-       u64 user_context;
-       __be16 flags;           /* See enum c2_rnic_flags */
-       __be16 port_num;
-} __attribute__((packed));
-
-struct c2wr_rnic_open_rep {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-} __attribute__((packed));
-
-union c2wr_rnic_open {
-       struct c2wr_rnic_open_req req;
-       struct c2wr_rnic_open_rep rep;
-} __attribute__((packed));
-
-struct c2wr_rnic_query_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-} __attribute__((packed));
-
-/*
- * WR_RNIC_QUERY
- */
-struct c2wr_rnic_query_rep {
-       struct c2wr_hdr hdr;
-       u64 user_context;
-       __be32 vendor_id;
-       __be32 part_number;
-       __be32 hw_version;
-       __be32 fw_ver_major;
-       __be32 fw_ver_minor;
-       __be32 fw_ver_patch;
-       char fw_ver_build_str[WR_BUILD_STR_LEN];
-       __be32 max_qps;
-       __be32 max_qp_depth;
-       u32 max_srq_depth;
-       u32 max_send_sgl_depth;
-       u32 max_rdma_sgl_depth;
-       __be32 max_cqs;
-       __be32 max_cq_depth;
-       u32 max_cq_event_handlers;
-       __be32 max_mrs;
-       u32 max_pbl_depth;
-       __be32 max_pds;
-       __be32 max_global_ird;
-       u32 max_global_ord;
-       __be32 max_qp_ird;
-       __be32 max_qp_ord;
-       u32 flags;
-       __be32 max_mws;
-       u32 pbe_range_low;
-       u32 pbe_range_high;
-       u32 max_srqs;
-       u32 page_size;
-} __attribute__((packed));
-
-union c2wr_rnic_query {
-       struct c2wr_rnic_query_req req;
-       struct c2wr_rnic_query_rep rep;
-} __attribute__((packed));
-
-/*
- * WR_RNIC_GETCONFIG
- */
-
-struct c2wr_rnic_getconfig_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 option;             /* see c2_getconfig_cmd_t */
-       u64 reply_buf;
-       u32 reply_buf_len;
-} __attribute__((packed)) ;
-
-struct c2wr_rnic_getconfig_rep {
-       struct c2wr_hdr hdr;
-       u32 option;             /* see c2_getconfig_cmd_t */
-       u32 count_len;          /* length of the number of addresses configured */
-} __attribute__((packed)) ;
-
-union c2wr_rnic_getconfig {
-       struct c2wr_rnic_getconfig_req req;
-       struct c2wr_rnic_getconfig_rep rep;
-} __attribute__((packed)) ;
-
-/*
- * WR_RNIC_SETCONFIG
- */
-struct c2wr_rnic_setconfig_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       __be32 option;          /* See c2_setconfig_cmd_t */
-       /* variable data and pad. See c2_netaddr and c2_route */
-       u8 data[0];
-} __attribute__((packed)) ;
-
-struct c2wr_rnic_setconfig_rep {
-       struct c2wr_hdr hdr;
-} __attribute__((packed)) ;
-
-union c2wr_rnic_setconfig {
-       struct c2wr_rnic_setconfig_req req;
-       struct c2wr_rnic_setconfig_rep rep;
-} __attribute__((packed)) ;
-
-/*
- * WR_RNIC_CLOSE
- */
-struct c2wr_rnic_close_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-} __attribute__((packed)) ;
-
-struct c2wr_rnic_close_rep {
-       struct c2wr_hdr hdr;
-} __attribute__((packed)) ;
-
-union c2wr_rnic_close {
-       struct c2wr_rnic_close_req req;
-       struct c2wr_rnic_close_rep rep;
-} __attribute__((packed)) ;
-
-/*
- *------------------------ CQ ------------------------
- */
-struct c2wr_cq_create_req {
-       struct c2wr_hdr hdr;
-       __be64 shared_ht;
-       u64 user_context;
-       __be64 msg_pool;
-       u32 rnic_handle;
-       __be32 msg_size;
-       __be32 depth;
-} __attribute__((packed)) ;
-
-struct c2wr_cq_create_rep {
-       struct c2wr_hdr hdr;
-       __be32 mq_index;
-       __be32 adapter_shared;
-       u32 cq_handle;
-} __attribute__((packed)) ;
-
-union c2wr_cq_create {
-       struct c2wr_cq_create_req req;
-       struct c2wr_cq_create_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_cq_modify_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 cq_handle;
-       u32 new_depth;
-       u64 new_msg_pool;
-} __attribute__((packed)) ;
-
-struct c2wr_cq_modify_rep {
-       struct c2wr_hdr hdr;
-} __attribute__((packed)) ;
-
-union c2wr_cq_modify {
-       struct c2wr_cq_modify_req req;
-       struct c2wr_cq_modify_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_cq_destroy_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 cq_handle;
-} __attribute__((packed)) ;
-
-struct c2wr_cq_destroy_rep {
-       struct c2wr_hdr hdr;
-} __attribute__((packed)) ;
-
-union c2wr_cq_destroy {
-       struct c2wr_cq_destroy_req req;
-       struct c2wr_cq_destroy_rep rep;
-} __attribute__((packed)) ;
-
-/*
- *------------------------ PD ------------------------
- */
-struct c2wr_pd_alloc_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 pd_id;
-} __attribute__((packed)) ;
-
-struct c2wr_pd_alloc_rep {
-       struct c2wr_hdr hdr;
-} __attribute__((packed)) ;
-
-union c2wr_pd_alloc {
-       struct c2wr_pd_alloc_req req;
-       struct c2wr_pd_alloc_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_pd_dealloc_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 pd_id;
-} __attribute__((packed)) ;
-
-struct c2wr_pd_dealloc_rep {
-       struct c2wr_hdr hdr;
-} __attribute__((packed)) ;
-
-union c2wr_pd_dealloc {
-       struct c2wr_pd_dealloc_req req;
-       struct c2wr_pd_dealloc_rep rep;
-} __attribute__((packed)) ;
-
-/*
- *------------------------ SRQ ------------------------
- */
-struct c2wr_srq_create_req {
-       struct c2wr_hdr hdr;
-       u64 shared_ht;
-       u64 user_context;
-       u32 rnic_handle;
-       u32 srq_depth;
-       u32 srq_limit;
-       u32 sgl_depth;
-       u32 pd_id;
-} __attribute__((packed)) ;
-
-struct c2wr_srq_create_rep {
-       struct c2wr_hdr hdr;
-       u32 srq_depth;
-       u32 sgl_depth;
-       u32 msg_size;
-       u32 mq_index;
-       u32 mq_start;
-       u32 srq_handle;
-} __attribute__((packed)) ;
-
-union c2wr_srq_create {
-       struct c2wr_srq_create_req req;
-       struct c2wr_srq_create_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_srq_destroy_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 srq_handle;
-} __attribute__((packed)) ;
-
-struct c2wr_srq_destroy_rep {
-       struct c2wr_hdr hdr;
-} __attribute__((packed)) ;
-
-union c2wr_srq_destroy {
-       struct c2wr_srq_destroy_req req;
-       struct c2wr_srq_destroy_rep rep;
-} __attribute__((packed)) ;
-
-/*
- *------------------------ QP ------------------------
- */
-enum c2wr_qp_flags {
-       QP_RDMA_READ = 0x00000001,      /* RDMA read enabled? */
-       QP_RDMA_WRITE = 0x00000002,     /* RDMA write enabled? */
-       QP_MW_BIND = 0x00000004,        /* MWs enabled */
-       QP_ZERO_STAG = 0x00000008,      /* enabled? */
-       QP_REMOTE_TERMINATION = 0x00000010,     /* remote end terminated */
-       QP_RDMA_READ_RESPONSE = 0x00000020      /* Remote RDMA read  */
-           /* enabled? */
-};
-
-struct c2wr_qp_create_req {
-       struct c2wr_hdr hdr;
-       __be64 shared_sq_ht;
-       __be64 shared_rq_ht;
-       u64 user_context;
-       u32 rnic_handle;
-       u32 sq_cq_handle;
-       u32 rq_cq_handle;
-       __be32 sq_depth;
-       __be32 rq_depth;
-       u32 srq_handle;
-       u32 srq_limit;
-       __be32 flags;           /* see enum c2wr_qp_flags */
-       __be32 send_sgl_depth;
-       __be32 recv_sgl_depth;
-       __be32 rdma_write_sgl_depth;
-       __be32 ord;
-       __be32 ird;
-       u32 pd_id;
-} __attribute__((packed)) ;
-
-struct c2wr_qp_create_rep {
-       struct c2wr_hdr hdr;
-       __be32 sq_depth;
-       __be32 rq_depth;
-       u32 send_sgl_depth;
-       u32 recv_sgl_depth;
-       u32 rdma_write_sgl_depth;
-       u32 ord;
-       u32 ird;
-       __be32 sq_msg_size;
-       __be32 sq_mq_index;
-       __be32 sq_mq_start;
-       __be32 rq_msg_size;
-       __be32 rq_mq_index;
-       __be32 rq_mq_start;
-       u32 qp_handle;
-} __attribute__((packed)) ;
-
-union c2wr_qp_create {
-       struct c2wr_qp_create_req req;
-       struct c2wr_qp_create_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_qp_query_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 qp_handle;
-} __attribute__((packed)) ;
-
-struct c2wr_qp_query_rep {
-       struct c2wr_hdr hdr;
-       u64 user_context;
-       u32 rnic_handle;
-       u32 sq_depth;
-       u32 rq_depth;
-       u32 send_sgl_depth;
-       u32 rdma_write_sgl_depth;
-       u32 recv_sgl_depth;
-       u32 ord;
-       u32 ird;
-       u16 qp_state;
-       u16 flags;              /* see c2wr_qp_flags_t */
-       u32 qp_id;
-       u32 local_addr;
-       u32 remote_addr;
-       u16 local_port;
-       u16 remote_port;
-       u32 terminate_msg_length;       /* 0 if not present */
-       u8 data[0];
-       /* Terminate Message in-line here. */
-} __attribute__((packed)) ;
-
-union c2wr_qp_query {
-       struct c2wr_qp_query_req req;
-       struct c2wr_qp_query_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_qp_modify_req {
-       struct c2wr_hdr hdr;
-       u64 stream_msg;
-       u32 stream_msg_length;
-       u32 rnic_handle;
-       u32 qp_handle;
-       __be32 next_qp_state;
-       __be32 ord;
-       __be32 ird;
-       __be32 sq_depth;
-       __be32 rq_depth;
-       u32 llp_ep_handle;
-} __attribute__((packed)) ;
-
-struct c2wr_qp_modify_rep {
-       struct c2wr_hdr hdr;
-       u32 ord;
-       u32 ird;
-       u32 sq_depth;
-       u32 rq_depth;
-       u32 sq_msg_size;
-       u32 sq_mq_index;
-       u32 sq_mq_start;
-       u32 rq_msg_size;
-       u32 rq_mq_index;
-       u32 rq_mq_start;
-} __attribute__((packed)) ;
-
-union c2wr_qp_modify {
-       struct c2wr_qp_modify_req req;
-       struct c2wr_qp_modify_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_qp_destroy_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 qp_handle;
-} __attribute__((packed)) ;
-
-struct c2wr_qp_destroy_rep {
-       struct c2wr_hdr hdr;
-} __attribute__((packed)) ;
-
-union c2wr_qp_destroy {
-       struct c2wr_qp_destroy_req req;
-       struct c2wr_qp_destroy_rep rep;
-} __attribute__((packed)) ;
-
-/*
- * The CCWR_QP_CONNECT msg is posted on the verbs request queue.  It can
- * only be posted when a QP is in IDLE state.  After the connect request is
- * submitted to the LLP, the adapter moves the QP to CONNECT_PENDING state.
- * No synchronous reply from adapter to this WR.  The results of
- * connection are passed back in an async event CCAE_ACTIVE_CONNECT_RESULTS
- * See c2wr_ae_active_connect_results_t
- */
-struct c2wr_qp_connect_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 qp_handle;
-       __be32 remote_addr;
-       __be16 remote_port;
-       u16 pad;
-       __be32 private_data_length;
-       u8 private_data[0];     /* Private data in-line. */
-} __attribute__((packed)) ;
-
-struct c2wr_qp_connect {
-       struct c2wr_qp_connect_req req;
-       /* no synchronous reply.         */
-} __attribute__((packed)) ;
-
-
-/*
- *------------------------ MM ------------------------
- */
-
-struct c2wr_nsmr_stag_alloc_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 pbl_depth;
-       u32 pd_id;
-       u32 flags;
-} __attribute__((packed)) ;
-
-struct c2wr_nsmr_stag_alloc_rep {
-       struct c2wr_hdr hdr;
-       u32 pbl_depth;
-       u32 stag_index;
-} __attribute__((packed)) ;
-
-union c2wr_nsmr_stag_alloc {
-       struct c2wr_nsmr_stag_alloc_req req;
-       struct c2wr_nsmr_stag_alloc_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_nsmr_register_req {
-       struct c2wr_hdr hdr;
-       __be64 va;
-       u32 rnic_handle;
-       __be16 flags;
-       u8 stag_key;
-       u8 pad;
-       u32 pd_id;
-       __be32 pbl_depth;
-       __be32 pbe_size;
-       __be32 fbo;
-       __be32 length;
-       __be32 addrs_length;
-       /* array of paddrs (must be aligned on a 64bit boundary) */
-       __be64 paddrs[0];
-} __attribute__((packed)) ;
-
-struct c2wr_nsmr_register_rep {
-       struct c2wr_hdr hdr;
-       u32 pbl_depth;
-       __be32 stag_index;
-} __attribute__((packed)) ;
-
-union c2wr_nsmr_register {
-       struct c2wr_nsmr_register_req req;
-       struct c2wr_nsmr_register_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_nsmr_pbl_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       __be32 flags;
-       __be32 stag_index;
-       __be32 addrs_length;
-       /* array of paddrs (must be aligned on a 64bit boundary) */
-       __be64 paddrs[0];
-} __attribute__((packed)) ;
-
-struct c2wr_nsmr_pbl_rep {
-       struct c2wr_hdr hdr;
-} __attribute__((packed)) ;
-
-union c2wr_nsmr_pbl {
-       struct c2wr_nsmr_pbl_req req;
-       struct c2wr_nsmr_pbl_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_mr_query_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 stag_index;
-} __attribute__((packed)) ;
-
-struct c2wr_mr_query_rep {
-       struct c2wr_hdr hdr;
-       u8 stag_key;
-       u8 pad[3];
-       u32 pd_id;
-       u32 flags;
-       u32 pbl_depth;
-} __attribute__((packed)) ;
-
-union c2wr_mr_query {
-       struct c2wr_mr_query_req req;
-       struct c2wr_mr_query_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_mw_query_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 stag_index;
-} __attribute__((packed)) ;
-
-struct c2wr_mw_query_rep {
-       struct c2wr_hdr hdr;
-       u8 stag_key;
-       u8 pad[3];
-       u32 pd_id;
-       u32 flags;
-} __attribute__((packed)) ;
-
-union c2wr_mw_query {
-       struct c2wr_mw_query_req req;
-       struct c2wr_mw_query_rep rep;
-} __attribute__((packed)) ;
-
-
-struct c2wr_stag_dealloc_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       __be32 stag_index;
-} __attribute__((packed)) ;
-
-struct c2wr_stag_dealloc_rep {
-       struct c2wr_hdr hdr;
-} __attribute__((packed)) ;
-
-union c2wr_stag_dealloc {
-       struct c2wr_stag_dealloc_req req;
-       struct c2wr_stag_dealloc_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_nsmr_reregister_req {
-       struct c2wr_hdr hdr;
-       u64 va;
-       u32 rnic_handle;
-       u16 flags;
-       u8 stag_key;
-       u8 pad;
-       u32 stag_index;
-       u32 pd_id;
-       u32 pbl_depth;
-       u32 pbe_size;
-       u32 fbo;
-       u32 length;
-       u32 addrs_length;
-       u32 pad1;
-       /* array of paddrs (must be aligned on a 64bit boundary) */
-       u64 paddrs[0];
-} __attribute__((packed)) ;
-
-struct c2wr_nsmr_reregister_rep {
-       struct c2wr_hdr hdr;
-       u32 pbl_depth;
-       u32 stag_index;
-} __attribute__((packed)) ;
-
-union c2wr_nsmr_reregister {
-       struct c2wr_nsmr_reregister_req req;
-       struct c2wr_nsmr_reregister_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_smr_register_req {
-       struct c2wr_hdr hdr;
-       u64 va;
-       u32 rnic_handle;
-       u16 flags;
-       u8 stag_key;
-       u8 pad;
-       u32 stag_index;
-       u32 pd_id;
-} __attribute__((packed)) ;
-
-struct c2wr_smr_register_rep {
-       struct c2wr_hdr hdr;
-       u32 stag_index;
-} __attribute__((packed)) ;
-
-union c2wr_smr_register {
-       struct c2wr_smr_register_req req;
-       struct c2wr_smr_register_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_mw_alloc_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 pd_id;
-} __attribute__((packed)) ;
-
-struct c2wr_mw_alloc_rep {
-       struct c2wr_hdr hdr;
-       u32 stag_index;
-} __attribute__((packed)) ;
-
-union c2wr_mw_alloc {
-       struct c2wr_mw_alloc_req req;
-       struct c2wr_mw_alloc_rep rep;
-} __attribute__((packed)) ;
-
-/*
- *------------------------ WRs -----------------------
- */
-
-struct c2wr_user_hdr {
-       struct c2wr_hdr hdr;            /* Has status and WR Type */
-} __attribute__((packed)) ;
-
-enum c2_qp_state {
-       C2_QP_STATE_IDLE = 0x01,
-       C2_QP_STATE_CONNECTING = 0x02,
-       C2_QP_STATE_RTS = 0x04,
-       C2_QP_STATE_CLOSING = 0x08,
-       C2_QP_STATE_TERMINATE = 0x10,
-       C2_QP_STATE_ERROR = 0x20,
-};
-
-/* Completion queue entry. */
-struct c2wr_ce {
-       struct c2wr_hdr hdr;            /* Has status and WR Type */
-       u64 qp_user_context;    /* c2_user_qp_t * */
-       u32 qp_state;           /* Current QP State */
-       u32 handle;             /* QPID or EP Handle */
-       __be32 bytes_rcvd;              /* valid for RECV WCs */
-       u32 stag;
-} __attribute__((packed)) ;
-
-
-/*
- * Flags used for all post-sq WRs.  These must fit in the flags
- * field of the struct c2wr_hdr (eight bits).
- */
-enum {
-       SQ_SIGNALED = 0x01,
-       SQ_READ_FENCE = 0x02,
-       SQ_FENCE = 0x04,
-};
-
-/*
- * Common fields for all post-sq WRs.  Namely the standard header and a
- * secondary header with fields common to all post-sq WRs.
- */
-struct c2_sq_hdr {
-       struct c2wr_user_hdr user_hdr;
-} __attribute__((packed));
-
-/*
- * Same as above but for post-rq WRs.
- */
-struct c2_rq_hdr {
-       struct c2wr_user_hdr user_hdr;
-} __attribute__((packed));
-
-/*
- * use the same struct for all sends.
- */
-struct c2wr_send_req {
-       struct c2_sq_hdr sq_hdr;
-       __be32 sge_len;
-       __be32 remote_stag;
-       u8 data[0];             /* SGE array */
-} __attribute__((packed));
-
-union c2wr_send {
-       struct c2wr_send_req req;
-       struct c2wr_ce rep;
-} __attribute__((packed));
-
-struct c2wr_rdma_write_req {
-       struct c2_sq_hdr sq_hdr;
-       __be64 remote_to;
-       __be32 remote_stag;
-       __be32 sge_len;
-       u8 data[0];             /* SGE array */
-} __attribute__((packed));
-
-union c2wr_rdma_write {
-       struct c2wr_rdma_write_req req;
-       struct c2wr_ce rep;
-} __attribute__((packed));
-
-struct c2wr_rdma_read_req {
-       struct c2_sq_hdr sq_hdr;
-       __be64 local_to;
-       __be64 remote_to;
-       __be32 local_stag;
-       __be32 remote_stag;
-       __be32 length;
-} __attribute__((packed));
-
-union c2wr_rdma_read {
-       struct c2wr_rdma_read_req req;
-       struct c2wr_ce rep;
-} __attribute__((packed));
-
-struct c2wr_mw_bind_req {
-       struct c2_sq_hdr sq_hdr;
-       u64 va;
-       u8 stag_key;
-       u8 pad[3];
-       u32 mw_stag_index;
-       u32 mr_stag_index;
-       u32 length;
-       u32 flags;
-} __attribute__((packed));
-
-union c2wr_mw_bind {
-       struct c2wr_mw_bind_req req;
-       struct c2wr_ce rep;
-} __attribute__((packed));
-
-struct c2wr_nsmr_fastreg_req {
-       struct c2_sq_hdr sq_hdr;
-       u64 va;
-       u8 stag_key;
-       u8 pad[3];
-       u32 stag_index;
-       u32 pbe_size;
-       u32 fbo;
-       u32 length;
-       u32 addrs_length;
-       /* array of paddrs (must be aligned on a 64bit boundary) */
-       u64 paddrs[0];
-} __attribute__((packed));
-
-union c2wr_nsmr_fastreg {
-       struct c2wr_nsmr_fastreg_req req;
-       struct c2wr_ce rep;
-} __attribute__((packed));
-
-struct c2wr_stag_invalidate_req {
-       struct c2_sq_hdr sq_hdr;
-       u8 stag_key;
-       u8 pad[3];
-       u32 stag_index;
-} __attribute__((packed));
-
-union c2wr_stag_invalidate {
-       struct c2wr_stag_invalidate_req req;
-       struct c2wr_ce rep;
-} __attribute__((packed));
-
-union c2wr_sqwr {
-       struct c2_sq_hdr sq_hdr;
-       struct c2wr_send_req send;
-       struct c2wr_send_req send_se;
-       struct c2wr_send_req send_inv;
-       struct c2wr_send_req send_se_inv;
-       struct c2wr_rdma_write_req rdma_write;
-       struct c2wr_rdma_read_req rdma_read;
-       struct c2wr_mw_bind_req mw_bind;
-       struct c2wr_nsmr_fastreg_req nsmr_fastreg;
-       struct c2wr_stag_invalidate_req stag_inv;
-} __attribute__((packed));
-
-
-/*
- * RQ WRs
- */
-struct c2wr_rqwr {
-       struct c2_rq_hdr rq_hdr;
-       u8 data[0];             /* array of SGEs */
-} __attribute__((packed));
-
-union c2wr_recv {
-       struct c2wr_rqwr req;
-       struct c2wr_ce rep;
-} __attribute__((packed));
-
-/*
- * All AEs start with this header.  Most AEs only need to convey the
- * information in the header.  Some, like LLP connection events, need
- * more info.  The union typdef c2wr_ae_t has all the possible AEs.
- *
- * hdr.context is the user_context from the rnic_open WR.  NULL If this
- * is not affiliated with an rnic
- *
- * hdr.id is the AE identifier (eg;  CCAE_REMOTE_SHUTDOWN,
- * CCAE_LLP_CLOSE_COMPLETE)
- *
- * resource_type is one of:  C2_RES_IND_QP, C2_RES_IND_CQ, C2_RES_IND_SRQ
- *
- * user_context is the context passed down when the host created the resource.
- */
-struct c2wr_ae_hdr {
-       struct c2wr_hdr hdr;
-       u64 user_context;       /* user context for this res. */
-       __be32 resource_type;   /* see enum c2_resource_indicator */
-       __be32 resource;        /* handle for resource */
-       __be32 qp_state;        /* current QP State */
-} __attribute__((packed));
-
-/*
- * After submitting the CCAE_ACTIVE_CONNECT_RESULTS message on the AEQ,
- * the adapter moves the QP into RTS state
- */
-struct c2wr_ae_active_connect_results {
-       struct c2wr_ae_hdr ae_hdr;
-       __be32 laddr;
-       __be32 raddr;
-       __be16 lport;
-       __be16 rport;
-       __be32 private_data_length;
-       u8 private_data[0];     /* data is in-line in the msg. */
-} __attribute__((packed));
-
-/*
- * When connections are established by the stack (and the private data
- * MPA frame is received), the adapter will generate an event to the host.
- * The details of the connection, any private data, and the new connection
- * request handle is passed up via the CCAE_CONNECTION_REQUEST msg on the
- * AE queue:
- */
-struct c2wr_ae_connection_request {
-       struct c2wr_ae_hdr ae_hdr;
-       u32 cr_handle;          /* connreq handle (sock ptr) */
-       __be32 laddr;
-       __be32 raddr;
-       __be16 lport;
-       __be16 rport;
-       __be32 private_data_length;
-       u8 private_data[0];     /* data is in-line in the msg. */
-} __attribute__((packed));
-
-union c2wr_ae {
-       struct c2wr_ae_hdr ae_generic;
-       struct c2wr_ae_active_connect_results ae_active_connect_results;
-       struct c2wr_ae_connection_request ae_connection_request;
-} __attribute__((packed));
-
-struct c2wr_init_req {
-       struct c2wr_hdr hdr;
-       __be64 hint_count;
-       __be64 q0_host_shared;
-       __be64 q1_host_shared;
-       __be64 q1_host_msg_pool;
-       __be64 q2_host_shared;
-       __be64 q2_host_msg_pool;
-} __attribute__((packed));
-
-struct c2wr_init_rep {
-       struct c2wr_hdr hdr;
-} __attribute__((packed));
-
-union c2wr_init {
-       struct c2wr_init_req req;
-       struct c2wr_init_rep rep;
-} __attribute__((packed));
-
-/*
- * For upgrading flash.
- */
-
-struct c2wr_flash_init_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-} __attribute__((packed));
-
-struct c2wr_flash_init_rep {
-       struct c2wr_hdr hdr;
-       u32 adapter_flash_buf_offset;
-       u32 adapter_flash_len;
-} __attribute__((packed));
-
-union c2wr_flash_init {
-       struct c2wr_flash_init_req req;
-       struct c2wr_flash_init_rep rep;
-} __attribute__((packed));
-
-struct c2wr_flash_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 len;
-} __attribute__((packed));
-
-struct c2wr_flash_rep {
-       struct c2wr_hdr hdr;
-       u32 status;
-} __attribute__((packed));
-
-union c2wr_flash {
-       struct c2wr_flash_req req;
-       struct c2wr_flash_rep rep;
-} __attribute__((packed));
-
-struct c2wr_buf_alloc_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 size;
-} __attribute__((packed));
-
-struct c2wr_buf_alloc_rep {
-       struct c2wr_hdr hdr;
-       u32 offset;             /* 0 if mem not available */
-       u32 size;               /* 0 if mem not available */
-} __attribute__((packed));
-
-union c2wr_buf_alloc {
-       struct c2wr_buf_alloc_req req;
-       struct c2wr_buf_alloc_rep rep;
-} __attribute__((packed));
-
-struct c2wr_buf_free_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 offset;             /* Must match value from alloc */
-       u32 size;               /* Must match value from alloc */
-} __attribute__((packed));
-
-struct c2wr_buf_free_rep {
-       struct c2wr_hdr hdr;
-} __attribute__((packed));
-
-union c2wr_buf_free {
-       struct c2wr_buf_free_req req;
-       struct c2wr_ce rep;
-} __attribute__((packed));
-
-struct c2wr_flash_write_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 offset;
-       u32 size;
-       u32 type;
-       u32 flags;
-} __attribute__((packed));
-
-struct c2wr_flash_write_rep {
-       struct c2wr_hdr hdr;
-       u32 status;
-} __attribute__((packed));
-
-union c2wr_flash_write {
-       struct c2wr_flash_write_req req;
-       struct c2wr_flash_write_rep rep;
-} __attribute__((packed));
-
-/*
- * Messages for LLP connection setup.
- */
-
-/*
- * Listen Request.  This allocates a listening endpoint to allow passive
- * connection setup.  Newly established LLP connections are passed up
- * via an AE.  See c2wr_ae_connection_request_t
- */
-struct c2wr_ep_listen_create_req {
-       struct c2wr_hdr hdr;
-       u64 user_context;       /* returned in AEs. */
-       u32 rnic_handle;
-       __be32 local_addr;              /* local addr, or 0  */
-       __be16 local_port;              /* 0 means "pick one" */
-       u16 pad;
-       __be32 backlog;         /* tradional tcp listen bl */
-} __attribute__((packed));
-
-struct c2wr_ep_listen_create_rep {
-       struct c2wr_hdr hdr;
-       u32 ep_handle;          /* handle to new listening ep */
-       u16 local_port;         /* resulting port... */
-       u16 pad;
-} __attribute__((packed));
-
-union c2wr_ep_listen_create {
-       struct c2wr_ep_listen_create_req req;
-       struct c2wr_ep_listen_create_rep rep;
-} __attribute__((packed));
-
-struct c2wr_ep_listen_destroy_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 ep_handle;
-} __attribute__((packed));
-
-struct c2wr_ep_listen_destroy_rep {
-       struct c2wr_hdr hdr;
-} __attribute__((packed));
-
-union c2wr_ep_listen_destroy {
-       struct c2wr_ep_listen_destroy_req req;
-       struct c2wr_ep_listen_destroy_rep rep;
-} __attribute__((packed));
-
-struct c2wr_ep_query_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 ep_handle;
-} __attribute__((packed));
-
-struct c2wr_ep_query_rep {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 local_addr;
-       u32 remote_addr;
-       u16 local_port;
-       u16 remote_port;
-} __attribute__((packed));
-
-union c2wr_ep_query {
-       struct c2wr_ep_query_req req;
-       struct c2wr_ep_query_rep rep;
-} __attribute__((packed));
-
-
-/*
- * The host passes this down to indicate acceptance of a pending iWARP
- * connection.  The cr_handle was obtained from the CONNECTION_REQUEST
- * AE passed up by the adapter.  See c2wr_ae_connection_request_t.
- */
-struct c2wr_cr_accept_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 qp_handle;          /* QP to bind to this LLP conn */
-       u32 ep_handle;          /* LLP  handle to accept */
-       __be32 private_data_length;
-       u8 private_data[0];     /* data in-line in msg. */
-} __attribute__((packed));
-
-/*
- * adapter sends reply when private data is successfully submitted to
- * the LLP.
- */
-struct c2wr_cr_accept_rep {
-       struct c2wr_hdr hdr;
-} __attribute__((packed));
-
-union c2wr_cr_accept {
-       struct c2wr_cr_accept_req req;
-       struct c2wr_cr_accept_rep rep;
-} __attribute__((packed));
-
-/*
- * The host sends this down if a given iWARP connection request was
- * rejected by the consumer.  The cr_handle was obtained from a
- * previous c2wr_ae_connection_request_t AE sent by the adapter.
- */
-struct  c2wr_cr_reject_req {
-       struct c2wr_hdr hdr;
-       u32 rnic_handle;
-       u32 ep_handle;          /* LLP handle to reject */
-} __attribute__((packed));
-
-/*
- * Dunno if this is needed, but we'll add it for now.  The adapter will
- * send the reject_reply after the LLP endpoint has been destroyed.
- */
-struct  c2wr_cr_reject_rep {
-       struct c2wr_hdr hdr;
-} __attribute__((packed));
-
-union c2wr_cr_reject {
-       struct c2wr_cr_reject_req req;
-       struct c2wr_cr_reject_rep rep;
-} __attribute__((packed));
-
-/*
- * console command.  Used to implement a debug console over the verbs
- * request and reply queues.
- */
-
-/*
- * Console request message.  It contains:
- *     - message hdr with id = CCWR_CONSOLE
- *     - the physaddr/len of host memory to be used for the reply.
- *     - the command string.  eg:  "netstat -s" or "zoneinfo"
- */
-struct c2wr_console_req {
-       struct c2wr_hdr hdr;            /* id = CCWR_CONSOLE */
-       u64 reply_buf;          /* pinned host buf for reply */
-       u32 reply_buf_len;      /* length of reply buffer */
-       u8 command[0];          /* NUL terminated ascii string */
-       /* containing the command req */
-} __attribute__((packed));
-
-/*
- * flags used in the console reply.
- */
-enum c2_console_flags {
-       CONS_REPLY_TRUNCATED = 0x00000001       /* reply was truncated */
-} __attribute__((packed));
-
-/*
- * Console reply message.
- * hdr.result contains the c2_status_t error if the reply was _not_ generated,
- * or C2_OK if the reply was generated.
- */
-struct c2wr_console_rep {
-       struct c2wr_hdr hdr;            /* id = CCWR_CONSOLE */
-       u32 flags;
-} __attribute__((packed));
-
-union c2wr_console {
-       struct c2wr_console_req req;
-       struct c2wr_console_rep rep;
-} __attribute__((packed));
-
-
-/*
- * Giant union with all WRs.  Makes life easier...
- */
-union c2wr {
-       struct c2wr_hdr hdr;
-       struct c2wr_user_hdr user_hdr;
-       union c2wr_rnic_open rnic_open;
-       union c2wr_rnic_query rnic_query;
-       union c2wr_rnic_getconfig rnic_getconfig;
-       union c2wr_rnic_setconfig rnic_setconfig;
-       union c2wr_rnic_close rnic_close;
-       union c2wr_cq_create cq_create;
-       union c2wr_cq_modify cq_modify;
-       union c2wr_cq_destroy cq_destroy;
-       union c2wr_pd_alloc pd_alloc;
-       union c2wr_pd_dealloc pd_dealloc;
-       union c2wr_srq_create srq_create;
-       union c2wr_srq_destroy srq_destroy;
-       union c2wr_qp_create qp_create;
-       union c2wr_qp_query qp_query;
-       union c2wr_qp_modify qp_modify;
-       union c2wr_qp_destroy qp_destroy;
-       struct c2wr_qp_connect qp_connect;
-       union c2wr_nsmr_stag_alloc nsmr_stag_alloc;
-       union c2wr_nsmr_register nsmr_register;
-       union c2wr_nsmr_pbl nsmr_pbl;
-       union c2wr_mr_query mr_query;
-       union c2wr_mw_query mw_query;
-       union c2wr_stag_dealloc stag_dealloc;
-       union c2wr_sqwr sqwr;
-       struct c2wr_rqwr rqwr;
-       struct c2wr_ce ce;
-       union c2wr_ae ae;
-       union c2wr_init init;
-       union c2wr_ep_listen_create ep_listen_create;
-       union c2wr_ep_listen_destroy ep_listen_destroy;
-       union c2wr_cr_accept cr_accept;
-       union c2wr_cr_reject cr_reject;
-       union c2wr_console console;
-       union c2wr_flash_init flash_init;
-       union c2wr_flash flash;
-       union c2wr_buf_alloc buf_alloc;
-       union c2wr_buf_free buf_free;
-       union c2wr_flash_write flash_write;
-} __attribute__((packed));
-
-
-/*
- * Accessors for the wr fields that are packed together tightly to
- * reduce the wr message size.  The wr arguments are void* so that
- * either a struct c2wr*, a struct c2wr_hdr*, or a pointer to any of the types
- * in the struct c2wr union can be passed in.
- */
-static __inline__ u8 c2_wr_get_id(void *wr)
-{
-       return ((struct c2wr_hdr *) wr)->id;
-}
-static __inline__ void c2_wr_set_id(void *wr, u8 id)
-{
-       ((struct c2wr_hdr *) wr)->id = id;
-}
-static __inline__ u8 c2_wr_get_result(void *wr)
-{
-       return ((struct c2wr_hdr *) wr)->result;
-}
-static __inline__ void c2_wr_set_result(void *wr, u8 result)
-{
-       ((struct c2wr_hdr *) wr)->result = result;
-}
-static __inline__ u8 c2_wr_get_flags(void *wr)
-{
-       return ((struct c2wr_hdr *) wr)->flags;
-}
-static __inline__ void c2_wr_set_flags(void *wr, u8 flags)
-{
-       ((struct c2wr_hdr *) wr)->flags = flags;
-}
-static __inline__ u8 c2_wr_get_sge_count(void *wr)
-{
-       return ((struct c2wr_hdr *) wr)->sge_count;
-}
-static __inline__ void c2_wr_set_sge_count(void *wr, u8 sge_count)
-{
-       ((struct c2wr_hdr *) wr)->sge_count = sge_count;
-}
-static __inline__ __be32 c2_wr_get_wqe_count(void *wr)
-{
-       return ((struct c2wr_hdr *) wr)->wqe_count;
-}
-static __inline__ void c2_wr_set_wqe_count(void *wr, u32 wqe_count)
-{
-       ((struct c2wr_hdr *) wr)->wqe_count = wqe_count;
-}
-
-#endif                         /* _C2_WR_H_ */
diff --git a/drivers/staging/rdma/ehca/Kconfig b/drivers/staging/rdma/ehca/Kconfig
deleted file mode 100644 (file)
index 3fadd2a..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-config INFINIBAND_EHCA
-       tristate "eHCA support"
-       depends on IBMEBUS
-       ---help---
-       This driver supports the deprecated IBM pSeries eHCA InfiniBand
-       adapter.
-
-       To compile the driver as a module, choose M here. The module
-       will be called ib_ehca.
-
diff --git a/drivers/staging/rdma/ehca/Makefile b/drivers/staging/rdma/ehca/Makefile
deleted file mode 100644 (file)
index 74d284e..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-#  Authors: Heiko J Schick <schickhj@de.ibm.com>
-#           Christoph Raisch <raisch@de.ibm.com>
-#           Joachim Fenkes <fenkes@de.ibm.com>
-#
-#  Copyright (c) 2005 IBM Corporation
-#
-#  All rights reserved.
-#
-#  This source code is distributed under a dual license of GPL v2.0 and OpenIB BSD.
-
-obj-$(CONFIG_INFINIBAND_EHCA) += ib_ehca.o
-
-ib_ehca-objs  = ehca_main.o ehca_hca.o ehca_mcast.o ehca_pd.o ehca_av.o ehca_eq.o \
-               ehca_cq.o ehca_qp.o ehca_sqp.o ehca_mrmw.o ehca_reqs.o ehca_irq.o \
-               ehca_uverbs.o ipz_pt_fn.o hcp_if.o hcp_phyp.o
-
diff --git a/drivers/staging/rdma/ehca/TODO b/drivers/staging/rdma/ehca/TODO
deleted file mode 100644 (file)
index 199a4a6..0000000
+++ /dev/null
@@ -1,4 +0,0 @@
-9/2015
-
-The ehca driver has been deprecated and moved to drivers/staging/rdma.
-It will be removed in the 4.6 merge window.
diff --git a/drivers/staging/rdma/ehca/ehca_av.c b/drivers/staging/rdma/ehca/ehca_av.c
deleted file mode 100644 (file)
index 94e088c..0000000
+++ /dev/null
@@ -1,279 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  address vector functions
- *
- *  Authors: Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Khadija Souissi <souissik@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *           Christoph Raisch <raisch@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/slab.h>
-
-#include "ehca_tools.h"
-#include "ehca_iverbs.h"
-#include "hcp_if.h"
-
-static struct kmem_cache *av_cache;
-
-int ehca_calc_ipd(struct ehca_shca *shca, int port,
-                 enum ib_rate path_rate, u32 *ipd)
-{
-       int path = ib_rate_to_mult(path_rate);
-       int link, ret;
-       struct ib_port_attr pa;
-
-       if (path_rate == IB_RATE_PORT_CURRENT) {
-               *ipd = 0;
-               return 0;
-       }
-
-       if (unlikely(path < 0)) {
-               ehca_err(&shca->ib_device, "Invalid static rate! path_rate=%x",
-                        path_rate);
-               return -EINVAL;
-       }
-
-       ret = ehca_query_port(&shca->ib_device, port, &pa);
-       if (unlikely(ret < 0)) {
-               ehca_err(&shca->ib_device, "Failed to query port  ret=%i", ret);
-               return ret;
-       }
-
-       link = ib_width_enum_to_int(pa.active_width) * pa.active_speed;
-
-       if (path >= link)
-               /* no need to throttle if path faster than link */
-               *ipd = 0;
-       else
-               /* IPD = round((link / path) - 1) */
-               *ipd = ((link + (path >> 1)) / path) - 1;
-
-       return 0;
-}
-
-struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
-{
-       int ret;
-       struct ehca_av *av;
-       struct ehca_shca *shca = container_of(pd->device, struct ehca_shca,
-                                             ib_device);
-
-       av = kmem_cache_alloc(av_cache, GFP_KERNEL);
-       if (!av) {
-               ehca_err(pd->device, "Out of memory pd=%p ah_attr=%p",
-                        pd, ah_attr);
-               return ERR_PTR(-ENOMEM);
-       }
-
-       av->av.sl = ah_attr->sl;
-       av->av.dlid = ah_attr->dlid;
-       av->av.slid_path_bits = ah_attr->src_path_bits;
-
-       if (ehca_static_rate < 0) {
-               u32 ipd;
-
-               if (ehca_calc_ipd(shca, ah_attr->port_num,
-                                 ah_attr->static_rate, &ipd)) {
-                       ret = -EINVAL;
-                       goto create_ah_exit1;
-               }
-               av->av.ipd = ipd;
-       } else
-               av->av.ipd = ehca_static_rate;
-
-       av->av.lnh = ah_attr->ah_flags;
-       av->av.grh.word_0 = EHCA_BMASK_SET(GRH_IPVERSION_MASK, 6);
-       av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_TCLASS_MASK,
-                                           ah_attr->grh.traffic_class);
-       av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_FLOWLABEL_MASK,
-                                           ah_attr->grh.flow_label);
-       av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_HOPLIMIT_MASK,
-                                           ah_attr->grh.hop_limit);
-       av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_NEXTHEADER_MASK, 0x1B);
-       /* set sgid in grh.word_1 */
-       if (ah_attr->ah_flags & IB_AH_GRH) {
-               int rc;
-               struct ib_port_attr port_attr;
-               union ib_gid gid;
-
-               memset(&port_attr, 0, sizeof(port_attr));
-               rc = ehca_query_port(pd->device, ah_attr->port_num,
-                                    &port_attr);
-               if (rc) { /* invalid port number */
-                       ret = -EINVAL;
-                       ehca_err(pd->device, "Invalid port number "
-                                "ehca_query_port() returned %x "
-                                "pd=%p ah_attr=%p", rc, pd, ah_attr);
-                       goto create_ah_exit1;
-               }
-               memset(&gid, 0, sizeof(gid));
-               rc = ehca_query_gid(pd->device,
-                                   ah_attr->port_num,
-                                   ah_attr->grh.sgid_index, &gid);
-               if (rc) {
-                       ret = -EINVAL;
-                       ehca_err(pd->device, "Failed to retrieve sgid "
-                                "ehca_query_gid() returned %x "
-                                "pd=%p ah_attr=%p", rc, pd, ah_attr);
-                       goto create_ah_exit1;
-               }
-               memcpy(&av->av.grh.word_1, &gid, sizeof(gid));
-       }
-       av->av.pmtu = shca->max_mtu;
-
-       /* dgid comes in grh.word_3 */
-       memcpy(&av->av.grh.word_3, &ah_attr->grh.dgid,
-              sizeof(ah_attr->grh.dgid));
-
-       return &av->ib_ah;
-
-create_ah_exit1:
-       kmem_cache_free(av_cache, av);
-
-       return ERR_PTR(ret);
-}
-
-int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
-{
-       struct ehca_av *av;
-       struct ehca_ud_av new_ehca_av;
-       struct ehca_shca *shca = container_of(ah->pd->device, struct ehca_shca,
-                                             ib_device);
-
-       memset(&new_ehca_av, 0, sizeof(new_ehca_av));
-       new_ehca_av.sl = ah_attr->sl;
-       new_ehca_av.dlid = ah_attr->dlid;
-       new_ehca_av.slid_path_bits = ah_attr->src_path_bits;
-       new_ehca_av.ipd = ah_attr->static_rate;
-       new_ehca_av.lnh = EHCA_BMASK_SET(GRH_FLAG_MASK,
-                                        (ah_attr->ah_flags & IB_AH_GRH) > 0);
-       new_ehca_av.grh.word_0 = EHCA_BMASK_SET(GRH_TCLASS_MASK,
-                                               ah_attr->grh.traffic_class);
-       new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_FLOWLABEL_MASK,
-                                                ah_attr->grh.flow_label);
-       new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_HOPLIMIT_MASK,
-                                                ah_attr->grh.hop_limit);
-       new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_NEXTHEADER_MASK, 0x1b);
-
-       /* set sgid in grh.word_1 */
-       if (ah_attr->ah_flags & IB_AH_GRH) {
-               int rc;
-               struct ib_port_attr port_attr;
-               union ib_gid gid;
-
-               memset(&port_attr, 0, sizeof(port_attr));
-               rc = ehca_query_port(ah->device, ah_attr->port_num,
-                                    &port_attr);
-               if (rc) { /* invalid port number */
-                       ehca_err(ah->device, "Invalid port number "
-                                "ehca_query_port() returned %x "
-                                "ah=%p ah_attr=%p port_num=%x",
-                                rc, ah, ah_attr, ah_attr->port_num);
-                       return -EINVAL;
-               }
-               memset(&gid, 0, sizeof(gid));
-               rc = ehca_query_gid(ah->device,
-                                   ah_attr->port_num,
-                                   ah_attr->grh.sgid_index, &gid);
-               if (rc) {
-                       ehca_err(ah->device, "Failed to retrieve sgid "
-                                "ehca_query_gid() returned %x "
-                                "ah=%p ah_attr=%p port_num=%x "
-                                "sgid_index=%x",
-                                rc, ah, ah_attr, ah_attr->port_num,
-                                ah_attr->grh.sgid_index);
-                       return -EINVAL;
-               }
-               memcpy(&new_ehca_av.grh.word_1, &gid, sizeof(gid));
-       }
-
-       new_ehca_av.pmtu = shca->max_mtu;
-
-       memcpy(&new_ehca_av.grh.word_3, &ah_attr->grh.dgid,
-              sizeof(ah_attr->grh.dgid));
-
-       av = container_of(ah, struct ehca_av, ib_ah);
-       av->av = new_ehca_av;
-
-       return 0;
-}
-
-int ehca_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
-{
-       struct ehca_av *av = container_of(ah, struct ehca_av, ib_ah);
-
-       memcpy(&ah_attr->grh.dgid, &av->av.grh.word_3,
-              sizeof(ah_attr->grh.dgid));
-       ah_attr->sl = av->av.sl;
-
-       ah_attr->dlid = av->av.dlid;
-
-       ah_attr->src_path_bits = av->av.slid_path_bits;
-       ah_attr->static_rate = av->av.ipd;
-       ah_attr->ah_flags = EHCA_BMASK_GET(GRH_FLAG_MASK, av->av.lnh);
-       ah_attr->grh.traffic_class = EHCA_BMASK_GET(GRH_TCLASS_MASK,
-                                                   av->av.grh.word_0);
-       ah_attr->grh.hop_limit = EHCA_BMASK_GET(GRH_HOPLIMIT_MASK,
-                                               av->av.grh.word_0);
-       ah_attr->grh.flow_label = EHCA_BMASK_GET(GRH_FLOWLABEL_MASK,
-                                                av->av.grh.word_0);
-
-       return 0;
-}
-
-int ehca_destroy_ah(struct ib_ah *ah)
-{
-       kmem_cache_free(av_cache, container_of(ah, struct ehca_av, ib_ah));
-
-       return 0;
-}
-
-int ehca_init_av_cache(void)
-{
-       av_cache = kmem_cache_create("ehca_cache_av",
-                                  sizeof(struct ehca_av), 0,
-                                  SLAB_HWCACHE_ALIGN,
-                                  NULL);
-       if (!av_cache)
-               return -ENOMEM;
-       return 0;
-}
-
-void ehca_cleanup_av_cache(void)
-{
-       kmem_cache_destroy(av_cache);
-}
diff --git a/drivers/staging/rdma/ehca/ehca_classes.h b/drivers/staging/rdma/ehca/ehca_classes.h
deleted file mode 100644 (file)
index bd45e0f..0000000
+++ /dev/null
@@ -1,482 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  Struct definition for eHCA internal structures
- *
- *  Authors: Heiko J Schick <schickhj@de.ibm.com>
- *           Christoph Raisch <raisch@de.ibm.com>
- *           Joachim Fenkes <fenkes@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __EHCA_CLASSES_H__
-#define __EHCA_CLASSES_H__
-
-struct ehca_module;
-struct ehca_qp;
-struct ehca_cq;
-struct ehca_eq;
-struct ehca_mr;
-struct ehca_mw;
-struct ehca_pd;
-struct ehca_av;
-
-#include <linux/wait.h>
-#include <linux/mutex.h>
-
-#include <rdma/ib_verbs.h>
-#include <rdma/ib_user_verbs.h>
-
-#ifdef CONFIG_PPC64
-#include "ehca_classes_pSeries.h"
-#endif
-#include "ipz_pt_fn.h"
-#include "ehca_qes.h"
-#include "ehca_irq.h"
-
-#define EHCA_EQE_CACHE_SIZE 20
-#define EHCA_MAX_NUM_QUEUES 0xffff
-
-struct ehca_eqe_cache_entry {
-       struct ehca_eqe *eqe;
-       struct ehca_cq *cq;
-};
-
-struct ehca_eq {
-       u32 length;
-       struct ipz_queue ipz_queue;
-       struct ipz_eq_handle ipz_eq_handle;
-       struct work_struct work;
-       struct h_galpas galpas;
-       int is_initialized;
-       struct ehca_pfeq pf;
-       spinlock_t spinlock;
-       struct tasklet_struct interrupt_task;
-       u32 ist;
-       spinlock_t irq_spinlock;
-       struct ehca_eqe_cache_entry eqe_cache[EHCA_EQE_CACHE_SIZE];
-};
-
-struct ehca_sma_attr {
-       u16 lid, lmc, sm_sl, sm_lid;
-       u16 pkey_tbl_len, pkeys[16];
-};
-
-struct ehca_sport {
-       struct ib_cq *ibcq_aqp1;
-       struct ib_qp *ibqp_sqp[2];
-       /* lock to serialze modify_qp() calls for sqp in normal
-        * and irq path (when event PORT_ACTIVE is received first time)
-        */
-       spinlock_t mod_sqp_lock;
-       enum ib_port_state port_state;
-       struct ehca_sma_attr saved_attr;
-       u32 pma_qp_nr;
-};
-
-#define HCA_CAP_MR_PGSIZE_4K  0x80000000
-#define HCA_CAP_MR_PGSIZE_64K 0x40000000
-#define HCA_CAP_MR_PGSIZE_1M  0x20000000
-#define HCA_CAP_MR_PGSIZE_16M 0x10000000
-
-struct ehca_shca {
-       struct ib_device ib_device;
-       struct platform_device *ofdev;
-       u8 num_ports;
-       int hw_level;
-       struct list_head shca_list;
-       struct ipz_adapter_handle ipz_hca_handle;
-       struct ehca_sport sport[2];
-       struct ehca_eq eq;
-       struct ehca_eq neq;
-       struct ehca_mr *maxmr;
-       struct ehca_pd *pd;
-       struct h_galpas galpas;
-       struct mutex modify_mutex;
-       u64 hca_cap;
-       /* MR pgsize: bit 0-3 means 4K, 64K, 1M, 16M respectively */
-       u32 hca_cap_mr_pgsize;
-       int max_mtu;
-       int max_num_qps;
-       int max_num_cqs;
-       atomic_t num_cqs;
-       atomic_t num_qps;
-};
-
-struct ehca_pd {
-       struct ib_pd ib_pd;
-       struct ipz_pd fw_pd;
-       /* small queue mgmt */
-       struct mutex lock;
-       struct list_head free[2];
-       struct list_head full[2];
-};
-
-enum ehca_ext_qp_type {
-       EQPT_NORMAL    = 0,
-       EQPT_LLQP      = 1,
-       EQPT_SRQBASE   = 2,
-       EQPT_SRQ       = 3,
-};
-
-/* struct to cache modify_qp()'s parms for GSI/SMI qp */
-struct ehca_mod_qp_parm {
-       int mask;
-       struct ib_qp_attr attr;
-};
-
-#define EHCA_MOD_QP_PARM_MAX 4
-
-#define QMAP_IDX_MASK 0xFFFFULL
-
-/* struct for tracking if cqes have been reported to the application */
-struct ehca_qmap_entry {
-       u16 app_wr_id;
-       u8 reported;
-       u8 cqe_req;
-};
-
-struct ehca_queue_map {
-       struct ehca_qmap_entry *map;
-       unsigned int entries;
-       unsigned int tail;
-       unsigned int left_to_poll;
-       unsigned int next_wqe_idx;   /* Idx to first wqe to be flushed */
-};
-
-/* function to calculate the next index for the qmap */
-static inline unsigned int next_index(unsigned int cur_index, unsigned int limit)
-{
-       unsigned int temp = cur_index + 1;
-       return (temp == limit) ? 0 : temp;
-}
-
-struct ehca_qp {
-       union {
-               struct ib_qp ib_qp;
-               struct ib_srq ib_srq;
-       };
-       u32 qp_type;
-       enum ehca_ext_qp_type ext_type;
-       enum ib_qp_state state;
-       struct ipz_queue ipz_squeue;
-       struct ehca_queue_map sq_map;
-       struct ipz_queue ipz_rqueue;
-       struct ehca_queue_map rq_map;
-       struct h_galpas galpas;
-       u32 qkey;
-       u32 real_qp_num;
-       u32 token;
-       spinlock_t spinlock_s;
-       spinlock_t spinlock_r;
-       u32 sq_max_inline_data_size;
-       struct ipz_qp_handle ipz_qp_handle;
-       struct ehca_pfqp pf;
-       struct ib_qp_init_attr init_attr;
-       struct ehca_cq *send_cq;
-       struct ehca_cq *recv_cq;
-       unsigned int sqerr_purgeflag;
-       struct hlist_node list_entries;
-       /* array to cache modify_qp()'s parms for GSI/SMI qp */
-       struct ehca_mod_qp_parm *mod_qp_parm;
-       int mod_qp_parm_idx;
-       /* mmap counter for resources mapped into user space */
-       u32 mm_count_squeue;
-       u32 mm_count_rqueue;
-       u32 mm_count_galpa;
-       /* unsolicited ack circumvention */
-       int unsol_ack_circ;
-       int mtu_shift;
-       u32 message_count;
-       u32 packet_count;
-       atomic_t nr_events; /* events seen */
-       wait_queue_head_t wait_completion;
-       int mig_armed;
-       struct list_head sq_err_node;
-       struct list_head rq_err_node;
-};
-
-#define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ)
-#define HAS_SQ(qp) (qp->ext_type != EQPT_SRQ)
-#define HAS_RQ(qp) (qp->ext_type != EQPT_SRQBASE)
-
-/* must be power of 2 */
-#define QP_HASHTAB_LEN 8
-
-struct ehca_cq {
-       struct ib_cq ib_cq;
-       struct ipz_queue ipz_queue;
-       struct h_galpas galpas;
-       spinlock_t spinlock;
-       u32 cq_number;
-       u32 token;
-       u32 nr_of_entries;
-       struct ipz_cq_handle ipz_cq_handle;
-       struct ehca_pfcq pf;
-       spinlock_t cb_lock;
-       struct hlist_head qp_hashtab[QP_HASHTAB_LEN];
-       struct list_head entry;
-       u32 nr_callbacks;   /* #events assigned to cpu by scaling code */
-       atomic_t nr_events; /* #events seen */
-       wait_queue_head_t wait_completion;
-       spinlock_t task_lock;
-       /* mmap counter for resources mapped into user space */
-       u32 mm_count_queue;
-       u32 mm_count_galpa;
-       struct list_head sqp_err_list;
-       struct list_head rqp_err_list;
-};
-
-enum ehca_mr_flag {
-       EHCA_MR_FLAG_FMR = 0x80000000,   /* FMR, created with ehca_alloc_fmr */
-       EHCA_MR_FLAG_MAXMR = 0x40000000, /* max-MR                           */
-};
-
-struct ehca_mr {
-       union {
-               struct ib_mr ib_mr;     /* must always be first in ehca_mr */
-               struct ib_fmr ib_fmr;   /* must always be first in ehca_mr */
-       } ib;
-       struct ib_umem *umem;
-       spinlock_t mrlock;
-
-       enum ehca_mr_flag flags;
-       u32 num_kpages;         /* number of kernel pages */
-       u32 num_hwpages;        /* number of hw pages to form MR */
-       u64 hwpage_size;        /* hw page size used for this MR */
-       int acl;                /* ACL (stored here for usage in reregister) */
-       u64 *start;             /* virtual start address (stored here for */
-                               /* usage in reregister) */
-       u64 size;               /* size (stored here for usage in reregister) */
-       u32 fmr_page_size;      /* page size for FMR */
-       u32 fmr_max_pages;      /* max pages for FMR */
-       u32 fmr_max_maps;       /* max outstanding maps for FMR */
-       u32 fmr_map_cnt;        /* map counter for FMR */
-       /* fw specific data */
-       struct ipz_mrmw_handle ipz_mr_handle;   /* MR handle for h-calls */
-       struct h_galpas galpas;
-};
-
-struct ehca_mw {
-       struct ib_mw ib_mw;     /* gen2 mw, must always be first in ehca_mw */
-       spinlock_t mwlock;
-
-       u8 never_bound;         /* indication MW was never bound */
-       struct ipz_mrmw_handle ipz_mw_handle;   /* MW handle for h-calls */
-       struct h_galpas galpas;
-};
-
-enum ehca_mr_pgi_type {
-       EHCA_MR_PGI_PHYS   = 1,  /* type of ehca_reg_phys_mr,
-                                 * ehca_rereg_phys_mr,
-                                 * ehca_reg_internal_maxmr */
-       EHCA_MR_PGI_USER   = 2,  /* type of ehca_reg_user_mr */
-       EHCA_MR_PGI_FMR    = 3   /* type of ehca_map_phys_fmr */
-};
-
-struct ehca_mr_pginfo {
-       enum ehca_mr_pgi_type type;
-       u64 num_kpages;
-       u64 kpage_cnt;
-       u64 hwpage_size;     /* hw page size used for this MR */
-       u64 num_hwpages;     /* number of hw pages */
-       u64 hwpage_cnt;      /* counter for hw pages */
-       u64 next_hwpage;     /* next hw page in buffer/chunk/listelem */
-
-       union {
-               struct { /* type EHCA_MR_PGI_PHYS section */
-                       int num_phys_buf;
-                       struct ib_phys_buf *phys_buf_array;
-                       u64 next_buf;
-               } phy;
-               struct { /* type EHCA_MR_PGI_USER section */
-                       struct ib_umem *region;
-                       struct scatterlist *next_sg;
-                       u64 next_nmap;
-               } usr;
-               struct { /* type EHCA_MR_PGI_FMR section */
-                       u64 fmr_pgsize;
-                       u64 *page_list;
-                       u64 next_listelem;
-               } fmr;
-       } u;
-};
-
-/* output parameters for MR/FMR hipz calls */
-struct ehca_mr_hipzout_parms {
-       struct ipz_mrmw_handle handle;
-       u32 lkey;
-       u32 rkey;
-       u64 len;
-       u64 vaddr;
-       u32 acl;
-};
-
-/* output parameters for MW hipz calls */
-struct ehca_mw_hipzout_parms {
-       struct ipz_mrmw_handle handle;
-       u32 rkey;
-};
-
-struct ehca_av {
-       struct ib_ah ib_ah;
-       struct ehca_ud_av av;
-};
-
-struct ehca_ucontext {
-       struct ib_ucontext ib_ucontext;
-};
-
-int ehca_init_pd_cache(void);
-void ehca_cleanup_pd_cache(void);
-int ehca_init_cq_cache(void);
-void ehca_cleanup_cq_cache(void);
-int ehca_init_qp_cache(void);
-void ehca_cleanup_qp_cache(void);
-int ehca_init_av_cache(void);
-void ehca_cleanup_av_cache(void);
-int ehca_init_mrmw_cache(void);
-void ehca_cleanup_mrmw_cache(void);
-int ehca_init_small_qp_cache(void);
-void ehca_cleanup_small_qp_cache(void);
-
-extern rwlock_t ehca_qp_idr_lock;
-extern rwlock_t ehca_cq_idr_lock;
-extern struct idr ehca_qp_idr;
-extern struct idr ehca_cq_idr;
-extern spinlock_t shca_list_lock;
-
-extern int ehca_static_rate;
-extern int ehca_port_act_time;
-extern bool ehca_use_hp_mr;
-extern bool ehca_scaling_code;
-extern int ehca_lock_hcalls;
-extern int ehca_nr_ports;
-extern int ehca_max_cq;
-extern int ehca_max_qp;
-
-struct ipzu_queue_resp {
-       u32 qe_size;      /* queue entry size */
-       u32 act_nr_of_sg;
-       u32 queue_length; /* queue length allocated in bytes */
-       u32 pagesize;
-       u32 toggle_state;
-       u32 offset; /* save offset within a page for small_qp */
-};
-
-struct ehca_create_cq_resp {
-       u32 cq_number;
-       u32 token;
-       struct ipzu_queue_resp ipz_queue;
-       u32 fw_handle_ofs;
-       u32 dummy;
-};
-
-struct ehca_create_qp_resp {
-       u32 qp_num;
-       u32 token;
-       u32 qp_type;
-       u32 ext_type;
-       u32 qkey;
-       /* qp_num assigned by ehca: sqp0/1 may have got different numbers */
-       u32 real_qp_num;
-       u32 fw_handle_ofs;
-       u32 dummy;
-       struct ipzu_queue_resp ipz_squeue;
-       struct ipzu_queue_resp ipz_rqueue;
-};
-
-struct ehca_alloc_cq_parms {
-       u32 nr_cqe;
-       u32 act_nr_of_entries;
-       u32 act_pages;
-       struct ipz_eq_handle eq_handle;
-};
-
-enum ehca_service_type {
-       ST_RC  = 0,
-       ST_UC  = 1,
-       ST_RD  = 2,
-       ST_UD  = 3,
-};
-
-enum ehca_ll_comp_flags {
-       LLQP_SEND_COMP = 0x20,
-       LLQP_RECV_COMP = 0x40,
-       LLQP_COMP_MASK = 0x60,
-};
-
-struct ehca_alloc_queue_parms {
-       /* input parameters */
-       int max_wr;
-       int max_sge;
-       int page_size;
-       int is_small;
-
-       /* output parameters */
-       u16 act_nr_wqes;
-       u8  act_nr_sges;
-       u32 queue_size; /* bytes for small queues, pages otherwise */
-};
-
-struct ehca_alloc_qp_parms {
-       struct ehca_alloc_queue_parms squeue;
-       struct ehca_alloc_queue_parms rqueue;
-
-       /* input parameters */
-       enum ehca_service_type servicetype;
-       int qp_storage;
-       int sigtype;
-       enum ehca_ext_qp_type ext_type;
-       enum ehca_ll_comp_flags ll_comp_flags;
-       int ud_av_l_key_ctl;
-
-       u32 token;
-       struct ipz_eq_handle eq_handle;
-       struct ipz_pd pd;
-       struct ipz_cq_handle send_cq_handle, recv_cq_handle;
-
-       u32 srq_qpn, srq_token, srq_limit;
-
-       /* output parameters */
-       u32 real_qp_num;
-       struct ipz_qp_handle qp_handle;
-       struct h_galpas galpas;
-};
-
-int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp);
-int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int qp_num);
-struct ehca_qp *ehca_cq_get_qp(struct ehca_cq *cq, int qp_num);
-
-#endif
diff --git a/drivers/staging/rdma/ehca/ehca_classes_pSeries.h b/drivers/staging/rdma/ehca/ehca_classes_pSeries.h
deleted file mode 100644 (file)
index 689c357..0000000
+++ /dev/null
@@ -1,208 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  pSeries interface definitions
- *
- *  Authors: Waleri Fomin <fomin@de.ibm.com>
- *           Christoph Raisch <raisch@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __EHCA_CLASSES_PSERIES_H__
-#define __EHCA_CLASSES_PSERIES_H__
-
-#include "hcp_phyp.h"
-#include "ipz_pt_fn.h"
-
-
-struct ehca_pfqp {
-       struct ipz_qpt sqpt;
-       struct ipz_qpt rqpt;
-};
-
-struct ehca_pfcq {
-       struct ipz_qpt qpt;
-       u32 cqnr;
-};
-
-struct ehca_pfeq {
-       struct ipz_qpt qpt;
-       struct h_galpa galpa;
-       u32 eqnr;
-};
-
-struct ipz_adapter_handle {
-       u64 handle;
-};
-
-struct ipz_cq_handle {
-       u64 handle;
-};
-
-struct ipz_eq_handle {
-       u64 handle;
-};
-
-struct ipz_qp_handle {
-       u64 handle;
-};
-struct ipz_mrmw_handle {
-       u64 handle;
-};
-
-struct ipz_pd {
-       u32 value;
-};
-
-struct hcp_modify_qp_control_block {
-       u32 qkey;                      /* 00 */
-       u32 rdd;                       /* reliable datagram domain */
-       u32 send_psn;                  /* 02 */
-       u32 receive_psn;               /* 03 */
-       u32 prim_phys_port;            /* 04 */
-       u32 alt_phys_port;             /* 05 */
-       u32 prim_p_key_idx;            /* 06 */
-       u32 alt_p_key_idx;             /* 07 */
-       u32 rdma_atomic_ctrl;          /* 08 */
-       u32 qp_state;                  /* 09 */
-       u32 reserved_10;               /* 10 */
-       u32 rdma_nr_atomic_resp_res;   /* 11 */
-       u32 path_migration_state;      /* 12 */
-       u32 rdma_atomic_outst_dest_qp; /* 13 */
-       u32 dest_qp_nr;                /* 14 */
-       u32 min_rnr_nak_timer_field;   /* 15 */
-       u32 service_level;             /* 16 */
-       u32 send_grh_flag;             /* 17 */
-       u32 retry_count;               /* 18 */
-       u32 timeout;                   /* 19 */
-       u32 path_mtu;                  /* 20 */
-       u32 max_static_rate;           /* 21 */
-       u32 dlid;                      /* 22 */
-       u32 rnr_retry_count;           /* 23 */
-       u32 source_path_bits;          /* 24 */
-       u32 traffic_class;             /* 25 */
-       u32 hop_limit;                 /* 26 */
-       u32 source_gid_idx;            /* 27 */
-       u32 flow_label;                /* 28 */
-       u32 reserved_29;               /* 29 */
-       union {                        /* 30 */
-               u64 dw[2];
-               u8 byte[16];
-       } dest_gid;
-       u32 service_level_al;          /* 34 */
-       u32 send_grh_flag_al;          /* 35 */
-       u32 retry_count_al;            /* 36 */
-       u32 timeout_al;                /* 37 */
-       u32 max_static_rate_al;        /* 38 */
-       u32 dlid_al;                   /* 39 */
-       u32 rnr_retry_count_al;        /* 40 */
-       u32 source_path_bits_al;       /* 41 */
-       u32 traffic_class_al;          /* 42 */
-       u32 hop_limit_al;              /* 43 */
-       u32 source_gid_idx_al;         /* 44 */
-       u32 flow_label_al;             /* 45 */
-       u32 reserved_46;               /* 46 */
-       u32 reserved_47;               /* 47 */
-       union {                        /* 48 */
-               u64 dw[2];
-               u8 byte[16];
-       } dest_gid_al;
-       u32 max_nr_outst_send_wr;      /* 52 */
-       u32 max_nr_outst_recv_wr;      /* 53 */
-       u32 disable_ete_credit_check;  /* 54 */
-       u32 qp_number;                 /* 55 */
-       u64 send_queue_handle;         /* 56 */
-       u64 recv_queue_handle;         /* 58 */
-       u32 actual_nr_sges_in_sq_wqe;  /* 60 */
-       u32 actual_nr_sges_in_rq_wqe;  /* 61 */
-       u32 qp_enable;                 /* 62 */
-       u32 curr_srq_limit;            /* 63 */
-       u64 qp_aff_asyn_ev_log_reg;    /* 64 */
-       u64 shared_rq_hndl;            /* 66 */
-       u64 trigg_doorbell_qp_hndl;    /* 68 */
-       u32 reserved_70_127[58];       /* 70 */
-};
-
-#define MQPCB_MASK_QKEY                         EHCA_BMASK_IBM( 0,  0)
-#define MQPCB_MASK_SEND_PSN                     EHCA_BMASK_IBM( 2,  2)
-#define MQPCB_MASK_RECEIVE_PSN                  EHCA_BMASK_IBM( 3,  3)
-#define MQPCB_MASK_PRIM_PHYS_PORT               EHCA_BMASK_IBM( 4,  4)
-#define MQPCB_PRIM_PHYS_PORT                    EHCA_BMASK_IBM(24, 31)
-#define MQPCB_MASK_ALT_PHYS_PORT                EHCA_BMASK_IBM( 5,  5)
-#define MQPCB_MASK_PRIM_P_KEY_IDX               EHCA_BMASK_IBM( 6,  6)
-#define MQPCB_PRIM_P_KEY_IDX                    EHCA_BMASK_IBM(24, 31)
-#define MQPCB_MASK_ALT_P_KEY_IDX                EHCA_BMASK_IBM( 7,  7)
-#define MQPCB_MASK_RDMA_ATOMIC_CTRL             EHCA_BMASK_IBM( 8,  8)
-#define MQPCB_MASK_QP_STATE                     EHCA_BMASK_IBM( 9,  9)
-#define MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES      EHCA_BMASK_IBM(11, 11)
-#define MQPCB_MASK_PATH_MIGRATION_STATE         EHCA_BMASK_IBM(12, 12)
-#define MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP    EHCA_BMASK_IBM(13, 13)
-#define MQPCB_MASK_DEST_QP_NR                   EHCA_BMASK_IBM(14, 14)
-#define MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD      EHCA_BMASK_IBM(15, 15)
-#define MQPCB_MASK_SERVICE_LEVEL                EHCA_BMASK_IBM(16, 16)
-#define MQPCB_MASK_SEND_GRH_FLAG                EHCA_BMASK_IBM(17, 17)
-#define MQPCB_MASK_RETRY_COUNT                  EHCA_BMASK_IBM(18, 18)
-#define MQPCB_MASK_TIMEOUT                      EHCA_BMASK_IBM(19, 19)
-#define MQPCB_MASK_PATH_MTU                     EHCA_BMASK_IBM(20, 20)
-#define MQPCB_MASK_MAX_STATIC_RATE              EHCA_BMASK_IBM(21, 21)
-#define MQPCB_MASK_DLID                         EHCA_BMASK_IBM(22, 22)
-#define MQPCB_MASK_RNR_RETRY_COUNT              EHCA_BMASK_IBM(23, 23)
-#define MQPCB_MASK_SOURCE_PATH_BITS             EHCA_BMASK_IBM(24, 24)
-#define MQPCB_MASK_TRAFFIC_CLASS                EHCA_BMASK_IBM(25, 25)
-#define MQPCB_MASK_HOP_LIMIT                    EHCA_BMASK_IBM(26, 26)
-#define MQPCB_MASK_SOURCE_GID_IDX               EHCA_BMASK_IBM(27, 27)
-#define MQPCB_MASK_FLOW_LABEL                   EHCA_BMASK_IBM(28, 28)
-#define MQPCB_MASK_DEST_GID                     EHCA_BMASK_IBM(30, 30)
-#define MQPCB_MASK_SERVICE_LEVEL_AL             EHCA_BMASK_IBM(31, 31)
-#define MQPCB_MASK_SEND_GRH_FLAG_AL             EHCA_BMASK_IBM(32, 32)
-#define MQPCB_MASK_RETRY_COUNT_AL               EHCA_BMASK_IBM(33, 33)
-#define MQPCB_MASK_TIMEOUT_AL                   EHCA_BMASK_IBM(34, 34)
-#define MQPCB_MASK_MAX_STATIC_RATE_AL           EHCA_BMASK_IBM(35, 35)
-#define MQPCB_MASK_DLID_AL                      EHCA_BMASK_IBM(36, 36)
-#define MQPCB_MASK_RNR_RETRY_COUNT_AL           EHCA_BMASK_IBM(37, 37)
-#define MQPCB_MASK_SOURCE_PATH_BITS_AL          EHCA_BMASK_IBM(38, 38)
-#define MQPCB_MASK_TRAFFIC_CLASS_AL             EHCA_BMASK_IBM(39, 39)
-#define MQPCB_MASK_HOP_LIMIT_AL                 EHCA_BMASK_IBM(40, 40)
-#define MQPCB_MASK_SOURCE_GID_IDX_AL            EHCA_BMASK_IBM(41, 41)
-#define MQPCB_MASK_FLOW_LABEL_AL                EHCA_BMASK_IBM(42, 42)
-#define MQPCB_MASK_DEST_GID_AL                  EHCA_BMASK_IBM(44, 44)
-#define MQPCB_MASK_MAX_NR_OUTST_SEND_WR         EHCA_BMASK_IBM(45, 45)
-#define MQPCB_MASK_MAX_NR_OUTST_RECV_WR         EHCA_BMASK_IBM(46, 46)
-#define MQPCB_MASK_DISABLE_ETE_CREDIT_CHECK     EHCA_BMASK_IBM(47, 47)
-#define MQPCB_MASK_QP_ENABLE                    EHCA_BMASK_IBM(48, 48)
-#define MQPCB_MASK_CURR_SRQ_LIMIT               EHCA_BMASK_IBM(49, 49)
-#define MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG       EHCA_BMASK_IBM(50, 50)
-#define MQPCB_MASK_SHARED_RQ_HNDL               EHCA_BMASK_IBM(51, 51)
-
-#endif /* __EHCA_CLASSES_PSERIES_H__ */
diff --git a/drivers/staging/rdma/ehca/ehca_cq.c b/drivers/staging/rdma/ehca/ehca_cq.c
deleted file mode 100644 (file)
index 1aa7931..0000000
+++ /dev/null
@@ -1,397 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  Completion queue handling
- *
- *  Authors: Waleri Fomin <fomin@de.ibm.com>
- *           Khadija Souissi <souissi@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *           Heiko J Schick <schickhj@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/slab.h>
-
-#include "ehca_iverbs.h"
-#include "ehca_classes.h"
-#include "ehca_irq.h"
-#include "hcp_if.h"
-
-static struct kmem_cache *cq_cache;
-
-int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp)
-{
-       unsigned int qp_num = qp->real_qp_num;
-       unsigned int key = qp_num & (QP_HASHTAB_LEN-1);
-       unsigned long flags;
-
-       spin_lock_irqsave(&cq->spinlock, flags);
-       hlist_add_head(&qp->list_entries, &cq->qp_hashtab[key]);
-       spin_unlock_irqrestore(&cq->spinlock, flags);
-
-       ehca_dbg(cq->ib_cq.device, "cq_num=%x real_qp_num=%x",
-                cq->cq_number, qp_num);
-
-       return 0;
-}
-
-int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int real_qp_num)
-{
-       int ret = -EINVAL;
-       unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1);
-       struct hlist_node *iter;
-       struct ehca_qp *qp;
-       unsigned long flags;
-
-       spin_lock_irqsave(&cq->spinlock, flags);
-       hlist_for_each(iter, &cq->qp_hashtab[key]) {
-               qp = hlist_entry(iter, struct ehca_qp, list_entries);
-               if (qp->real_qp_num == real_qp_num) {
-                       hlist_del(iter);
-                       ehca_dbg(cq->ib_cq.device,
-                                "removed qp from cq .cq_num=%x real_qp_num=%x",
-                                cq->cq_number, real_qp_num);
-                       ret = 0;
-                       break;
-               }
-       }
-       spin_unlock_irqrestore(&cq->spinlock, flags);
-       if (ret)
-               ehca_err(cq->ib_cq.device,
-                        "qp not found cq_num=%x real_qp_num=%x",
-                        cq->cq_number, real_qp_num);
-
-       return ret;
-}
-
-struct ehca_qp *ehca_cq_get_qp(struct ehca_cq *cq, int real_qp_num)
-{
-       struct ehca_qp *ret = NULL;
-       unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1);
-       struct hlist_node *iter;
-       struct ehca_qp *qp;
-       hlist_for_each(iter, &cq->qp_hashtab[key]) {
-               qp = hlist_entry(iter, struct ehca_qp, list_entries);
-               if (qp->real_qp_num == real_qp_num) {
-                       ret = qp;
-                       break;
-               }
-       }
-       return ret;
-}
-
-struct ib_cq *ehca_create_cq(struct ib_device *device,
-                            const struct ib_cq_init_attr *attr,
-                            struct ib_ucontext *context,
-                            struct ib_udata *udata)
-{
-       int cqe = attr->cqe;
-       static const u32 additional_cqe = 20;
-       struct ib_cq *cq;
-       struct ehca_cq *my_cq;
-       struct ehca_shca *shca =
-               container_of(device, struct ehca_shca, ib_device);
-       struct ipz_adapter_handle adapter_handle;
-       struct ehca_alloc_cq_parms param; /* h_call's out parameters */
-       struct h_galpa gal;
-       void *vpage;
-       u32 counter;
-       u64 rpage, cqx_fec, h_ret;
-       int rc, i;
-       unsigned long flags;
-
-       if (attr->flags)
-               return ERR_PTR(-EINVAL);
-
-       if (cqe >= 0xFFFFFFFF - 64 - additional_cqe)
-               return ERR_PTR(-EINVAL);
-
-       if (!atomic_add_unless(&shca->num_cqs, 1, shca->max_num_cqs)) {
-               ehca_err(device, "Unable to create CQ, max number of %i "
-                       "CQs reached.", shca->max_num_cqs);
-               ehca_err(device, "To increase the maximum number of CQs "
-                       "use the number_of_cqs module parameter.\n");
-               return ERR_PTR(-ENOSPC);
-       }
-
-       my_cq = kmem_cache_zalloc(cq_cache, GFP_KERNEL);
-       if (!my_cq) {
-               ehca_err(device, "Out of memory for ehca_cq struct device=%p",
-                        device);
-               atomic_dec(&shca->num_cqs);
-               return ERR_PTR(-ENOMEM);
-       }
-
-       memset(&param, 0, sizeof(struct ehca_alloc_cq_parms));
-
-       spin_lock_init(&my_cq->spinlock);
-       spin_lock_init(&my_cq->cb_lock);
-       spin_lock_init(&my_cq->task_lock);
-       atomic_set(&my_cq->nr_events, 0);
-       init_waitqueue_head(&my_cq->wait_completion);
-
-       cq = &my_cq->ib_cq;
-
-       adapter_handle = shca->ipz_hca_handle;
-       param.eq_handle = shca->eq.ipz_eq_handle;
-
-       idr_preload(GFP_KERNEL);
-       write_lock_irqsave(&ehca_cq_idr_lock, flags);
-       rc = idr_alloc(&ehca_cq_idr, my_cq, 0, 0x2000000, GFP_NOWAIT);
-       write_unlock_irqrestore(&ehca_cq_idr_lock, flags);
-       idr_preload_end();
-
-       if (rc < 0) {
-               cq = ERR_PTR(-ENOMEM);
-               ehca_err(device, "Can't allocate new idr entry. device=%p",
-                        device);
-               goto create_cq_exit1;
-       }
-       my_cq->token = rc;
-
-       /*
-        * CQs maximum depth is 4GB-64, but we need additional 20 as buffer
-        * for receiving errors CQEs.
-        */
-       param.nr_cqe = cqe + additional_cqe;
-       h_ret = hipz_h_alloc_resource_cq(adapter_handle, my_cq, &param);
-
-       if (h_ret != H_SUCCESS) {
-               ehca_err(device, "hipz_h_alloc_resource_cq() failed "
-                        "h_ret=%lli device=%p", h_ret, device);
-               cq = ERR_PTR(ehca2ib_return_code(h_ret));
-               goto create_cq_exit2;
-       }
-
-       rc = ipz_queue_ctor(NULL, &my_cq->ipz_queue, param.act_pages,
-                               EHCA_PAGESIZE, sizeof(struct ehca_cqe), 0, 0);
-       if (!rc) {
-               ehca_err(device, "ipz_queue_ctor() failed ipz_rc=%i device=%p",
-                        rc, device);
-               cq = ERR_PTR(-EINVAL);
-               goto create_cq_exit3;
-       }
-
-       for (counter = 0; counter < param.act_pages; counter++) {
-               vpage = ipz_qpageit_get_inc(&my_cq->ipz_queue);
-               if (!vpage) {
-                       ehca_err(device, "ipz_qpageit_get_inc() "
-                                "returns NULL device=%p", device);
-                       cq = ERR_PTR(-EAGAIN);
-                       goto create_cq_exit4;
-               }
-               rpage = __pa(vpage);
-
-               h_ret = hipz_h_register_rpage_cq(adapter_handle,
-                                                my_cq->ipz_cq_handle,
-                                                &my_cq->pf,
-                                                0,
-                                                0,
-                                                rpage,
-                                                1,
-                                                my_cq->galpas.
-                                                kernel);
-
-               if (h_ret < H_SUCCESS) {
-                       ehca_err(device, "hipz_h_register_rpage_cq() failed "
-                                "ehca_cq=%p cq_num=%x h_ret=%lli counter=%i "
-                                "act_pages=%i", my_cq, my_cq->cq_number,
-                                h_ret, counter, param.act_pages);
-                       cq = ERR_PTR(-EINVAL);
-                       goto create_cq_exit4;
-               }
-
-               if (counter == (param.act_pages - 1)) {
-                       vpage = ipz_qpageit_get_inc(&my_cq->ipz_queue);
-                       if ((h_ret != H_SUCCESS) || vpage) {
-                               ehca_err(device, "Registration of pages not "
-                                        "complete ehca_cq=%p cq_num=%x "
-                                        "h_ret=%lli", my_cq, my_cq->cq_number,
-                                        h_ret);
-                               cq = ERR_PTR(-EAGAIN);
-                               goto create_cq_exit4;
-                       }
-               } else {
-                       if (h_ret != H_PAGE_REGISTERED) {
-                               ehca_err(device, "Registration of page failed "
-                                        "ehca_cq=%p cq_num=%x h_ret=%lli "
-                                        "counter=%i act_pages=%i",
-                                        my_cq, my_cq->cq_number,
-                                        h_ret, counter, param.act_pages);
-                               cq = ERR_PTR(-ENOMEM);
-                               goto create_cq_exit4;
-                       }
-               }
-       }
-
-       ipz_qeit_reset(&my_cq->ipz_queue);
-
-       gal = my_cq->galpas.kernel;
-       cqx_fec = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_fec));
-       ehca_dbg(device, "ehca_cq=%p cq_num=%x CQX_FEC=%llx",
-                my_cq, my_cq->cq_number, cqx_fec);
-
-       my_cq->ib_cq.cqe = my_cq->nr_of_entries =
-               param.act_nr_of_entries - additional_cqe;
-       my_cq->cq_number = (my_cq->ipz_cq_handle.handle) & 0xffff;
-
-       for (i = 0; i < QP_HASHTAB_LEN; i++)
-               INIT_HLIST_HEAD(&my_cq->qp_hashtab[i]);
-
-       INIT_LIST_HEAD(&my_cq->sqp_err_list);
-       INIT_LIST_HEAD(&my_cq->rqp_err_list);
-
-       if (context) {
-               struct ipz_queue *ipz_queue = &my_cq->ipz_queue;
-               struct ehca_create_cq_resp resp;
-               memset(&resp, 0, sizeof(resp));
-               resp.cq_number = my_cq->cq_number;
-               resp.token = my_cq->token;
-               resp.ipz_queue.qe_size = ipz_queue->qe_size;
-               resp.ipz_queue.act_nr_of_sg = ipz_queue->act_nr_of_sg;
-               resp.ipz_queue.queue_length = ipz_queue->queue_length;
-               resp.ipz_queue.pagesize = ipz_queue->pagesize;
-               resp.ipz_queue.toggle_state = ipz_queue->toggle_state;
-               resp.fw_handle_ofs = (u32)
-                       (my_cq->galpas.user.fw_handle & (PAGE_SIZE - 1));
-               if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
-                       ehca_err(device, "Copy to udata failed.");
-                       cq = ERR_PTR(-EFAULT);
-                       goto create_cq_exit4;
-               }
-       }
-
-       return cq;
-
-create_cq_exit4:
-       ipz_queue_dtor(NULL, &my_cq->ipz_queue);
-
-create_cq_exit3:
-       h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1);
-       if (h_ret != H_SUCCESS)
-               ehca_err(device, "hipz_h_destroy_cq() failed ehca_cq=%p "
-                        "cq_num=%x h_ret=%lli", my_cq, my_cq->cq_number, h_ret);
-
-create_cq_exit2:
-       write_lock_irqsave(&ehca_cq_idr_lock, flags);
-       idr_remove(&ehca_cq_idr, my_cq->token);
-       write_unlock_irqrestore(&ehca_cq_idr_lock, flags);
-
-create_cq_exit1:
-       kmem_cache_free(cq_cache, my_cq);
-
-       atomic_dec(&shca->num_cqs);
-       return cq;
-}
-
-int ehca_destroy_cq(struct ib_cq *cq)
-{
-       u64 h_ret;
-       struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
-       int cq_num = my_cq->cq_number;
-       struct ib_device *device = cq->device;
-       struct ehca_shca *shca = container_of(device, struct ehca_shca,
-                                             ib_device);
-       struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle;
-       unsigned long flags;
-
-       if (cq->uobject) {
-               if (my_cq->mm_count_galpa || my_cq->mm_count_queue) {
-                       ehca_err(device, "Resources still referenced in "
-                                "user space cq_num=%x", my_cq->cq_number);
-                       return -EINVAL;
-               }
-       }
-
-       /*
-        * remove the CQ from the idr first to make sure
-        * no more interrupt tasklets will touch this CQ
-        */
-       write_lock_irqsave(&ehca_cq_idr_lock, flags);
-       idr_remove(&ehca_cq_idr, my_cq->token);
-       write_unlock_irqrestore(&ehca_cq_idr_lock, flags);
-
-       /* now wait until all pending events have completed */
-       wait_event(my_cq->wait_completion, !atomic_read(&my_cq->nr_events));
-
-       /* nobody's using our CQ any longer -- we can destroy it */
-       h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 0);
-       if (h_ret == H_R_STATE) {
-               /* cq in err: read err data and destroy it forcibly */
-               ehca_dbg(device, "ehca_cq=%p cq_num=%x resource=%llx in err "
-                        "state. Try to delete it forcibly.",
-                        my_cq, cq_num, my_cq->ipz_cq_handle.handle);
-               ehca_error_data(shca, my_cq, my_cq->ipz_cq_handle.handle);
-               h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1);
-               if (h_ret == H_SUCCESS)
-                       ehca_dbg(device, "cq_num=%x deleted successfully.",
-                                cq_num);
-       }
-       if (h_ret != H_SUCCESS) {
-               ehca_err(device, "hipz_h_destroy_cq() failed h_ret=%lli "
-                        "ehca_cq=%p cq_num=%x", h_ret, my_cq, cq_num);
-               return ehca2ib_return_code(h_ret);
-       }
-       ipz_queue_dtor(NULL, &my_cq->ipz_queue);
-       kmem_cache_free(cq_cache, my_cq);
-
-       atomic_dec(&shca->num_cqs);
-       return 0;
-}
-
-int ehca_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
-{
-       /* TODO: proper resize needs to be done */
-       ehca_err(cq->device, "not implemented yet");
-
-       return -EFAULT;
-}
-
-int ehca_init_cq_cache(void)
-{
-       cq_cache = kmem_cache_create("ehca_cache_cq",
-                                    sizeof(struct ehca_cq), 0,
-                                    SLAB_HWCACHE_ALIGN,
-                                    NULL);
-       if (!cq_cache)
-               return -ENOMEM;
-       return 0;
-}
-
-void ehca_cleanup_cq_cache(void)
-{
-       kmem_cache_destroy(cq_cache);
-}
diff --git a/drivers/staging/rdma/ehca/ehca_eq.c b/drivers/staging/rdma/ehca/ehca_eq.c
deleted file mode 100644 (file)
index 90da674..0000000
+++ /dev/null
@@ -1,189 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  Event queue handling
- *
- *  Authors: Waleri Fomin <fomin@de.ibm.com>
- *           Khadija Souissi <souissi@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *           Heiko J Schick <schickhj@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "ehca_classes.h"
-#include "ehca_irq.h"
-#include "ehca_iverbs.h"
-#include "ehca_qes.h"
-#include "hcp_if.h"
-#include "ipz_pt_fn.h"
-
-int ehca_create_eq(struct ehca_shca *shca,
-                  struct ehca_eq *eq,
-                  const enum ehca_eq_type type, const u32 length)
-{
-       int ret;
-       u64 h_ret;
-       u32 nr_pages;
-       u32 i;
-       void *vpage;
-       struct ib_device *ib_dev = &shca->ib_device;
-
-       spin_lock_init(&eq->spinlock);
-       spin_lock_init(&eq->irq_spinlock);
-       eq->is_initialized = 0;
-
-       if (type != EHCA_EQ && type != EHCA_NEQ) {
-               ehca_err(ib_dev, "Invalid EQ type %x. eq=%p", type, eq);
-               return -EINVAL;
-       }
-       if (!length) {
-               ehca_err(ib_dev, "EQ length must not be zero. eq=%p", eq);
-               return -EINVAL;
-       }
-
-       h_ret = hipz_h_alloc_resource_eq(shca->ipz_hca_handle,
-                                        &eq->pf,
-                                        type,
-                                        length,
-                                        &eq->ipz_eq_handle,
-                                        &eq->length,
-                                        &nr_pages, &eq->ist);
-
-       if (h_ret != H_SUCCESS) {
-               ehca_err(ib_dev, "Can't allocate EQ/NEQ. eq=%p", eq);
-               return -EINVAL;
-       }
-
-       ret = ipz_queue_ctor(NULL, &eq->ipz_queue, nr_pages,
-                            EHCA_PAGESIZE, sizeof(struct ehca_eqe), 0, 0);
-       if (!ret) {
-               ehca_err(ib_dev, "Can't allocate EQ pages eq=%p", eq);
-               goto create_eq_exit1;
-       }
-
-       for (i = 0; i < nr_pages; i++) {
-               u64 rpage;
-
-               vpage = ipz_qpageit_get_inc(&eq->ipz_queue);
-               if (!vpage)
-                       goto create_eq_exit2;
-
-               rpage = __pa(vpage);
-               h_ret = hipz_h_register_rpage_eq(shca->ipz_hca_handle,
-                                                eq->ipz_eq_handle,
-                                                &eq->pf,
-                                                0, 0, rpage, 1);
-
-               if (i == (nr_pages - 1)) {
-                       /* last page */
-                       vpage = ipz_qpageit_get_inc(&eq->ipz_queue);
-                       if (h_ret != H_SUCCESS || vpage)
-                               goto create_eq_exit2;
-               } else {
-                       if (h_ret != H_PAGE_REGISTERED)
-                               goto create_eq_exit2;
-               }
-       }
-
-       ipz_qeit_reset(&eq->ipz_queue);
-
-       /* register interrupt handlers and initialize work queues */
-       if (type == EHCA_EQ) {
-               tasklet_init(&eq->interrupt_task, ehca_tasklet_eq, (long)shca);
-
-               ret = ibmebus_request_irq(eq->ist, ehca_interrupt_eq,
-                                         0, "ehca_eq",
-                                         (void *)shca);
-               if (ret < 0)
-                       ehca_err(ib_dev, "Can't map interrupt handler.");
-       } else if (type == EHCA_NEQ) {
-               tasklet_init(&eq->interrupt_task, ehca_tasklet_neq, (long)shca);
-
-               ret = ibmebus_request_irq(eq->ist, ehca_interrupt_neq,
-                                         0, "ehca_neq",
-                                         (void *)shca);
-               if (ret < 0)
-                       ehca_err(ib_dev, "Can't map interrupt handler.");
-       }
-
-       eq->is_initialized = 1;
-
-       return 0;
-
-create_eq_exit2:
-       ipz_queue_dtor(NULL, &eq->ipz_queue);
-
-create_eq_exit1:
-       hipz_h_destroy_eq(shca->ipz_hca_handle, eq);
-
-       return -EINVAL;
-}
-
-void *ehca_poll_eq(struct ehca_shca *shca, struct ehca_eq *eq)
-{
-       unsigned long flags;
-       void *eqe;
-
-       spin_lock_irqsave(&eq->spinlock, flags);
-       eqe = ipz_eqit_eq_get_inc_valid(&eq->ipz_queue);
-       spin_unlock_irqrestore(&eq->spinlock, flags);
-
-       return eqe;
-}
-
-int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq)
-{
-       unsigned long flags;
-       u64 h_ret;
-
-       ibmebus_free_irq(eq->ist, (void *)shca);
-
-       spin_lock_irqsave(&shca_list_lock, flags);
-       eq->is_initialized = 0;
-       spin_unlock_irqrestore(&shca_list_lock, flags);
-
-       tasklet_kill(&eq->interrupt_task);
-
-       h_ret = hipz_h_destroy_eq(shca->ipz_hca_handle, eq);
-
-       if (h_ret != H_SUCCESS) {
-               ehca_err(&shca->ib_device, "Can't free EQ resources.");
-               return -EINVAL;
-       }
-       ipz_queue_dtor(NULL, &eq->ipz_queue);
-
-       return 0;
-}
diff --git a/drivers/staging/rdma/ehca/ehca_hca.c b/drivers/staging/rdma/ehca/ehca_hca.c
deleted file mode 100644 (file)
index e8b1bb6..0000000
+++ /dev/null
@@ -1,414 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  HCA query functions
- *
- *  Authors: Heiko J Schick <schickhj@de.ibm.com>
- *           Christoph Raisch <raisch@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/gfp.h>
-
-#include "ehca_tools.h"
-#include "ehca_iverbs.h"
-#include "hcp_if.h"
-
-static unsigned int limit_uint(unsigned int value)
-{
-       return min_t(unsigned int, value, INT_MAX);
-}
-
-int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
-                     struct ib_udata *uhw)
-{
-       int i, ret = 0;
-       struct ehca_shca *shca = container_of(ibdev, struct ehca_shca,
-                                             ib_device);
-       struct hipz_query_hca *rblock;
-
-       static const u32 cap_mapping[] = {
-               IB_DEVICE_RESIZE_MAX_WR,      HCA_CAP_WQE_RESIZE,
-               IB_DEVICE_BAD_PKEY_CNTR,      HCA_CAP_BAD_P_KEY_CTR,
-               IB_DEVICE_BAD_QKEY_CNTR,      HCA_CAP_Q_KEY_VIOL_CTR,
-               IB_DEVICE_RAW_MULTI,          HCA_CAP_RAW_PACKET_MCAST,
-               IB_DEVICE_AUTO_PATH_MIG,      HCA_CAP_AUTO_PATH_MIG,
-               IB_DEVICE_CHANGE_PHY_PORT,    HCA_CAP_SQD_RTS_PORT_CHANGE,
-               IB_DEVICE_UD_AV_PORT_ENFORCE, HCA_CAP_AH_PORT_NR_CHECK,
-               IB_DEVICE_CURR_QP_STATE_MOD,  HCA_CAP_CUR_QP_STATE_MOD,
-               IB_DEVICE_SHUTDOWN_PORT,      HCA_CAP_SHUTDOWN_PORT,
-               IB_DEVICE_INIT_TYPE,          HCA_CAP_INIT_TYPE,
-               IB_DEVICE_PORT_ACTIVE_EVENT,  HCA_CAP_PORT_ACTIVE_EVENT,
-       };
-
-       if (uhw->inlen || uhw->outlen)
-               return -EINVAL;
-
-       rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-       if (!rblock) {
-               ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
-               return -ENOMEM;
-       }
-
-       if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) {
-               ehca_err(&shca->ib_device, "Can't query device properties");
-               ret = -EINVAL;
-               goto query_device1;
-       }
-
-       memset(props, 0, sizeof(struct ib_device_attr));
-       props->page_size_cap   = shca->hca_cap_mr_pgsize;
-       props->fw_ver          = rblock->hw_ver;
-       props->max_mr_size     = rblock->max_mr_size;
-       props->vendor_id       = rblock->vendor_id >> 8;
-       props->vendor_part_id  = rblock->vendor_part_id >> 16;
-       props->hw_ver          = rblock->hw_ver;
-       props->max_qp          = limit_uint(rblock->max_qp);
-       props->max_qp_wr       = limit_uint(rblock->max_wqes_wq);
-       props->max_sge         = limit_uint(rblock->max_sge);
-       props->max_sge_rd      = limit_uint(rblock->max_sge_rd);
-       props->max_cq          = limit_uint(rblock->max_cq);
-       props->max_cqe         = limit_uint(rblock->max_cqe);
-       props->max_mr          = limit_uint(rblock->max_mr);
-       props->max_mw          = limit_uint(rblock->max_mw);
-       props->max_pd          = limit_uint(rblock->max_pd);
-       props->max_ah          = limit_uint(rblock->max_ah);
-       props->max_ee          = limit_uint(rblock->max_rd_ee_context);
-       props->max_rdd         = limit_uint(rblock->max_rd_domain);
-       props->max_fmr         = limit_uint(rblock->max_mr);
-       props->max_qp_rd_atom  = limit_uint(rblock->max_rr_qp);
-       props->max_ee_rd_atom  = limit_uint(rblock->max_rr_ee_context);
-       props->max_res_rd_atom = limit_uint(rblock->max_rr_hca);
-       props->max_qp_init_rd_atom = limit_uint(rblock->max_act_wqs_qp);
-       props->max_ee_init_rd_atom = limit_uint(rblock->max_act_wqs_ee_context);
-
-       if (EHCA_BMASK_GET(HCA_CAP_SRQ, shca->hca_cap)) {
-               props->max_srq         = limit_uint(props->max_qp);
-               props->max_srq_wr      = limit_uint(props->max_qp_wr);
-               props->max_srq_sge     = 3;
-       }
-
-       props->max_pkeys           = 16;
-       /* Some FW versions say 0 here; insert sensible value in that case */
-       props->local_ca_ack_delay  = rblock->local_ca_ack_delay ?
-               min_t(u8, rblock->local_ca_ack_delay, 255) : 12;
-       props->max_raw_ipv6_qp     = limit_uint(rblock->max_raw_ipv6_qp);
-       props->max_raw_ethy_qp     = limit_uint(rblock->max_raw_ethy_qp);
-       props->max_mcast_grp       = limit_uint(rblock->max_mcast_grp);
-       props->max_mcast_qp_attach = limit_uint(rblock->max_mcast_qp_attach);
-       props->max_total_mcast_qp_attach
-               = limit_uint(rblock->max_total_mcast_qp_attach);
-
-       /* translate device capabilities */
-       props->device_cap_flags = IB_DEVICE_SYS_IMAGE_GUID |
-               IB_DEVICE_RC_RNR_NAK_GEN | IB_DEVICE_N_NOTIFY_CQ;
-       for (i = 0; i < ARRAY_SIZE(cap_mapping); i += 2)
-               if (rblock->hca_cap_indicators & cap_mapping[i + 1])
-                       props->device_cap_flags |= cap_mapping[i];
-
-query_device1:
-       ehca_free_fw_ctrlblock(rblock);
-
-       return ret;
-}
-
-static enum ib_mtu map_mtu(struct ehca_shca *shca, u32 fw_mtu)
-{
-       switch (fw_mtu) {
-       case 0x1:
-               return IB_MTU_256;
-       case 0x2:
-               return IB_MTU_512;
-       case 0x3:
-               return IB_MTU_1024;
-       case 0x4:
-               return IB_MTU_2048;
-       case 0x5:
-               return IB_MTU_4096;
-       default:
-               ehca_err(&shca->ib_device, "Unknown MTU size: %x.",
-                        fw_mtu);
-               return 0;
-       }
-}
-
-static u8 map_number_of_vls(struct ehca_shca *shca, u32 vl_cap)
-{
-       switch (vl_cap) {
-       case 0x1:
-               return 1;
-       case 0x2:
-               return 2;
-       case 0x3:
-               return 4;
-       case 0x4:
-               return 8;
-       case 0x5:
-               return 15;
-       default:
-               ehca_err(&shca->ib_device, "invalid Vl Capability: %x.",
-                        vl_cap);
-               return 0;
-       }
-}
-
-int ehca_query_port(struct ib_device *ibdev,
-                   u8 port, struct ib_port_attr *props)
-{
-       int ret = 0;
-       u64 h_ret;
-       struct ehca_shca *shca = container_of(ibdev, struct ehca_shca,
-                                             ib_device);
-       struct hipz_query_port *rblock;
-
-       rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-       if (!rblock) {
-               ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
-               return -ENOMEM;
-       }
-
-       h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(&shca->ib_device, "Can't query port properties");
-               ret = -EINVAL;
-               goto query_port1;
-       }
-
-       memset(props, 0, sizeof(struct ib_port_attr));
-
-       props->active_mtu = props->max_mtu = map_mtu(shca, rblock->max_mtu);
-       props->port_cap_flags  = rblock->capability_mask;
-       props->gid_tbl_len     = rblock->gid_tbl_len;
-       if (rblock->max_msg_sz)
-               props->max_msg_sz      = rblock->max_msg_sz;
-       else
-               props->max_msg_sz      = 0x1 << 31;
-       props->bad_pkey_cntr   = rblock->bad_pkey_cntr;
-       props->qkey_viol_cntr  = rblock->qkey_viol_cntr;
-       props->pkey_tbl_len    = rblock->pkey_tbl_len;
-       props->lid             = rblock->lid;
-       props->sm_lid          = rblock->sm_lid;
-       props->lmc             = rblock->lmc;
-       props->sm_sl           = rblock->sm_sl;
-       props->subnet_timeout  = rblock->subnet_timeout;
-       props->init_type_reply = rblock->init_type_reply;
-       props->max_vl_num      = map_number_of_vls(shca, rblock->vl_cap);
-
-       if (rblock->state && rblock->phys_width) {
-               props->phys_state      = rblock->phys_pstate;
-               props->state           = rblock->phys_state;
-               props->active_width    = rblock->phys_width;
-               props->active_speed    = rblock->phys_speed;
-       } else {
-               /* old firmware releases don't report physical
-                * port info, so use default values
-                */
-               props->phys_state      = 5;
-               props->state           = rblock->state;
-               props->active_width    = IB_WIDTH_12X;
-               props->active_speed    = IB_SPEED_SDR;
-       }
-
-query_port1:
-       ehca_free_fw_ctrlblock(rblock);
-
-       return ret;
-}
-
-int ehca_query_sma_attr(struct ehca_shca *shca,
-                       u8 port, struct ehca_sma_attr *attr)
-{
-       int ret = 0;
-       u64 h_ret;
-       struct hipz_query_port *rblock;
-
-       rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC);
-       if (!rblock) {
-               ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
-               return -ENOMEM;
-       }
-
-       h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(&shca->ib_device, "Can't query port properties");
-               ret = -EINVAL;
-               goto query_sma_attr1;
-       }
-
-       memset(attr, 0, sizeof(struct ehca_sma_attr));
-
-       attr->lid    = rblock->lid;
-       attr->lmc    = rblock->lmc;
-       attr->sm_sl  = rblock->sm_sl;
-       attr->sm_lid = rblock->sm_lid;
-
-       attr->pkey_tbl_len = rblock->pkey_tbl_len;
-       memcpy(attr->pkeys, rblock->pkey_entries, sizeof(attr->pkeys));
-
-query_sma_attr1:
-       ehca_free_fw_ctrlblock(rblock);
-
-       return ret;
-}
-
-int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
-{
-       int ret = 0;
-       u64 h_ret;
-       struct ehca_shca *shca;
-       struct hipz_query_port *rblock;
-
-       shca = container_of(ibdev, struct ehca_shca, ib_device);
-       if (index > 16) {
-               ehca_err(&shca->ib_device, "Invalid index: %x.", index);
-               return -EINVAL;
-       }
-
-       rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-       if (!rblock) {
-               ehca_err(&shca->ib_device,  "Can't allocate rblock memory.");
-               return -ENOMEM;
-       }
-
-       h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(&shca->ib_device, "Can't query port properties");
-               ret = -EINVAL;
-               goto query_pkey1;
-       }
-
-       memcpy(pkey, &rblock->pkey_entries + index, sizeof(u16));
-
-query_pkey1:
-       ehca_free_fw_ctrlblock(rblock);
-
-       return ret;
-}
-
-int ehca_query_gid(struct ib_device *ibdev, u8 port,
-                  int index, union ib_gid *gid)
-{
-       int ret = 0;
-       u64 h_ret;
-       struct ehca_shca *shca = container_of(ibdev, struct ehca_shca,
-                                             ib_device);
-       struct hipz_query_port *rblock;
-
-       if (index < 0 || index > 255) {
-               ehca_err(&shca->ib_device, "Invalid index: %x.", index);
-               return -EINVAL;
-       }
-
-       rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-       if (!rblock) {
-               ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
-               return -ENOMEM;
-       }
-
-       h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(&shca->ib_device, "Can't query port properties");
-               ret = -EINVAL;
-               goto query_gid1;
-       }
-
-       memcpy(&gid->raw[0], &rblock->gid_prefix, sizeof(u64));
-       memcpy(&gid->raw[8], &rblock->guid_entries[index], sizeof(u64));
-
-query_gid1:
-       ehca_free_fw_ctrlblock(rblock);
-
-       return ret;
-}
-
-static const u32 allowed_port_caps = (
-       IB_PORT_SM | IB_PORT_LED_INFO_SUP | IB_PORT_CM_SUP |
-       IB_PORT_SNMP_TUNNEL_SUP | IB_PORT_DEVICE_MGMT_SUP |
-       IB_PORT_VENDOR_CLASS_SUP);
-
-int ehca_modify_port(struct ib_device *ibdev,
-                    u8 port, int port_modify_mask,
-                    struct ib_port_modify *props)
-{
-       int ret = 0;
-       struct ehca_shca *shca;
-       struct hipz_query_port *rblock;
-       u32 cap;
-       u64 hret;
-
-       shca = container_of(ibdev, struct ehca_shca, ib_device);
-       if ((props->set_port_cap_mask | props->clr_port_cap_mask)
-           & ~allowed_port_caps) {
-               ehca_err(&shca->ib_device, "Non-changeable bits set in masks  "
-                        "set=%x  clr=%x  allowed=%x", props->set_port_cap_mask,
-                        props->clr_port_cap_mask, allowed_port_caps);
-               return -EINVAL;
-       }
-
-       if (mutex_lock_interruptible(&shca->modify_mutex))
-               return -ERESTARTSYS;
-
-       rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-       if (!rblock) {
-               ehca_err(&shca->ib_device,  "Can't allocate rblock memory.");
-               ret = -ENOMEM;
-               goto modify_port1;
-       }
-
-       hret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock);
-       if (hret != H_SUCCESS) {
-               ehca_err(&shca->ib_device, "Can't query port properties");
-               ret = -EINVAL;
-               goto modify_port2;
-       }
-
-       cap = (rblock->capability_mask | props->set_port_cap_mask)
-               & ~props->clr_port_cap_mask;
-
-       hret = hipz_h_modify_port(shca->ipz_hca_handle, port,
-                                 cap, props->init_type, port_modify_mask);
-       if (hret != H_SUCCESS) {
-               ehca_err(&shca->ib_device, "Modify port failed  h_ret=%lli",
-                        hret);
-               ret = -EINVAL;
-       }
-
-modify_port2:
-       ehca_free_fw_ctrlblock(rblock);
-
-modify_port1:
-       mutex_unlock(&shca->modify_mutex);
-
-       return ret;
-}
diff --git a/drivers/staging/rdma/ehca/ehca_irq.c b/drivers/staging/rdma/ehca/ehca_irq.c
deleted file mode 100644 (file)
index 8615d7c..0000000
+++ /dev/null
@@ -1,870 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  Functions for EQs, NEQs and interrupts
- *
- *  Authors: Heiko J Schick <schickhj@de.ibm.com>
- *           Khadija Souissi <souissi@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Joachim Fenkes <fenkes@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/slab.h>
-#include <linux/smpboot.h>
-
-#include "ehca_classes.h"
-#include "ehca_irq.h"
-#include "ehca_iverbs.h"
-#include "ehca_tools.h"
-#include "hcp_if.h"
-#include "hipz_fns.h"
-#include "ipz_pt_fn.h"
-
-#define EQE_COMPLETION_EVENT   EHCA_BMASK_IBM( 1,  1)
-#define EQE_CQ_QP_NUMBER       EHCA_BMASK_IBM( 8, 31)
-#define EQE_EE_IDENTIFIER      EHCA_BMASK_IBM( 2,  7)
-#define EQE_CQ_NUMBER          EHCA_BMASK_IBM( 8, 31)
-#define EQE_QP_NUMBER          EHCA_BMASK_IBM( 8, 31)
-#define EQE_QP_TOKEN           EHCA_BMASK_IBM(32, 63)
-#define EQE_CQ_TOKEN           EHCA_BMASK_IBM(32, 63)
-
-#define NEQE_COMPLETION_EVENT  EHCA_BMASK_IBM( 1,  1)
-#define NEQE_EVENT_CODE        EHCA_BMASK_IBM( 2,  7)
-#define NEQE_PORT_NUMBER       EHCA_BMASK_IBM( 8, 15)
-#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16, 16)
-#define NEQE_DISRUPTIVE        EHCA_BMASK_IBM(16, 16)
-#define NEQE_SPECIFIC_EVENT    EHCA_BMASK_IBM(16, 23)
-
-#define ERROR_DATA_LENGTH      EHCA_BMASK_IBM(52, 63)
-#define ERROR_DATA_TYPE        EHCA_BMASK_IBM( 0,  7)
-
-static void queue_comp_task(struct ehca_cq *__cq);
-
-static struct ehca_comp_pool *pool;
-
-static inline void comp_event_callback(struct ehca_cq *cq)
-{
-       if (!cq->ib_cq.comp_handler)
-               return;
-
-       spin_lock(&cq->cb_lock);
-       cq->ib_cq.comp_handler(&cq->ib_cq, cq->ib_cq.cq_context);
-       spin_unlock(&cq->cb_lock);
-
-       return;
-}
-
-static void print_error_data(struct ehca_shca *shca, void *data,
-                            u64 *rblock, int length)
-{
-       u64 type = EHCA_BMASK_GET(ERROR_DATA_TYPE, rblock[2]);
-       u64 resource = rblock[1];
-
-       switch (type) {
-       case 0x1: /* Queue Pair */
-       {
-               struct ehca_qp *qp = (struct ehca_qp *)data;
-
-               /* only print error data if AER is set */
-               if (rblock[6] == 0)
-                       return;
-
-               ehca_err(&shca->ib_device,
-                        "QP 0x%x (resource=%llx) has errors.",
-                        qp->ib_qp.qp_num, resource);
-               break;
-       }
-       case 0x4: /* Completion Queue */
-       {
-               struct ehca_cq *cq = (struct ehca_cq *)data;
-
-               ehca_err(&shca->ib_device,
-                        "CQ 0x%x (resource=%llx) has errors.",
-                        cq->cq_number, resource);
-               break;
-       }
-       default:
-               ehca_err(&shca->ib_device,
-                        "Unknown error type: %llx on %s.",
-                        type, shca->ib_device.name);
-               break;
-       }
-
-       ehca_err(&shca->ib_device, "Error data is available: %llx.", resource);
-       ehca_err(&shca->ib_device, "EHCA ----- error data begin "
-                "---------------------------------------------------");
-       ehca_dmp(rblock, length, "resource=%llx", resource);
-       ehca_err(&shca->ib_device, "EHCA ----- error data end "
-                "----------------------------------------------------");
-
-       return;
-}
-
-int ehca_error_data(struct ehca_shca *shca, void *data,
-                   u64 resource)
-{
-
-       unsigned long ret;
-       u64 *rblock;
-       unsigned long block_count;
-
-       rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC);
-       if (!rblock) {
-               ehca_err(&shca->ib_device, "Cannot allocate rblock memory.");
-               ret = -ENOMEM;
-               goto error_data1;
-       }
-
-       /* rblock must be 4K aligned and should be 4K large */
-       ret = hipz_h_error_data(shca->ipz_hca_handle,
-                               resource,
-                               rblock,
-                               &block_count);
-
-       if (ret == H_R_STATE)
-               ehca_err(&shca->ib_device,
-                        "No error data is available: %llx.", resource);
-       else if (ret == H_SUCCESS) {
-               int length;
-
-               length = EHCA_BMASK_GET(ERROR_DATA_LENGTH, rblock[0]);
-
-               if (length > EHCA_PAGESIZE)
-                       length = EHCA_PAGESIZE;
-
-               print_error_data(shca, data, rblock, length);
-       } else
-               ehca_err(&shca->ib_device,
-                        "Error data could not be fetched: %llx", resource);
-
-       ehca_free_fw_ctrlblock(rblock);
-
-error_data1:
-       return ret;
-
-}
-
-static void dispatch_qp_event(struct ehca_shca *shca, struct ehca_qp *qp,
-                             enum ib_event_type event_type)
-{
-       struct ib_event event;
-
-       /* PATH_MIG without the QP ever having been armed is false alarm */
-       if (event_type == IB_EVENT_PATH_MIG && !qp->mig_armed)
-               return;
-
-       event.device = &shca->ib_device;
-       event.event = event_type;
-
-       if (qp->ext_type == EQPT_SRQ) {
-               if (!qp->ib_srq.event_handler)
-                       return;
-
-               event.element.srq = &qp->ib_srq;
-               qp->ib_srq.event_handler(&event, qp->ib_srq.srq_context);
-       } else {
-               if (!qp->ib_qp.event_handler)
-                       return;
-
-               event.element.qp = &qp->ib_qp;
-               qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context);
-       }
-}
-
-static void qp_event_callback(struct ehca_shca *shca, u64 eqe,
-                             enum ib_event_type event_type, int fatal)
-{
-       struct ehca_qp *qp;
-       u32 token = EHCA_BMASK_GET(EQE_QP_TOKEN, eqe);
-
-       read_lock(&ehca_qp_idr_lock);
-       qp = idr_find(&ehca_qp_idr, token);
-       if (qp)
-               atomic_inc(&qp->nr_events);
-       read_unlock(&ehca_qp_idr_lock);
-
-       if (!qp)
-               return;
-
-       if (fatal)
-               ehca_error_data(shca, qp, qp->ipz_qp_handle.handle);
-
-       dispatch_qp_event(shca, qp, fatal && qp->ext_type == EQPT_SRQ ?
-                         IB_EVENT_SRQ_ERR : event_type);
-
-       /*
-        * eHCA only processes one WQE at a time for SRQ base QPs,
-        * so the last WQE has been processed as soon as the QP enters
-        * error state.
-        */
-       if (fatal && qp->ext_type == EQPT_SRQBASE)
-               dispatch_qp_event(shca, qp, IB_EVENT_QP_LAST_WQE_REACHED);
-
-       if (atomic_dec_and_test(&qp->nr_events))
-               wake_up(&qp->wait_completion);
-       return;
-}
-
-static void cq_event_callback(struct ehca_shca *shca,
-                             u64 eqe)
-{
-       struct ehca_cq *cq;
-       u32 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe);
-
-       read_lock(&ehca_cq_idr_lock);
-       cq = idr_find(&ehca_cq_idr, token);
-       if (cq)
-               atomic_inc(&cq->nr_events);
-       read_unlock(&ehca_cq_idr_lock);
-
-       if (!cq)
-               return;
-
-       ehca_error_data(shca, cq, cq->ipz_cq_handle.handle);
-
-       if (atomic_dec_and_test(&cq->nr_events))
-               wake_up(&cq->wait_completion);
-
-       return;
-}
-
-static void parse_identifier(struct ehca_shca *shca, u64 eqe)
-{
-       u8 identifier = EHCA_BMASK_GET(EQE_EE_IDENTIFIER, eqe);
-
-       switch (identifier) {
-       case 0x02: /* path migrated */
-               qp_event_callback(shca, eqe, IB_EVENT_PATH_MIG, 0);
-               break;
-       case 0x03: /* communication established */
-               qp_event_callback(shca, eqe, IB_EVENT_COMM_EST, 0);
-               break;
-       case 0x04: /* send queue drained */
-               qp_event_callback(shca, eqe, IB_EVENT_SQ_DRAINED, 0);
-               break;
-       case 0x05: /* QP error */
-       case 0x06: /* QP error */
-               qp_event_callback(shca, eqe, IB_EVENT_QP_FATAL, 1);
-               break;
-       case 0x07: /* CQ error */
-       case 0x08: /* CQ error */
-               cq_event_callback(shca, eqe);
-               break;
-       case 0x09: /* MRMWPTE error */
-               ehca_err(&shca->ib_device, "MRMWPTE error.");
-               break;
-       case 0x0A: /* port event */
-               ehca_err(&shca->ib_device, "Port event.");
-               break;
-       case 0x0B: /* MR access error */
-               ehca_err(&shca->ib_device, "MR access error.");
-               break;
-       case 0x0C: /* EQ error */
-               ehca_err(&shca->ib_device, "EQ error.");
-               break;
-       case 0x0D: /* P/Q_Key mismatch */
-               ehca_err(&shca->ib_device, "P/Q_Key mismatch.");
-               break;
-       case 0x10: /* sampling complete */
-               ehca_err(&shca->ib_device, "Sampling complete.");
-               break;
-       case 0x11: /* unaffiliated access error */
-               ehca_err(&shca->ib_device, "Unaffiliated access error.");
-               break;
-       case 0x12: /* path migrating */
-               ehca_err(&shca->ib_device, "Path migrating.");
-               break;
-       case 0x13: /* interface trace stopped */
-               ehca_err(&shca->ib_device, "Interface trace stopped.");
-               break;
-       case 0x14: /* first error capture info available */
-               ehca_info(&shca->ib_device, "First error capture available");
-               break;
-       case 0x15: /* SRQ limit reached */
-               qp_event_callback(shca, eqe, IB_EVENT_SRQ_LIMIT_REACHED, 0);
-               break;
-       default:
-               ehca_err(&shca->ib_device, "Unknown identifier: %x on %s.",
-                        identifier, shca->ib_device.name);
-               break;
-       }
-
-       return;
-}
-
-static void dispatch_port_event(struct ehca_shca *shca, int port_num,
-                               enum ib_event_type type, const char *msg)
-{
-       struct ib_event event;
-
-       ehca_info(&shca->ib_device, "port %d %s.", port_num, msg);
-       event.device = &shca->ib_device;
-       event.event = type;
-       event.element.port_num = port_num;
-       ib_dispatch_event(&event);
-}
-
-static void notify_port_conf_change(struct ehca_shca *shca, int port_num)
-{
-       struct ehca_sma_attr  new_attr;
-       struct ehca_sma_attr *old_attr = &shca->sport[port_num - 1].saved_attr;
-
-       ehca_query_sma_attr(shca, port_num, &new_attr);
-
-       if (new_attr.sm_sl  != old_attr->sm_sl ||
-           new_attr.sm_lid != old_attr->sm_lid)
-               dispatch_port_event(shca, port_num, IB_EVENT_SM_CHANGE,
-                                   "SM changed");
-
-       if (new_attr.lid != old_attr->lid ||
-           new_attr.lmc != old_attr->lmc)
-               dispatch_port_event(shca, port_num, IB_EVENT_LID_CHANGE,
-                                   "LID changed");
-
-       if (new_attr.pkey_tbl_len != old_attr->pkey_tbl_len ||
-           memcmp(new_attr.pkeys, old_attr->pkeys,
-                  sizeof(u16) * new_attr.pkey_tbl_len))
-               dispatch_port_event(shca, port_num, IB_EVENT_PKEY_CHANGE,
-                                   "P_Key changed");
-
-       *old_attr = new_attr;
-}
-
-/* replay modify_qp for sqps -- return 0 if all is well, 1 if AQP1 destroyed */
-static int replay_modify_qp(struct ehca_sport *sport)
-{
-       int aqp1_destroyed;
-       unsigned long flags;
-
-       spin_lock_irqsave(&sport->mod_sqp_lock, flags);
-
-       aqp1_destroyed = !sport->ibqp_sqp[IB_QPT_GSI];
-
-       if (sport->ibqp_sqp[IB_QPT_SMI])
-               ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_SMI]);
-       if (!aqp1_destroyed)
-               ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_GSI]);
-
-       spin_unlock_irqrestore(&sport->mod_sqp_lock, flags);
-
-       return aqp1_destroyed;
-}
-
-static void parse_ec(struct ehca_shca *shca, u64 eqe)
-{
-       u8 ec   = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe);
-       u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe);
-       u8 spec_event;
-       struct ehca_sport *sport = &shca->sport[port - 1];
-
-       switch (ec) {
-       case 0x30: /* port availability change */
-               if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) {
-                       /* only replay modify_qp calls in autodetect mode;
-                        * if AQP1 was destroyed, the port is already down
-                        * again and we can drop the event.
-                        */
-                       if (ehca_nr_ports < 0)
-                               if (replay_modify_qp(sport))
-                                       break;
-
-                       sport->port_state = IB_PORT_ACTIVE;
-                       dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE,
-                                           "is active");
-                       ehca_query_sma_attr(shca, port, &sport->saved_attr);
-               } else {
-                       sport->port_state = IB_PORT_DOWN;
-                       dispatch_port_event(shca, port, IB_EVENT_PORT_ERR,
-                                           "is inactive");
-               }
-               break;
-       case 0x31:
-               /* port configuration change
-                * disruptive change is caused by
-                * LID, PKEY or SM change
-                */
-               if (EHCA_BMASK_GET(NEQE_DISRUPTIVE, eqe)) {
-                       ehca_warn(&shca->ib_device, "disruptive port "
-                                 "%d configuration change", port);
-
-                       sport->port_state = IB_PORT_DOWN;
-                       dispatch_port_event(shca, port, IB_EVENT_PORT_ERR,
-                                           "is inactive");
-
-                       sport->port_state = IB_PORT_ACTIVE;
-                       dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE,
-                                           "is active");
-                       ehca_query_sma_attr(shca, port,
-                                           &sport->saved_attr);
-               } else
-                       notify_port_conf_change(shca, port);
-               break;
-       case 0x32: /* adapter malfunction */
-               ehca_err(&shca->ib_device, "Adapter malfunction.");
-               break;
-       case 0x33:  /* trace stopped */
-               ehca_err(&shca->ib_device, "Traced stopped.");
-               break;
-       case 0x34: /* util async event */
-               spec_event = EHCA_BMASK_GET(NEQE_SPECIFIC_EVENT, eqe);
-               if (spec_event == 0x80) /* client reregister required */
-                       dispatch_port_event(shca, port,
-                                           IB_EVENT_CLIENT_REREGISTER,
-                                           "client reregister req.");
-               else
-                       ehca_warn(&shca->ib_device, "Unknown util async "
-                                 "event %x on port %x", spec_event, port);
-               break;
-       default:
-               ehca_err(&shca->ib_device, "Unknown event code: %x on %s.",
-                        ec, shca->ib_device.name);
-               break;
-       }
-
-       return;
-}
-
-static inline void reset_eq_pending(struct ehca_cq *cq)
-{
-       u64 CQx_EP;
-       struct h_galpa gal = cq->galpas.kernel;
-
-       hipz_galpa_store_cq(gal, cqx_ep, 0x0);
-       CQx_EP = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_ep));
-
-       return;
-}
-
-irqreturn_t ehca_interrupt_neq(int irq, void *dev_id)
-{
-       struct ehca_shca *shca = (struct ehca_shca*)dev_id;
-
-       tasklet_hi_schedule(&shca->neq.interrupt_task);
-
-       return IRQ_HANDLED;
-}
-
-void ehca_tasklet_neq(unsigned long data)
-{
-       struct ehca_shca *shca = (struct ehca_shca*)data;
-       struct ehca_eqe *eqe;
-       u64 ret;
-
-       eqe = ehca_poll_eq(shca, &shca->neq);
-
-       while (eqe) {
-               if (!EHCA_BMASK_GET(NEQE_COMPLETION_EVENT, eqe->entry))
-                       parse_ec(shca, eqe->entry);
-
-               eqe = ehca_poll_eq(shca, &shca->neq);
-       }
-
-       ret = hipz_h_reset_event(shca->ipz_hca_handle,
-                                shca->neq.ipz_eq_handle, 0xFFFFFFFFFFFFFFFFL);
-
-       if (ret != H_SUCCESS)
-               ehca_err(&shca->ib_device, "Can't clear notification events.");
-
-       return;
-}
-
-irqreturn_t ehca_interrupt_eq(int irq, void *dev_id)
-{
-       struct ehca_shca *shca = (struct ehca_shca*)dev_id;
-
-       tasklet_hi_schedule(&shca->eq.interrupt_task);
-
-       return IRQ_HANDLED;
-}
-
-
-static inline void process_eqe(struct ehca_shca *shca, struct ehca_eqe *eqe)
-{
-       u64 eqe_value;
-       u32 token;
-       struct ehca_cq *cq;
-
-       eqe_value = eqe->entry;
-       ehca_dbg(&shca->ib_device, "eqe_value=%llx", eqe_value);
-       if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
-               ehca_dbg(&shca->ib_device, "Got completion event");
-               token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
-               read_lock(&ehca_cq_idr_lock);
-               cq = idr_find(&ehca_cq_idr, token);
-               if (cq)
-                       atomic_inc(&cq->nr_events);
-               read_unlock(&ehca_cq_idr_lock);
-               if (cq == NULL) {
-                       ehca_err(&shca->ib_device,
-                                "Invalid eqe for non-existing cq token=%x",
-                                token);
-                       return;
-               }
-               reset_eq_pending(cq);
-               if (ehca_scaling_code)
-                       queue_comp_task(cq);
-               else {
-                       comp_event_callback(cq);
-                       if (atomic_dec_and_test(&cq->nr_events))
-                               wake_up(&cq->wait_completion);
-               }
-       } else {
-               ehca_dbg(&shca->ib_device, "Got non completion event");
-               parse_identifier(shca, eqe_value);
-       }
-}
-
-void ehca_process_eq(struct ehca_shca *shca, int is_irq)
-{
-       struct ehca_eq *eq = &shca->eq;
-       struct ehca_eqe_cache_entry *eqe_cache = eq->eqe_cache;
-       u64 eqe_value, ret;
-       int eqe_cnt, i;
-       int eq_empty = 0;
-
-       spin_lock(&eq->irq_spinlock);
-       if (is_irq) {
-               const int max_query_cnt = 100;
-               int query_cnt = 0;
-               int int_state = 1;
-               do {
-                       int_state = hipz_h_query_int_state(
-                               shca->ipz_hca_handle, eq->ist);
-                       query_cnt++;
-                       iosync();
-               } while (int_state && query_cnt < max_query_cnt);
-               if (unlikely((query_cnt == max_query_cnt)))
-                       ehca_dbg(&shca->ib_device, "int_state=%x query_cnt=%x",
-                                int_state, query_cnt);
-       }
-
-       /* read out all eqes */
-       eqe_cnt = 0;
-       do {
-               u32 token;
-               eqe_cache[eqe_cnt].eqe = ehca_poll_eq(shca, eq);
-               if (!eqe_cache[eqe_cnt].eqe)
-                       break;
-               eqe_value = eqe_cache[eqe_cnt].eqe->entry;
-               if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
-                       token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
-                       read_lock(&ehca_cq_idr_lock);
-                       eqe_cache[eqe_cnt].cq = idr_find(&ehca_cq_idr, token);
-                       if (eqe_cache[eqe_cnt].cq)
-                               atomic_inc(&eqe_cache[eqe_cnt].cq->nr_events);
-                       read_unlock(&ehca_cq_idr_lock);
-                       if (!eqe_cache[eqe_cnt].cq) {
-                               ehca_err(&shca->ib_device,
-                                        "Invalid eqe for non-existing cq "
-                                        "token=%x", token);
-                               continue;
-                       }
-               } else
-                       eqe_cache[eqe_cnt].cq = NULL;
-               eqe_cnt++;
-       } while (eqe_cnt < EHCA_EQE_CACHE_SIZE);
-       if (!eqe_cnt) {
-               if (is_irq)
-                       ehca_dbg(&shca->ib_device,
-                                "No eqe found for irq event");
-               goto unlock_irq_spinlock;
-       } else if (!is_irq) {
-               ret = hipz_h_eoi(eq->ist);
-               if (ret != H_SUCCESS)
-                       ehca_err(&shca->ib_device,
-                                "bad return code EOI -rc = %lld\n", ret);
-               ehca_dbg(&shca->ib_device, "deadman found %x eqe", eqe_cnt);
-       }
-       if (unlikely(eqe_cnt == EHCA_EQE_CACHE_SIZE))
-               ehca_dbg(&shca->ib_device, "too many eqes for one irq event");
-       /* enable irq for new packets */
-       for (i = 0; i < eqe_cnt; i++) {
-               if (eq->eqe_cache[i].cq)
-                       reset_eq_pending(eq->eqe_cache[i].cq);
-       }
-       /* check eq */
-       spin_lock(&eq->spinlock);
-       eq_empty = (!ipz_eqit_eq_peek_valid(&shca->eq.ipz_queue));
-       spin_unlock(&eq->spinlock);
-       /* call completion handler for cached eqes */
-       for (i = 0; i < eqe_cnt; i++)
-               if (eq->eqe_cache[i].cq) {
-                       if (ehca_scaling_code)
-                               queue_comp_task(eq->eqe_cache[i].cq);
-                       else {
-                               struct ehca_cq *cq = eq->eqe_cache[i].cq;
-                               comp_event_callback(cq);
-                               if (atomic_dec_and_test(&cq->nr_events))
-                                       wake_up(&cq->wait_completion);
-                       }
-               } else {
-                       ehca_dbg(&shca->ib_device, "Got non completion event");
-                       parse_identifier(shca, eq->eqe_cache[i].eqe->entry);
-               }
-       /* poll eq if not empty */
-       if (eq_empty)
-               goto unlock_irq_spinlock;
-       do {
-               struct ehca_eqe *eqe;
-               eqe = ehca_poll_eq(shca, &shca->eq);
-               if (!eqe)
-                       break;
-               process_eqe(shca, eqe);
-       } while (1);
-
-unlock_irq_spinlock:
-       spin_unlock(&eq->irq_spinlock);
-}
-
-void ehca_tasklet_eq(unsigned long data)
-{
-       ehca_process_eq((struct ehca_shca*)data, 1);
-}
-
-static int find_next_online_cpu(struct ehca_comp_pool *pool)
-{
-       int cpu;
-       unsigned long flags;
-
-       WARN_ON_ONCE(!in_interrupt());
-       if (ehca_debug_level >= 3)
-               ehca_dmp(cpu_online_mask, cpumask_size(), "");
-
-       spin_lock_irqsave(&pool->last_cpu_lock, flags);
-       do {
-               cpu = cpumask_next(pool->last_cpu, cpu_online_mask);
-               if (cpu >= nr_cpu_ids)
-                       cpu = cpumask_first(cpu_online_mask);
-               pool->last_cpu = cpu;
-       } while (!per_cpu_ptr(pool->cpu_comp_tasks, cpu)->active);
-       spin_unlock_irqrestore(&pool->last_cpu_lock, flags);
-
-       return cpu;
-}
-
-static void __queue_comp_task(struct ehca_cq *__cq,
-                             struct ehca_cpu_comp_task *cct,
-                             struct task_struct *thread)
-{
-       unsigned long flags;
-
-       spin_lock_irqsave(&cct->task_lock, flags);
-       spin_lock(&__cq->task_lock);
-
-       if (__cq->nr_callbacks == 0) {
-               __cq->nr_callbacks++;
-               list_add_tail(&__cq->entry, &cct->cq_list);
-               cct->cq_jobs++;
-               wake_up_process(thread);
-       } else
-               __cq->nr_callbacks++;
-
-       spin_unlock(&__cq->task_lock);
-       spin_unlock_irqrestore(&cct->task_lock, flags);
-}
-
-static void queue_comp_task(struct ehca_cq *__cq)
-{
-       int cpu_id;
-       struct ehca_cpu_comp_task *cct;
-       struct task_struct *thread;
-       int cq_jobs;
-       unsigned long flags;
-
-       cpu_id = find_next_online_cpu(pool);
-       BUG_ON(!cpu_online(cpu_id));
-
-       cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
-       thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id);
-       BUG_ON(!cct || !thread);
-
-       spin_lock_irqsave(&cct->task_lock, flags);
-       cq_jobs = cct->cq_jobs;
-       spin_unlock_irqrestore(&cct->task_lock, flags);
-       if (cq_jobs > 0) {
-               cpu_id = find_next_online_cpu(pool);
-               cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
-               thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id);
-               BUG_ON(!cct || !thread);
-       }
-       __queue_comp_task(__cq, cct, thread);
-}
-
-static void run_comp_task(struct ehca_cpu_comp_task *cct)
-{
-       struct ehca_cq *cq;
-
-       while (!list_empty(&cct->cq_list)) {
-               cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
-               spin_unlock_irq(&cct->task_lock);
-
-               comp_event_callback(cq);
-               if (atomic_dec_and_test(&cq->nr_events))
-                       wake_up(&cq->wait_completion);
-
-               spin_lock_irq(&cct->task_lock);
-               spin_lock(&cq->task_lock);
-               cq->nr_callbacks--;
-               if (!cq->nr_callbacks) {
-                       list_del_init(cct->cq_list.next);
-                       cct->cq_jobs--;
-               }
-               spin_unlock(&cq->task_lock);
-       }
-}
-
-static void comp_task_park(unsigned int cpu)
-{
-       struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
-       struct ehca_cpu_comp_task *target;
-       struct task_struct *thread;
-       struct ehca_cq *cq, *tmp;
-       LIST_HEAD(list);
-
-       spin_lock_irq(&cct->task_lock);
-       cct->cq_jobs = 0;
-       cct->active = 0;
-       list_splice_init(&cct->cq_list, &list);
-       spin_unlock_irq(&cct->task_lock);
-
-       cpu = find_next_online_cpu(pool);
-       target = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
-       thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu);
-       spin_lock_irq(&target->task_lock);
-       list_for_each_entry_safe(cq, tmp, &list, entry) {
-               list_del(&cq->entry);
-               __queue_comp_task(cq, target, thread);
-       }
-       spin_unlock_irq(&target->task_lock);
-}
-
-static void comp_task_stop(unsigned int cpu, bool online)
-{
-       struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
-
-       spin_lock_irq(&cct->task_lock);
-       cct->cq_jobs = 0;
-       cct->active = 0;
-       WARN_ON(!list_empty(&cct->cq_list));
-       spin_unlock_irq(&cct->task_lock);
-}
-
-static int comp_task_should_run(unsigned int cpu)
-{
-       struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
-
-       return cct->cq_jobs;
-}
-
-static void comp_task(unsigned int cpu)
-{
-       struct ehca_cpu_comp_task *cct = this_cpu_ptr(pool->cpu_comp_tasks);
-       int cql_empty;
-
-       spin_lock_irq(&cct->task_lock);
-       cql_empty = list_empty(&cct->cq_list);
-       if (!cql_empty) {
-               __set_current_state(TASK_RUNNING);
-               run_comp_task(cct);
-       }
-       spin_unlock_irq(&cct->task_lock);
-}
-
-static struct smp_hotplug_thread comp_pool_threads = {
-       .thread_should_run      = comp_task_should_run,
-       .thread_fn              = comp_task,
-       .thread_comm            = "ehca_comp/%u",
-       .cleanup                = comp_task_stop,
-       .park                   = comp_task_park,
-};
-
-int ehca_create_comp_pool(void)
-{
-       int cpu, ret = -ENOMEM;
-
-       if (!ehca_scaling_code)
-               return 0;
-
-       pool = kzalloc(sizeof(struct ehca_comp_pool), GFP_KERNEL);
-       if (pool == NULL)
-               return -ENOMEM;
-
-       spin_lock_init(&pool->last_cpu_lock);
-       pool->last_cpu = cpumask_any(cpu_online_mask);
-
-       pool->cpu_comp_tasks = alloc_percpu(struct ehca_cpu_comp_task);
-       if (!pool->cpu_comp_tasks)
-               goto out_pool;
-
-       pool->cpu_comp_threads = alloc_percpu(struct task_struct *);
-       if (!pool->cpu_comp_threads)
-               goto out_tasks;
-
-       for_each_present_cpu(cpu) {
-               struct ehca_cpu_comp_task *cct;
-
-               cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
-               spin_lock_init(&cct->task_lock);
-               INIT_LIST_HEAD(&cct->cq_list);
-       }
-
-       comp_pool_threads.store = pool->cpu_comp_threads;
-       ret = smpboot_register_percpu_thread(&comp_pool_threads);
-       if (ret)
-               goto out_threads;
-
-       pr_info("eHCA scaling code enabled\n");
-       return ret;
-
-out_threads:
-       free_percpu(pool->cpu_comp_threads);
-out_tasks:
-       free_percpu(pool->cpu_comp_tasks);
-out_pool:
-       kfree(pool);
-       return ret;
-}
-
-void ehca_destroy_comp_pool(void)
-{
-       if (!ehca_scaling_code)
-               return;
-
-       smpboot_unregister_percpu_thread(&comp_pool_threads);
-
-       free_percpu(pool->cpu_comp_threads);
-       free_percpu(pool->cpu_comp_tasks);
-       kfree(pool);
-}
diff --git a/drivers/staging/rdma/ehca/ehca_irq.h b/drivers/staging/rdma/ehca/ehca_irq.h
deleted file mode 100644 (file)
index 5370199..0000000
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  Function definitions and structs for EQs, NEQs and interrupts
- *
- *  Authors: Heiko J Schick <schickhj@de.ibm.com>
- *           Khadija Souissi <souissi@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __EHCA_IRQ_H
-#define __EHCA_IRQ_H
-
-
-struct ehca_shca;
-
-#include <linux/interrupt.h>
-#include <linux/types.h>
-
-int ehca_error_data(struct ehca_shca *shca, void *data, u64 resource);
-
-irqreturn_t ehca_interrupt_neq(int irq, void *dev_id);
-void ehca_tasklet_neq(unsigned long data);
-
-irqreturn_t ehca_interrupt_eq(int irq, void *dev_id);
-void ehca_tasklet_eq(unsigned long data);
-void ehca_process_eq(struct ehca_shca *shca, int is_irq);
-
-struct ehca_cpu_comp_task {
-       struct list_head cq_list;
-       spinlock_t task_lock;
-       int cq_jobs;
-       int active;
-};
-
-struct ehca_comp_pool {
-       struct ehca_cpu_comp_task __percpu *cpu_comp_tasks;
-       struct task_struct * __percpu *cpu_comp_threads;
-       int last_cpu;
-       spinlock_t last_cpu_lock;
-};
-
-int ehca_create_comp_pool(void);
-void ehca_destroy_comp_pool(void);
-
-#endif
diff --git a/drivers/staging/rdma/ehca/ehca_iverbs.h b/drivers/staging/rdma/ehca/ehca_iverbs.h
deleted file mode 100644 (file)
index 80e6a3d..0000000
+++ /dev/null
@@ -1,218 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  Function definitions for internal functions
- *
- *  Authors: Heiko J Schick <schickhj@de.ibm.com>
- *           Dietmar Decker <ddecker@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __EHCA_IVERBS_H__
-#define __EHCA_IVERBS_H__
-
-#include "ehca_classes.h"
-
-int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
-                     struct ib_udata *uhw);
-
-int ehca_query_port(struct ib_device *ibdev, u8 port,
-                   struct ib_port_attr *props);
-
-enum rdma_protocol_type
-ehca_query_protocol(struct ib_device *device, u8 port_num);
-
-int ehca_query_sma_attr(struct ehca_shca *shca, u8 port,
-                       struct ehca_sma_attr *attr);
-
-int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 * pkey);
-
-int ehca_query_gid(struct ib_device *ibdev, u8 port, int index,
-                  union ib_gid *gid);
-
-int ehca_modify_port(struct ib_device *ibdev, u8 port, int port_modify_mask,
-                    struct ib_port_modify *props);
-
-struct ib_pd *ehca_alloc_pd(struct ib_device *device,
-                           struct ib_ucontext *context,
-                           struct ib_udata *udata);
-
-int ehca_dealloc_pd(struct ib_pd *pd);
-
-struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
-
-int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr);
-
-int ehca_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr);
-
-int ehca_destroy_ah(struct ib_ah *ah);
-
-struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags);
-
-struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
-                              struct ib_phys_buf *phys_buf_array,
-                              int num_phys_buf,
-                              int mr_access_flags, u64 *iova_start);
-
-struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
-                              u64 virt, int mr_access_flags,
-                              struct ib_udata *udata);
-
-int ehca_rereg_phys_mr(struct ib_mr *mr,
-                      int mr_rereg_mask,
-                      struct ib_pd *pd,
-                      struct ib_phys_buf *phys_buf_array,
-                      int num_phys_buf, int mr_access_flags, u64 *iova_start);
-
-int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr);
-
-int ehca_dereg_mr(struct ib_mr *mr);
-
-struct ib_mw *ehca_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
-
-int ehca_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
-                struct ib_mw_bind *mw_bind);
-
-int ehca_dealloc_mw(struct ib_mw *mw);
-
-struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
-                             int mr_access_flags,
-                             struct ib_fmr_attr *fmr_attr);
-
-int ehca_map_phys_fmr(struct ib_fmr *fmr,
-                     u64 *page_list, int list_len, u64 iova);
-
-int ehca_unmap_fmr(struct list_head *fmr_list);
-
-int ehca_dealloc_fmr(struct ib_fmr *fmr);
-
-enum ehca_eq_type {
-       EHCA_EQ = 0, /* Event Queue              */
-       EHCA_NEQ     /* Notification Event Queue */
-};
-
-int ehca_create_eq(struct ehca_shca *shca, struct ehca_eq *eq,
-                  enum ehca_eq_type type, const u32 length);
-
-int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq);
-
-void *ehca_poll_eq(struct ehca_shca *shca, struct ehca_eq *eq);
-
-
-struct ib_cq *ehca_create_cq(struct ib_device *device,
-                            const struct ib_cq_init_attr *attr,
-                            struct ib_ucontext *context,
-                            struct ib_udata *udata);
-
-int ehca_destroy_cq(struct ib_cq *cq);
-
-int ehca_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata);
-
-int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc);
-
-int ehca_peek_cq(struct ib_cq *cq, int wc_cnt);
-
-int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags);
-
-struct ib_qp *ehca_create_qp(struct ib_pd *pd,
-                            struct ib_qp_init_attr *init_attr,
-                            struct ib_udata *udata);
-
-int ehca_destroy_qp(struct ib_qp *qp);
-
-int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
-                  struct ib_udata *udata);
-
-int ehca_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
-                 int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr);
-
-int ehca_post_send(struct ib_qp *qp, struct ib_send_wr *send_wr,
-                  struct ib_send_wr **bad_send_wr);
-
-int ehca_post_recv(struct ib_qp *qp, struct ib_recv_wr *recv_wr,
-                  struct ib_recv_wr **bad_recv_wr);
-
-int ehca_post_srq_recv(struct ib_srq *srq,
-                      struct ib_recv_wr *recv_wr,
-                      struct ib_recv_wr **bad_recv_wr);
-
-struct ib_srq *ehca_create_srq(struct ib_pd *pd,
-                              struct ib_srq_init_attr *init_attr,
-                              struct ib_udata *udata);
-
-int ehca_modify_srq(struct ib_srq *srq, struct ib_srq_attr *attr,
-                   enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
-
-int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
-
-int ehca_destroy_srq(struct ib_srq *srq);
-
-u64 ehca_define_sqp(struct ehca_shca *shca, struct ehca_qp *ibqp,
-                   struct ib_qp_init_attr *qp_init_attr);
-
-int ehca_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
-
-int ehca_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
-
-struct ib_ucontext *ehca_alloc_ucontext(struct ib_device *device,
-                                       struct ib_udata *udata);
-
-int ehca_dealloc_ucontext(struct ib_ucontext *context);
-
-int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
-
-int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
-                    const struct ib_wc *in_wc, const struct ib_grh *in_grh,
-                    const struct ib_mad_hdr *in, size_t in_mad_size,
-                    struct ib_mad_hdr *out, size_t *out_mad_size,
-                    u16 *out_mad_pkey_index);
-
-void ehca_poll_eqs(unsigned long data);
-
-int ehca_calc_ipd(struct ehca_shca *shca, int port,
-                 enum ib_rate path_rate, u32 *ipd);
-
-void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq);
-
-#ifdef CONFIG_PPC_64K_PAGES
-void *ehca_alloc_fw_ctrlblock(gfp_t flags);
-void ehca_free_fw_ctrlblock(void *ptr);
-#else
-#define ehca_alloc_fw_ctrlblock(flags) ((void *)get_zeroed_page(flags))
-#define ehca_free_fw_ctrlblock(ptr) free_page((unsigned long)(ptr))
-#endif
-
-void ehca_recover_sqp(struct ib_qp *sqp);
-
-#endif
diff --git a/drivers/staging/rdma/ehca/ehca_main.c b/drivers/staging/rdma/ehca/ehca_main.c
deleted file mode 100644 (file)
index 860b974..0000000
+++ /dev/null
@@ -1,1122 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  module start stop, hca detection
- *
- *  Authors: Heiko J Schick <schickhj@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Joachim Fenkes <fenkes@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifdef CONFIG_PPC_64K_PAGES
-#include <linux/slab.h>
-#endif
-
-#include <linux/notifier.h>
-#include <linux/memory.h>
-#include <rdma/ib_mad.h>
-#include "ehca_classes.h"
-#include "ehca_iverbs.h"
-#include "ehca_mrmw.h"
-#include "ehca_tools.h"
-#include "hcp_if.h"
-
-#define HCAD_VERSION "0029"
-
-MODULE_LICENSE("Dual BSD/GPL");
-MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>");
-MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver");
-MODULE_VERSION(HCAD_VERSION);
-
-static bool ehca_open_aqp1    = 0;
-static int ehca_hw_level      = 0;
-static bool ehca_poll_all_eqs = 1;
-
-int ehca_debug_level   = 0;
-int ehca_nr_ports      = -1;
-bool ehca_use_hp_mr    = 0;
-int ehca_port_act_time = 30;
-int ehca_static_rate   = -1;
-bool ehca_scaling_code = 0;
-int ehca_lock_hcalls   = -1;
-int ehca_max_cq        = -1;
-int ehca_max_qp        = -1;
-
-module_param_named(open_aqp1,     ehca_open_aqp1,     bool, S_IRUGO);
-module_param_named(debug_level,   ehca_debug_level,   int,  S_IRUGO);
-module_param_named(hw_level,      ehca_hw_level,      int,  S_IRUGO);
-module_param_named(nr_ports,      ehca_nr_ports,      int,  S_IRUGO);
-module_param_named(use_hp_mr,     ehca_use_hp_mr,     bool, S_IRUGO);
-module_param_named(port_act_time, ehca_port_act_time, int,  S_IRUGO);
-module_param_named(poll_all_eqs,  ehca_poll_all_eqs,  bool, S_IRUGO);
-module_param_named(static_rate,   ehca_static_rate,   int,  S_IRUGO);
-module_param_named(scaling_code,  ehca_scaling_code,  bool, S_IRUGO);
-module_param_named(lock_hcalls,   ehca_lock_hcalls,   bint, S_IRUGO);
-module_param_named(number_of_cqs, ehca_max_cq,        int,  S_IRUGO);
-module_param_named(number_of_qps, ehca_max_qp,        int,  S_IRUGO);
-
-MODULE_PARM_DESC(open_aqp1,
-                "Open AQP1 on startup (default: no)");
-MODULE_PARM_DESC(debug_level,
-                "Amount of debug output (0: none (default), 1: traces, "
-                "2: some dumps, 3: lots)");
-MODULE_PARM_DESC(hw_level,
-                "Hardware level (0: autosensing (default), "
-                "0x10..0x14: eHCA, 0x20..0x23: eHCA2)");
-MODULE_PARM_DESC(nr_ports,
-                "number of connected ports (-1: autodetect (default), "
-                "1: port one only, 2: two ports)");
-MODULE_PARM_DESC(use_hp_mr,
-                "Use high performance MRs (default: no)");
-MODULE_PARM_DESC(port_act_time,
-                "Time to wait for port activation (default: 30 sec)");
-MODULE_PARM_DESC(poll_all_eqs,
-                "Poll all event queues periodically (default: yes)");
-MODULE_PARM_DESC(static_rate,
-                "Set permanent static rate (default: no static rate)");
-MODULE_PARM_DESC(scaling_code,
-                "Enable scaling code (default: no)");
-MODULE_PARM_DESC(lock_hcalls,
-                "Serialize all hCalls made by the driver "
-                "(default: autodetect)");
-MODULE_PARM_DESC(number_of_cqs,
-               "Max number of CQs which can be allocated "
-               "(default: autodetect)");
-MODULE_PARM_DESC(number_of_qps,
-               "Max number of QPs which can be allocated "
-               "(default: autodetect)");
-
-DEFINE_RWLOCK(ehca_qp_idr_lock);
-DEFINE_RWLOCK(ehca_cq_idr_lock);
-DEFINE_IDR(ehca_qp_idr);
-DEFINE_IDR(ehca_cq_idr);
-
-static LIST_HEAD(shca_list); /* list of all registered ehcas */
-DEFINE_SPINLOCK(shca_list_lock);
-
-static struct timer_list poll_eqs_timer;
-
-#ifdef CONFIG_PPC_64K_PAGES
-static struct kmem_cache *ctblk_cache;
-
-void *ehca_alloc_fw_ctrlblock(gfp_t flags)
-{
-       void *ret = kmem_cache_zalloc(ctblk_cache, flags);
-       if (!ret)
-               ehca_gen_err("Out of memory for ctblk");
-       return ret;
-}
-
-void ehca_free_fw_ctrlblock(void *ptr)
-{
-       if (ptr)
-               kmem_cache_free(ctblk_cache, ptr);
-
-}
-#endif
-
-int ehca2ib_return_code(u64 ehca_rc)
-{
-       switch (ehca_rc) {
-       case H_SUCCESS:
-               return 0;
-       case H_RESOURCE:             /* Resource in use */
-       case H_BUSY:
-               return -EBUSY;
-       case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */
-       case H_CONSTRAINED:          /* resource constraint */
-       case H_NO_MEM:
-               return -ENOMEM;
-       default:
-               return -EINVAL;
-       }
-}
-
-static int ehca_create_slab_caches(void)
-{
-       int ret;
-
-       ret = ehca_init_pd_cache();
-       if (ret) {
-               ehca_gen_err("Cannot create PD SLAB cache.");
-               return ret;
-       }
-
-       ret = ehca_init_cq_cache();
-       if (ret) {
-               ehca_gen_err("Cannot create CQ SLAB cache.");
-               goto create_slab_caches2;
-       }
-
-       ret = ehca_init_qp_cache();
-       if (ret) {
-               ehca_gen_err("Cannot create QP SLAB cache.");
-               goto create_slab_caches3;
-       }
-
-       ret = ehca_init_av_cache();
-       if (ret) {
-               ehca_gen_err("Cannot create AV SLAB cache.");
-               goto create_slab_caches4;
-       }
-
-       ret = ehca_init_mrmw_cache();
-       if (ret) {
-               ehca_gen_err("Cannot create MR&MW SLAB cache.");
-               goto create_slab_caches5;
-       }
-
-       ret = ehca_init_small_qp_cache();
-       if (ret) {
-               ehca_gen_err("Cannot create small queue SLAB cache.");
-               goto create_slab_caches6;
-       }
-
-#ifdef CONFIG_PPC_64K_PAGES
-       ctblk_cache = kmem_cache_create("ehca_cache_ctblk",
-                                       EHCA_PAGESIZE, H_CB_ALIGNMENT,
-                                       SLAB_HWCACHE_ALIGN,
-                                       NULL);
-       if (!ctblk_cache) {
-               ehca_gen_err("Cannot create ctblk SLAB cache.");
-               ehca_cleanup_small_qp_cache();
-               ret = -ENOMEM;
-               goto create_slab_caches6;
-       }
-#endif
-       return 0;
-
-create_slab_caches6:
-       ehca_cleanup_mrmw_cache();
-
-create_slab_caches5:
-       ehca_cleanup_av_cache();
-
-create_slab_caches4:
-       ehca_cleanup_qp_cache();
-
-create_slab_caches3:
-       ehca_cleanup_cq_cache();
-
-create_slab_caches2:
-       ehca_cleanup_pd_cache();
-
-       return ret;
-}
-
-static void ehca_destroy_slab_caches(void)
-{
-       ehca_cleanup_small_qp_cache();
-       ehca_cleanup_mrmw_cache();
-       ehca_cleanup_av_cache();
-       ehca_cleanup_qp_cache();
-       ehca_cleanup_cq_cache();
-       ehca_cleanup_pd_cache();
-#ifdef CONFIG_PPC_64K_PAGES
-       kmem_cache_destroy(ctblk_cache);
-#endif
-}
-
-#define EHCA_HCAAVER  EHCA_BMASK_IBM(32, 39)
-#define EHCA_REVID    EHCA_BMASK_IBM(40, 63)
-
-static struct cap_descr {
-       u64 mask;
-       char *descr;
-} hca_cap_descr[] = {
-       { HCA_CAP_AH_PORT_NR_CHECK, "HCA_CAP_AH_PORT_NR_CHECK" },
-       { HCA_CAP_ATOMIC, "HCA_CAP_ATOMIC" },
-       { HCA_CAP_AUTO_PATH_MIG, "HCA_CAP_AUTO_PATH_MIG" },
-       { HCA_CAP_BAD_P_KEY_CTR, "HCA_CAP_BAD_P_KEY_CTR" },
-       { HCA_CAP_SQD_RTS_PORT_CHANGE, "HCA_CAP_SQD_RTS_PORT_CHANGE" },
-       { HCA_CAP_CUR_QP_STATE_MOD, "HCA_CAP_CUR_QP_STATE_MOD" },
-       { HCA_CAP_INIT_TYPE, "HCA_CAP_INIT_TYPE" },
-       { HCA_CAP_PORT_ACTIVE_EVENT, "HCA_CAP_PORT_ACTIVE_EVENT" },
-       { HCA_CAP_Q_KEY_VIOL_CTR, "HCA_CAP_Q_KEY_VIOL_CTR" },
-       { HCA_CAP_WQE_RESIZE, "HCA_CAP_WQE_RESIZE" },
-       { HCA_CAP_RAW_PACKET_MCAST, "HCA_CAP_RAW_PACKET_MCAST" },
-       { HCA_CAP_SHUTDOWN_PORT, "HCA_CAP_SHUTDOWN_PORT" },
-       { HCA_CAP_RC_LL_QP, "HCA_CAP_RC_LL_QP" },
-       { HCA_CAP_SRQ, "HCA_CAP_SRQ" },
-       { HCA_CAP_UD_LL_QP, "HCA_CAP_UD_LL_QP" },
-       { HCA_CAP_RESIZE_MR, "HCA_CAP_RESIZE_MR" },
-       { HCA_CAP_MINI_QP, "HCA_CAP_MINI_QP" },
-       { HCA_CAP_H_ALLOC_RES_SYNC, "HCA_CAP_H_ALLOC_RES_SYNC" },
-};
-
-static int ehca_sense_attributes(struct ehca_shca *shca)
-{
-       int i, ret = 0;
-       u64 h_ret;
-       struct hipz_query_hca *rblock;
-       struct hipz_query_port *port;
-       const char *loc_code;
-
-       static const u32 pgsize_map[] = {
-               HCA_CAP_MR_PGSIZE_4K,  0x1000,
-               HCA_CAP_MR_PGSIZE_64K, 0x10000,
-               HCA_CAP_MR_PGSIZE_1M,  0x100000,
-               HCA_CAP_MR_PGSIZE_16M, 0x1000000,
-       };
-
-       ehca_gen_dbg("Probing adapter %s...",
-                    shca->ofdev->dev.of_node->full_name);
-       loc_code = of_get_property(shca->ofdev->dev.of_node, "ibm,loc-code",
-                                  NULL);
-       if (loc_code)
-               ehca_gen_dbg(" ... location lode=%s", loc_code);
-
-       rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-       if (!rblock) {
-               ehca_gen_err("Cannot allocate rblock memory.");
-               return -ENOMEM;
-       }
-
-       h_ret = hipz_h_query_hca(shca->ipz_hca_handle, rblock);
-       if (h_ret != H_SUCCESS) {
-               ehca_gen_err("Cannot query device properties. h_ret=%lli",
-                            h_ret);
-               ret = -EPERM;
-               goto sense_attributes1;
-       }
-
-       if (ehca_nr_ports == 1)
-               shca->num_ports = 1;
-       else
-               shca->num_ports = (u8)rblock->num_ports;
-
-       ehca_gen_dbg(" ... found %x ports", rblock->num_ports);
-
-       if (ehca_hw_level == 0) {
-               u32 hcaaver;
-               u32 revid;
-
-               hcaaver = EHCA_BMASK_GET(EHCA_HCAAVER, rblock->hw_ver);
-               revid   = EHCA_BMASK_GET(EHCA_REVID, rblock->hw_ver);
-
-               ehca_gen_dbg(" ... hardware version=%x:%x", hcaaver, revid);
-
-               if (hcaaver == 1) {
-                       if (revid <= 3)
-                               shca->hw_level = 0x10 | (revid + 1);
-                       else
-                               shca->hw_level = 0x14;
-               } else if (hcaaver == 2) {
-                       if (revid == 0)
-                               shca->hw_level = 0x21;
-                       else if (revid == 0x10)
-                               shca->hw_level = 0x22;
-                       else if (revid == 0x20 || revid == 0x21)
-                               shca->hw_level = 0x23;
-               }
-
-               if (!shca->hw_level) {
-                       ehca_gen_warn("unknown hardware version"
-                                     " - assuming default level");
-                       shca->hw_level = 0x22;
-               }
-       } else
-               shca->hw_level = ehca_hw_level;
-       ehca_gen_dbg(" ... hardware level=%x", shca->hw_level);
-
-       shca->hca_cap = rblock->hca_cap_indicators;
-       ehca_gen_dbg(" ... HCA capabilities:");
-       for (i = 0; i < ARRAY_SIZE(hca_cap_descr); i++)
-               if (EHCA_BMASK_GET(hca_cap_descr[i].mask, shca->hca_cap))
-                       ehca_gen_dbg("   %s", hca_cap_descr[i].descr);
-
-       /* Autodetect hCall locking -- the "H_ALLOC_RESOURCE synced" flag is
-        * a firmware property, so it's valid across all adapters
-        */
-       if (ehca_lock_hcalls == -1)
-               ehca_lock_hcalls = !EHCA_BMASK_GET(HCA_CAP_H_ALLOC_RES_SYNC,
-                                       shca->hca_cap);
-
-       /* translate supported MR page sizes; always support 4K */
-       shca->hca_cap_mr_pgsize = EHCA_PAGESIZE;
-       for (i = 0; i < ARRAY_SIZE(pgsize_map); i += 2)
-               if (rblock->memory_page_size_supported & pgsize_map[i])
-                       shca->hca_cap_mr_pgsize |= pgsize_map[i + 1];
-
-       /* Set maximum number of CQs and QPs to calculate EQ size */
-       if (shca->max_num_qps == -1)
-               shca->max_num_qps = min_t(int, rblock->max_qp,
-                                         EHCA_MAX_NUM_QUEUES);
-       else if (shca->max_num_qps < 1 || shca->max_num_qps > rblock->max_qp) {
-               ehca_gen_warn("The requested number of QPs is out of range "
-                             "(1 - %i) specified by HW. Value is set to %i",
-                             rblock->max_qp, rblock->max_qp);
-               shca->max_num_qps = rblock->max_qp;
-       }
-
-       if (shca->max_num_cqs == -1)
-               shca->max_num_cqs = min_t(int, rblock->max_cq,
-                                         EHCA_MAX_NUM_QUEUES);
-       else if (shca->max_num_cqs < 1 || shca->max_num_cqs > rblock->max_cq) {
-               ehca_gen_warn("The requested number of CQs is out of range "
-                             "(1 - %i) specified by HW. Value is set to %i",
-                             rblock->max_cq, rblock->max_cq);
-       }
-
-       /* query max MTU from first port -- it's the same for all ports */
-       port = (struct hipz_query_port *)rblock;
-       h_ret = hipz_h_query_port(shca->ipz_hca_handle, 1, port);
-       if (h_ret != H_SUCCESS) {
-               ehca_gen_err("Cannot query port properties. h_ret=%lli",
-                            h_ret);
-               ret = -EPERM;
-               goto sense_attributes1;
-       }
-
-       shca->max_mtu = port->max_mtu;
-
-sense_attributes1:
-       ehca_free_fw_ctrlblock(rblock);
-       return ret;
-}
-
-static int init_node_guid(struct ehca_shca *shca)
-{
-       int ret = 0;
-       struct hipz_query_hca *rblock;
-
-       rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-       if (!rblock) {
-               ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
-               return -ENOMEM;
-       }
-
-       if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) {
-               ehca_err(&shca->ib_device, "Can't query device properties");
-               ret = -EINVAL;
-               goto init_node_guid1;
-       }
-
-       memcpy(&shca->ib_device.node_guid, &rblock->node_guid, sizeof(u64));
-
-init_node_guid1:
-       ehca_free_fw_ctrlblock(rblock);
-       return ret;
-}
-
-static int ehca_port_immutable(struct ib_device *ibdev, u8 port_num,
-                              struct ib_port_immutable *immutable)
-{
-       struct ib_port_attr attr;
-       int err;
-
-       err = ehca_query_port(ibdev, port_num, &attr);
-       if (err)
-               return err;
-
-       immutable->pkey_tbl_len = attr.pkey_tbl_len;
-       immutable->gid_tbl_len = attr.gid_tbl_len;
-       immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
-       immutable->max_mad_size = IB_MGMT_MAD_SIZE;
-
-       return 0;
-}
-
-static int ehca_init_device(struct ehca_shca *shca)
-{
-       int ret;
-
-       ret = init_node_guid(shca);
-       if (ret)
-               return ret;
-
-       strlcpy(shca->ib_device.name, "ehca%d", IB_DEVICE_NAME_MAX);
-       shca->ib_device.owner               = THIS_MODULE;
-
-       shca->ib_device.uverbs_abi_ver      = 8;
-       shca->ib_device.uverbs_cmd_mask     =
-               (1ull << IB_USER_VERBS_CMD_GET_CONTEXT)         |
-               (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)        |
-               (1ull << IB_USER_VERBS_CMD_QUERY_PORT)          |
-               (1ull << IB_USER_VERBS_CMD_ALLOC_PD)            |
-               (1ull << IB_USER_VERBS_CMD_DEALLOC_PD)          |
-               (1ull << IB_USER_VERBS_CMD_REG_MR)              |
-               (1ull << IB_USER_VERBS_CMD_DEREG_MR)            |
-               (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
-               (1ull << IB_USER_VERBS_CMD_CREATE_CQ)           |
-               (1ull << IB_USER_VERBS_CMD_DESTROY_CQ)          |
-               (1ull << IB_USER_VERBS_CMD_CREATE_QP)           |
-               (1ull << IB_USER_VERBS_CMD_MODIFY_QP)           |
-               (1ull << IB_USER_VERBS_CMD_QUERY_QP)            |
-               (1ull << IB_USER_VERBS_CMD_DESTROY_QP)          |
-               (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)        |
-               (1ull << IB_USER_VERBS_CMD_DETACH_MCAST);
-
-       shca->ib_device.node_type           = RDMA_NODE_IB_CA;
-       shca->ib_device.phys_port_cnt       = shca->num_ports;
-       shca->ib_device.num_comp_vectors    = 1;
-       shca->ib_device.dma_device          = &shca->ofdev->dev;
-       shca->ib_device.query_device        = ehca_query_device;
-       shca->ib_device.query_port          = ehca_query_port;
-       shca->ib_device.query_gid           = ehca_query_gid;
-       shca->ib_device.query_pkey          = ehca_query_pkey;
-       /* shca->in_device.modify_device    = ehca_modify_device    */
-       shca->ib_device.modify_port         = ehca_modify_port;
-       shca->ib_device.alloc_ucontext      = ehca_alloc_ucontext;
-       shca->ib_device.dealloc_ucontext    = ehca_dealloc_ucontext;
-       shca->ib_device.alloc_pd            = ehca_alloc_pd;
-       shca->ib_device.dealloc_pd          = ehca_dealloc_pd;
-       shca->ib_device.create_ah           = ehca_create_ah;
-       /* shca->ib_device.modify_ah        = ehca_modify_ah;       */
-       shca->ib_device.query_ah            = ehca_query_ah;
-       shca->ib_device.destroy_ah          = ehca_destroy_ah;
-       shca->ib_device.create_qp           = ehca_create_qp;
-       shca->ib_device.modify_qp           = ehca_modify_qp;
-       shca->ib_device.query_qp            = ehca_query_qp;
-       shca->ib_device.destroy_qp          = ehca_destroy_qp;
-       shca->ib_device.post_send           = ehca_post_send;
-       shca->ib_device.post_recv           = ehca_post_recv;
-       shca->ib_device.create_cq           = ehca_create_cq;
-       shca->ib_device.destroy_cq          = ehca_destroy_cq;
-       shca->ib_device.resize_cq           = ehca_resize_cq;
-       shca->ib_device.poll_cq             = ehca_poll_cq;
-       /* shca->ib_device.peek_cq          = ehca_peek_cq;         */
-       shca->ib_device.req_notify_cq       = ehca_req_notify_cq;
-       /* shca->ib_device.req_ncomp_notif  = ehca_req_ncomp_notif; */
-       shca->ib_device.get_dma_mr          = ehca_get_dma_mr;
-       shca->ib_device.reg_phys_mr         = ehca_reg_phys_mr;
-       shca->ib_device.reg_user_mr         = ehca_reg_user_mr;
-       shca->ib_device.query_mr            = ehca_query_mr;
-       shca->ib_device.dereg_mr            = ehca_dereg_mr;
-       shca->ib_device.rereg_phys_mr       = ehca_rereg_phys_mr;
-       shca->ib_device.alloc_mw            = ehca_alloc_mw;
-       shca->ib_device.bind_mw             = ehca_bind_mw;
-       shca->ib_device.dealloc_mw          = ehca_dealloc_mw;
-       shca->ib_device.alloc_fmr           = ehca_alloc_fmr;
-       shca->ib_device.map_phys_fmr        = ehca_map_phys_fmr;
-       shca->ib_device.unmap_fmr           = ehca_unmap_fmr;
-       shca->ib_device.dealloc_fmr         = ehca_dealloc_fmr;
-       shca->ib_device.attach_mcast        = ehca_attach_mcast;
-       shca->ib_device.detach_mcast        = ehca_detach_mcast;
-       shca->ib_device.process_mad         = ehca_process_mad;
-       shca->ib_device.mmap                = ehca_mmap;
-       shca->ib_device.dma_ops             = &ehca_dma_mapping_ops;
-       shca->ib_device.get_port_immutable  = ehca_port_immutable;
-
-       if (EHCA_BMASK_GET(HCA_CAP_SRQ, shca->hca_cap)) {
-               shca->ib_device.uverbs_cmd_mask |=
-                       (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
-                       (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
-                       (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
-                       (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
-
-               shca->ib_device.create_srq          = ehca_create_srq;
-               shca->ib_device.modify_srq          = ehca_modify_srq;
-               shca->ib_device.query_srq           = ehca_query_srq;
-               shca->ib_device.destroy_srq         = ehca_destroy_srq;
-               shca->ib_device.post_srq_recv       = ehca_post_srq_recv;
-       }
-
-       return ret;
-}
-
-static int ehca_create_aqp1(struct ehca_shca *shca, u32 port)
-{
-       struct ehca_sport *sport = &shca->sport[port - 1];
-       struct ib_cq *ibcq;
-       struct ib_qp *ibqp;
-       struct ib_qp_init_attr qp_init_attr;
-       struct ib_cq_init_attr cq_attr = {};
-       int ret;
-
-       if (sport->ibcq_aqp1) {
-               ehca_err(&shca->ib_device, "AQP1 CQ is already created.");
-               return -EPERM;
-       }
-
-       cq_attr.cqe = 10;
-       ibcq = ib_create_cq(&shca->ib_device, NULL, NULL, (void *)(-1),
-                           &cq_attr);
-       if (IS_ERR(ibcq)) {
-               ehca_err(&shca->ib_device, "Cannot create AQP1 CQ.");
-               return PTR_ERR(ibcq);
-       }
-       sport->ibcq_aqp1 = ibcq;
-
-       if (sport->ibqp_sqp[IB_QPT_GSI]) {
-               ehca_err(&shca->ib_device, "AQP1 QP is already created.");
-               ret = -EPERM;
-               goto create_aqp1;
-       }
-
-       memset(&qp_init_attr, 0, sizeof(struct ib_qp_init_attr));
-       qp_init_attr.send_cq          = ibcq;
-       qp_init_attr.recv_cq          = ibcq;
-       qp_init_attr.sq_sig_type      = IB_SIGNAL_ALL_WR;
-       qp_init_attr.cap.max_send_wr  = 100;
-       qp_init_attr.cap.max_recv_wr  = 100;
-       qp_init_attr.cap.max_send_sge = 2;
-       qp_init_attr.cap.max_recv_sge = 1;
-       qp_init_attr.qp_type          = IB_QPT_GSI;
-       qp_init_attr.port_num         = port;
-       qp_init_attr.qp_context       = NULL;
-       qp_init_attr.event_handler    = NULL;
-       qp_init_attr.srq              = NULL;
-
-       ibqp = ib_create_qp(&shca->pd->ib_pd, &qp_init_attr);
-       if (IS_ERR(ibqp)) {
-               ehca_err(&shca->ib_device, "Cannot create AQP1 QP.");
-               ret = PTR_ERR(ibqp);
-               goto create_aqp1;
-       }
-       sport->ibqp_sqp[IB_QPT_GSI] = ibqp;
-
-       return 0;
-
-create_aqp1:
-       ib_destroy_cq(sport->ibcq_aqp1);
-       return ret;
-}
-
-static int ehca_destroy_aqp1(struct ehca_sport *sport)
-{
-       int ret;
-
-       ret = ib_destroy_qp(sport->ibqp_sqp[IB_QPT_GSI]);
-       if (ret) {
-               ehca_gen_err("Cannot destroy AQP1 QP. ret=%i", ret);
-               return ret;
-       }
-
-       ret = ib_destroy_cq(sport->ibcq_aqp1);
-       if (ret)
-               ehca_gen_err("Cannot destroy AQP1 CQ. ret=%i", ret);
-
-       return ret;
-}
-
-static ssize_t ehca_show_debug_level(struct device_driver *ddp, char *buf)
-{
-       return snprintf(buf, PAGE_SIZE, "%d\n", ehca_debug_level);
-}
-
-static ssize_t ehca_store_debug_level(struct device_driver *ddp,
-                                     const char *buf, size_t count)
-{
-       int value = (*buf) - '0';
-       if (value >= 0 && value <= 9)
-               ehca_debug_level = value;
-       return 1;
-}
-
-static DRIVER_ATTR(debug_level, S_IRUSR | S_IWUSR,
-                  ehca_show_debug_level, ehca_store_debug_level);
-
-static struct attribute *ehca_drv_attrs[] = {
-       &driver_attr_debug_level.attr,
-       NULL
-};
-
-static struct attribute_group ehca_drv_attr_grp = {
-       .attrs = ehca_drv_attrs
-};
-
-static const struct attribute_group *ehca_drv_attr_groups[] = {
-       &ehca_drv_attr_grp,
-       NULL,
-};
-
-#define EHCA_RESOURCE_ATTR(name)                                           \
-static ssize_t  ehca_show_##name(struct device *dev,                       \
-                                struct device_attribute *attr,            \
-                                char *buf)                                \
-{                                                                         \
-       struct ehca_shca *shca;                                            \
-       struct hipz_query_hca *rblock;                                     \
-       int data;                                                          \
-                                                                          \
-       shca = dev_get_drvdata(dev);                                       \
-                                                                          \
-       rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);                      \
-       if (!rblock) {                                                     \
-               dev_err(dev, "Can't allocate rblock memory.\n");           \
-               return 0;                                                  \
-       }                                                                  \
-                                                                          \
-       if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) { \
-               dev_err(dev, "Can't query device properties\n");           \
-               ehca_free_fw_ctrlblock(rblock);                            \
-               return 0;                                                  \
-       }                                                                  \
-                                                                          \
-       data = rblock->name;                                               \
-       ehca_free_fw_ctrlblock(rblock);                                    \
-                                                                          \
-       if ((strcmp(#name, "num_ports") == 0) && (ehca_nr_ports == 1))     \
-               return snprintf(buf, 256, "1\n");                          \
-       else                                                               \
-               return snprintf(buf, 256, "%d\n", data);                   \
-                                                                          \
-}                                                                         \
-static DEVICE_ATTR(name, S_IRUGO, ehca_show_##name, NULL);
-
-EHCA_RESOURCE_ATTR(num_ports);
-EHCA_RESOURCE_ATTR(hw_ver);
-EHCA_RESOURCE_ATTR(max_eq);
-EHCA_RESOURCE_ATTR(cur_eq);
-EHCA_RESOURCE_ATTR(max_cq);
-EHCA_RESOURCE_ATTR(cur_cq);
-EHCA_RESOURCE_ATTR(max_qp);
-EHCA_RESOURCE_ATTR(cur_qp);
-EHCA_RESOURCE_ATTR(max_mr);
-EHCA_RESOURCE_ATTR(cur_mr);
-EHCA_RESOURCE_ATTR(max_mw);
-EHCA_RESOURCE_ATTR(cur_mw);
-EHCA_RESOURCE_ATTR(max_pd);
-EHCA_RESOURCE_ATTR(max_ah);
-
-static ssize_t ehca_show_adapter_handle(struct device *dev,
-                                       struct device_attribute *attr,
-                                       char *buf)
-{
-       struct ehca_shca *shca = dev_get_drvdata(dev);
-
-       return sprintf(buf, "%llx\n", shca->ipz_hca_handle.handle);
-
-}
-static DEVICE_ATTR(adapter_handle, S_IRUGO, ehca_show_adapter_handle, NULL);
-
-static struct attribute *ehca_dev_attrs[] = {
-       &dev_attr_adapter_handle.attr,
-       &dev_attr_num_ports.attr,
-       &dev_attr_hw_ver.attr,
-       &dev_attr_max_eq.attr,
-       &dev_attr_cur_eq.attr,
-       &dev_attr_max_cq.attr,
-       &dev_attr_cur_cq.attr,
-       &dev_attr_max_qp.attr,
-       &dev_attr_cur_qp.attr,
-       &dev_attr_max_mr.attr,
-       &dev_attr_cur_mr.attr,
-       &dev_attr_max_mw.attr,
-       &dev_attr_cur_mw.attr,
-       &dev_attr_max_pd.attr,
-       &dev_attr_max_ah.attr,
-       NULL
-};
-
-static struct attribute_group ehca_dev_attr_grp = {
-       .attrs = ehca_dev_attrs
-};
-
-static int ehca_probe(struct platform_device *dev)
-{
-       struct ehca_shca *shca;
-       const u64 *handle;
-       struct ib_pd *ibpd;
-       int ret, i, eq_size;
-       unsigned long flags;
-
-       handle = of_get_property(dev->dev.of_node, "ibm,hca-handle", NULL);
-       if (!handle) {
-               ehca_gen_err("Cannot get eHCA handle for adapter: %s.",
-                            dev->dev.of_node->full_name);
-               return -ENODEV;
-       }
-
-       if (!(*handle)) {
-               ehca_gen_err("Wrong eHCA handle for adapter: %s.",
-                            dev->dev.of_node->full_name);
-               return -ENODEV;
-       }
-
-       shca = (struct ehca_shca *)ib_alloc_device(sizeof(*shca));
-       if (!shca) {
-               ehca_gen_err("Cannot allocate shca memory.");
-               return -ENOMEM;
-       }
-
-       mutex_init(&shca->modify_mutex);
-       atomic_set(&shca->num_cqs, 0);
-       atomic_set(&shca->num_qps, 0);
-       shca->max_num_qps = ehca_max_qp;
-       shca->max_num_cqs = ehca_max_cq;
-
-       for (i = 0; i < ARRAY_SIZE(shca->sport); i++)
-               spin_lock_init(&shca->sport[i].mod_sqp_lock);
-
-       shca->ofdev = dev;
-       shca->ipz_hca_handle.handle = *handle;
-       dev_set_drvdata(&dev->dev, shca);
-
-       ret = ehca_sense_attributes(shca);
-       if (ret < 0) {
-               ehca_gen_err("Cannot sense eHCA attributes.");
-               goto probe1;
-       }
-
-       ret = ehca_init_device(shca);
-       if (ret) {
-               ehca_gen_err("Cannot init ehca  device struct");
-               goto probe1;
-       }
-
-       eq_size = 2 * shca->max_num_cqs + 4 * shca->max_num_qps;
-       /* create event queues */
-       ret = ehca_create_eq(shca, &shca->eq, EHCA_EQ, eq_size);
-       if (ret) {
-               ehca_err(&shca->ib_device, "Cannot create EQ.");
-               goto probe1;
-       }
-
-       ret = ehca_create_eq(shca, &shca->neq, EHCA_NEQ, 513);
-       if (ret) {
-               ehca_err(&shca->ib_device, "Cannot create NEQ.");
-               goto probe3;
-       }
-
-       /* create internal protection domain */
-       ibpd = ehca_alloc_pd(&shca->ib_device, (void *)(-1), NULL);
-       if (IS_ERR(ibpd)) {
-               ehca_err(&shca->ib_device, "Cannot create internal PD.");
-               ret = PTR_ERR(ibpd);
-               goto probe4;
-       }
-
-       shca->pd = container_of(ibpd, struct ehca_pd, ib_pd);
-       shca->pd->ib_pd.device = &shca->ib_device;
-
-       /* create internal max MR */
-       ret = ehca_reg_internal_maxmr(shca, shca->pd, &shca->maxmr);
-
-       if (ret) {
-               ehca_err(&shca->ib_device, "Cannot create internal MR ret=%i",
-                        ret);
-               goto probe5;
-       }
-
-       ret = ib_register_device(&shca->ib_device, NULL);
-       if (ret) {
-               ehca_err(&shca->ib_device,
-                        "ib_register_device() failed ret=%i", ret);
-               goto probe6;
-       }
-
-       /* create AQP1 for port 1 */
-       if (ehca_open_aqp1 == 1) {
-               shca->sport[0].port_state = IB_PORT_DOWN;
-               ret = ehca_create_aqp1(shca, 1);
-               if (ret) {
-                       ehca_err(&shca->ib_device,
-                                "Cannot create AQP1 for port 1.");
-                       goto probe7;
-               }
-       }
-
-       /* create AQP1 for port 2 */
-       if ((ehca_open_aqp1 == 1) && (shca->num_ports == 2)) {
-               shca->sport[1].port_state = IB_PORT_DOWN;
-               ret = ehca_create_aqp1(shca, 2);
-               if (ret) {
-                       ehca_err(&shca->ib_device,
-                                "Cannot create AQP1 for port 2.");
-                       goto probe8;
-               }
-       }
-
-       ret = sysfs_create_group(&dev->dev.kobj, &ehca_dev_attr_grp);
-       if (ret) /* only complain; we can live without attributes */
-               ehca_err(&shca->ib_device,
-                        "Cannot create device attributes  ret=%d", ret);
-
-       spin_lock_irqsave(&shca_list_lock, flags);
-       list_add(&shca->shca_list, &shca_list);
-       spin_unlock_irqrestore(&shca_list_lock, flags);
-
-       return 0;
-
-probe8:
-       ret = ehca_destroy_aqp1(&shca->sport[0]);
-       if (ret)
-               ehca_err(&shca->ib_device,
-                        "Cannot destroy AQP1 for port 1. ret=%i", ret);
-
-probe7:
-       ib_unregister_device(&shca->ib_device);
-
-probe6:
-       ret = ehca_dereg_internal_maxmr(shca);
-       if (ret)
-               ehca_err(&shca->ib_device,
-                        "Cannot destroy internal MR. ret=%x", ret);
-
-probe5:
-       ret = ehca_dealloc_pd(&shca->pd->ib_pd);
-       if (ret)
-               ehca_err(&shca->ib_device,
-                        "Cannot destroy internal PD. ret=%x", ret);
-
-probe4:
-       ret = ehca_destroy_eq(shca, &shca->neq);
-       if (ret)
-               ehca_err(&shca->ib_device,
-                        "Cannot destroy NEQ. ret=%x", ret);
-
-probe3:
-       ret = ehca_destroy_eq(shca, &shca->eq);
-       if (ret)
-               ehca_err(&shca->ib_device,
-                        "Cannot destroy EQ. ret=%x", ret);
-
-probe1:
-       ib_dealloc_device(&shca->ib_device);
-
-       return -EINVAL;
-}
-
-static int ehca_remove(struct platform_device *dev)
-{
-       struct ehca_shca *shca = dev_get_drvdata(&dev->dev);
-       unsigned long flags;
-       int ret;
-
-       sysfs_remove_group(&dev->dev.kobj, &ehca_dev_attr_grp);
-
-       if (ehca_open_aqp1 == 1) {
-               int i;
-               for (i = 0; i < shca->num_ports; i++) {
-                       ret = ehca_destroy_aqp1(&shca->sport[i]);
-                       if (ret)
-                               ehca_err(&shca->ib_device,
-                                        "Cannot destroy AQP1 for port %x "
-                                        "ret=%i", ret, i);
-               }
-       }
-
-       ib_unregister_device(&shca->ib_device);
-
-       ret = ehca_dereg_internal_maxmr(shca);
-       if (ret)
-               ehca_err(&shca->ib_device,
-                        "Cannot destroy internal MR. ret=%i", ret);
-
-       ret = ehca_dealloc_pd(&shca->pd->ib_pd);
-       if (ret)
-               ehca_err(&shca->ib_device,
-                        "Cannot destroy internal PD. ret=%i", ret);
-
-       ret = ehca_destroy_eq(shca, &shca->eq);
-       if (ret)
-               ehca_err(&shca->ib_device, "Cannot destroy EQ. ret=%i", ret);
-
-       ret = ehca_destroy_eq(shca, &shca->neq);
-       if (ret)
-               ehca_err(&shca->ib_device, "Canot destroy NEQ. ret=%i", ret);
-
-       ib_dealloc_device(&shca->ib_device);
-
-       spin_lock_irqsave(&shca_list_lock, flags);
-       list_del(&shca->shca_list);
-       spin_unlock_irqrestore(&shca_list_lock, flags);
-
-       return ret;
-}
-
-static struct of_device_id ehca_device_table[] =
-{
-       {
-               .name       = "lhca",
-               .compatible = "IBM,lhca",
-       },
-       {},
-};
-MODULE_DEVICE_TABLE(of, ehca_device_table);
-
-static struct platform_driver ehca_driver = {
-       .probe       = ehca_probe,
-       .remove      = ehca_remove,
-       .driver = {
-               .name = "ehca",
-               .owner = THIS_MODULE,
-               .groups = ehca_drv_attr_groups,
-               .of_match_table = ehca_device_table,
-       },
-};
-
-void ehca_poll_eqs(unsigned long data)
-{
-       struct ehca_shca *shca;
-
-       spin_lock(&shca_list_lock);
-       list_for_each_entry(shca, &shca_list, shca_list) {
-               if (shca->eq.is_initialized) {
-                       /* call deadman proc only if eq ptr does not change */
-                       struct ehca_eq *eq = &shca->eq;
-                       int max = 3;
-                       volatile u64 q_ofs, q_ofs2;
-                       unsigned long flags;
-                       spin_lock_irqsave(&eq->spinlock, flags);
-                       q_ofs = eq->ipz_queue.current_q_offset;
-                       spin_unlock_irqrestore(&eq->spinlock, flags);
-                       do {
-                               spin_lock_irqsave(&eq->spinlock, flags);
-                               q_ofs2 = eq->ipz_queue.current_q_offset;
-                               spin_unlock_irqrestore(&eq->spinlock, flags);
-                               max--;
-                       } while (q_ofs == q_ofs2 && max > 0);
-                       if (q_ofs == q_ofs2)
-                               ehca_process_eq(shca, 0);
-               }
-       }
-       mod_timer(&poll_eqs_timer, round_jiffies(jiffies + HZ));
-       spin_unlock(&shca_list_lock);
-}
-
-static int ehca_mem_notifier(struct notifier_block *nb,
-                            unsigned long action, void *data)
-{
-       static unsigned long ehca_dmem_warn_time;
-       unsigned long flags;
-
-       switch (action) {
-       case MEM_CANCEL_OFFLINE:
-       case MEM_CANCEL_ONLINE:
-       case MEM_ONLINE:
-       case MEM_OFFLINE:
-               return NOTIFY_OK;
-       case MEM_GOING_ONLINE:
-       case MEM_GOING_OFFLINE:
-               /* only ok if no hca is attached to the lpar */
-               spin_lock_irqsave(&shca_list_lock, flags);
-               if (list_empty(&shca_list)) {
-                       spin_unlock_irqrestore(&shca_list_lock, flags);
-                       return NOTIFY_OK;
-               } else {
-                       spin_unlock_irqrestore(&shca_list_lock, flags);
-                       if (printk_timed_ratelimit(&ehca_dmem_warn_time,
-                                                  30 * 1000))
-                               ehca_gen_err("DMEM operations are not allowed"
-                                            "in conjunction with eHCA");
-                       return NOTIFY_BAD;
-               }
-       }
-       return NOTIFY_OK;
-}
-
-static struct notifier_block ehca_mem_nb = {
-       .notifier_call = ehca_mem_notifier,
-};
-
-static int __init ehca_module_init(void)
-{
-       int ret;
-
-       printk(KERN_INFO "eHCA Infiniband Device Driver "
-              "(Version " HCAD_VERSION ")\n");
-
-       ret = ehca_create_comp_pool();
-       if (ret) {
-               ehca_gen_err("Cannot create comp pool.");
-               return ret;
-       }
-
-       ret = ehca_create_slab_caches();
-       if (ret) {
-               ehca_gen_err("Cannot create SLAB caches");
-               ret = -ENOMEM;
-               goto module_init1;
-       }
-
-       ret = ehca_create_busmap();
-       if (ret) {
-               ehca_gen_err("Cannot create busmap.");
-               goto module_init2;
-       }
-
-       ret = ibmebus_register_driver(&ehca_driver);
-       if (ret) {
-               ehca_gen_err("Cannot register eHCA device driver");
-               ret = -EINVAL;
-               goto module_init3;
-       }
-
-       ret = register_memory_notifier(&ehca_mem_nb);
-       if (ret) {
-               ehca_gen_err("Failed registering memory add/remove notifier");
-               goto module_init4;
-       }
-
-       if (ehca_poll_all_eqs != 1) {
-               ehca_gen_err("WARNING!!!");
-               ehca_gen_err("It is possible to lose interrupts.");
-       } else {
-               init_timer(&poll_eqs_timer);
-               poll_eqs_timer.function = ehca_poll_eqs;
-               poll_eqs_timer.expires = jiffies + HZ;
-               add_timer(&poll_eqs_timer);
-       }
-
-       return 0;
-
-module_init4:
-       ibmebus_unregister_driver(&ehca_driver);
-
-module_init3:
-       ehca_destroy_busmap();
-
-module_init2:
-       ehca_destroy_slab_caches();
-
-module_init1:
-       ehca_destroy_comp_pool();
-       return ret;
-};
-
-static void __exit ehca_module_exit(void)
-{
-       if (ehca_poll_all_eqs == 1)
-               del_timer_sync(&poll_eqs_timer);
-
-       ibmebus_unregister_driver(&ehca_driver);
-
-       unregister_memory_notifier(&ehca_mem_nb);
-
-       ehca_destroy_busmap();
-
-       ehca_destroy_slab_caches();
-
-       ehca_destroy_comp_pool();
-
-       idr_destroy(&ehca_cq_idr);
-       idr_destroy(&ehca_qp_idr);
-};
-
-module_init(ehca_module_init);
-module_exit(ehca_module_exit);
diff --git a/drivers/staging/rdma/ehca/ehca_mcast.c b/drivers/staging/rdma/ehca/ehca_mcast.c
deleted file mode 100644 (file)
index cec1815..0000000
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  mcast  functions
- *
- *  Authors: Khadija Souissi <souissik@de.ibm.com>
- *           Waleri Fomin <fomin@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Heiko J Schick <schickhj@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/module.h>
-#include <linux/err.h>
-#include "ehca_classes.h"
-#include "ehca_tools.h"
-#include "ehca_qes.h"
-#include "ehca_iverbs.h"
-#include "hcp_if.h"
-
-#define MAX_MC_LID 0xFFFE
-#define MIN_MC_LID 0xC000      /* Multicast limits */
-#define EHCA_VALID_MULTICAST_GID(gid)  ((gid)[0] == 0xFF)
-#define EHCA_VALID_MULTICAST_LID(lid) \
-       (((lid) >= MIN_MC_LID) && ((lid) <= MAX_MC_LID))
-
-int ehca_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
-{
-       struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
-       struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca,
-                                             ib_device);
-       union ib_gid my_gid;
-       u64 subnet_prefix, interface_id, h_ret;
-
-       if (ibqp->qp_type != IB_QPT_UD) {
-               ehca_err(ibqp->device, "invalid qp_type=%x", ibqp->qp_type);
-               return -EINVAL;
-       }
-
-       if (!(EHCA_VALID_MULTICAST_GID(gid->raw))) {
-               ehca_err(ibqp->device, "invalid mulitcast gid");
-               return -EINVAL;
-       } else if ((lid < MIN_MC_LID) || (lid > MAX_MC_LID)) {
-               ehca_err(ibqp->device, "invalid mulitcast lid=%x", lid);
-               return -EINVAL;
-       }
-
-       memcpy(&my_gid, gid->raw, sizeof(union ib_gid));
-
-       subnet_prefix = be64_to_cpu(my_gid.global.subnet_prefix);
-       interface_id = be64_to_cpu(my_gid.global.interface_id);
-       h_ret = hipz_h_attach_mcqp(shca->ipz_hca_handle,
-                                  my_qp->ipz_qp_handle,
-                                  my_qp->galpas.kernel,
-                                  lid, subnet_prefix, interface_id);
-       if (h_ret != H_SUCCESS)
-               ehca_err(ibqp->device,
-                        "ehca_qp=%p qp_num=%x hipz_h_attach_mcqp() failed "
-                        "h_ret=%lli", my_qp, ibqp->qp_num, h_ret);
-
-       return ehca2ib_return_code(h_ret);
-}
-
-int ehca_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
-{
-       struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
-       struct ehca_shca *shca = container_of(ibqp->pd->device,
-                                             struct ehca_shca, ib_device);
-       union ib_gid my_gid;
-       u64 subnet_prefix, interface_id, h_ret;
-
-       if (ibqp->qp_type != IB_QPT_UD) {
-               ehca_err(ibqp->device, "invalid qp_type %x", ibqp->qp_type);
-               return -EINVAL;
-       }
-
-       if (!(EHCA_VALID_MULTICAST_GID(gid->raw))) {
-               ehca_err(ibqp->device, "invalid mulitcast gid");
-               return -EINVAL;
-       } else if ((lid < MIN_MC_LID) || (lid > MAX_MC_LID)) {
-               ehca_err(ibqp->device, "invalid mulitcast lid=%x", lid);
-               return -EINVAL;
-       }
-
-       memcpy(&my_gid, gid->raw, sizeof(union ib_gid));
-
-       subnet_prefix = be64_to_cpu(my_gid.global.subnet_prefix);
-       interface_id = be64_to_cpu(my_gid.global.interface_id);
-       h_ret = hipz_h_detach_mcqp(shca->ipz_hca_handle,
-                                  my_qp->ipz_qp_handle,
-                                  my_qp->galpas.kernel,
-                                  lid, subnet_prefix, interface_id);
-       if (h_ret != H_SUCCESS)
-               ehca_err(ibqp->device,
-                        "ehca_qp=%p qp_num=%x hipz_h_detach_mcqp() failed "
-                        "h_ret=%lli", my_qp, ibqp->qp_num, h_ret);
-
-       return ehca2ib_return_code(h_ret);
-}
diff --git a/drivers/staging/rdma/ehca/ehca_mrmw.c b/drivers/staging/rdma/ehca/ehca_mrmw.c
deleted file mode 100644 (file)
index 553e883..0000000
+++ /dev/null
@@ -1,2591 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  MR/MW functions
- *
- *  Authors: Dietmar Decker <ddecker@de.ibm.com>
- *           Christoph Raisch <raisch@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/slab.h>
-#include <rdma/ib_umem.h>
-
-#include "ehca_iverbs.h"
-#include "ehca_mrmw.h"
-#include "hcp_if.h"
-#include "hipz_hw.h"
-
-#define NUM_CHUNKS(length, chunk_size) \
-       (((length) + (chunk_size - 1)) / (chunk_size))
-
-/* max number of rpages (per hcall register_rpages) */
-#define MAX_RPAGES 512
-
-/* DMEM toleration management */
-#define EHCA_SECTSHIFT        SECTION_SIZE_BITS
-#define EHCA_SECTSIZE          (1UL << EHCA_SECTSHIFT)
-#define EHCA_HUGEPAGESHIFT     34
-#define EHCA_HUGEPAGE_SIZE     (1UL << EHCA_HUGEPAGESHIFT)
-#define EHCA_HUGEPAGE_PFN_MASK ((EHCA_HUGEPAGE_SIZE - 1) >> PAGE_SHIFT)
-#define EHCA_INVAL_ADDR        0xFFFFFFFFFFFFFFFFULL
-#define EHCA_DIR_INDEX_SHIFT 13                   /* 8k Entries in 64k block */
-#define EHCA_TOP_INDEX_SHIFT (EHCA_DIR_INDEX_SHIFT * 2)
-#define EHCA_MAP_ENTRIES (1 << EHCA_DIR_INDEX_SHIFT)
-#define EHCA_TOP_MAP_SIZE (0x10000)               /* currently fixed map size */
-#define EHCA_DIR_MAP_SIZE (0x10000)
-#define EHCA_ENT_MAP_SIZE (0x10000)
-#define EHCA_INDEX_MASK (EHCA_MAP_ENTRIES - 1)
-
-static unsigned long ehca_mr_len;
-
-/*
- * Memory map data structures
- */
-struct ehca_dir_bmap {
-       u64 ent[EHCA_MAP_ENTRIES];
-};
-struct ehca_top_bmap {
-       struct ehca_dir_bmap *dir[EHCA_MAP_ENTRIES];
-};
-struct ehca_bmap {
-       struct ehca_top_bmap *top[EHCA_MAP_ENTRIES];
-};
-
-static struct ehca_bmap *ehca_bmap;
-
-static struct kmem_cache *mr_cache;
-static struct kmem_cache *mw_cache;
-
-enum ehca_mr_pgsize {
-       EHCA_MR_PGSIZE4K  = 0x1000L,
-       EHCA_MR_PGSIZE64K = 0x10000L,
-       EHCA_MR_PGSIZE1M  = 0x100000L,
-       EHCA_MR_PGSIZE16M = 0x1000000L
-};
-
-#define EHCA_MR_PGSHIFT4K  12
-#define EHCA_MR_PGSHIFT64K 16
-#define EHCA_MR_PGSHIFT1M  20
-#define EHCA_MR_PGSHIFT16M 24
-
-static u64 ehca_map_vaddr(void *caddr);
-
-static u32 ehca_encode_hwpage_size(u32 pgsize)
-{
-       int log = ilog2(pgsize);
-       WARN_ON(log < 12 || log > 24 || log & 3);
-       return (log - 12) / 4;
-}
-
-static u64 ehca_get_max_hwpage_size(struct ehca_shca *shca)
-{
-       return rounddown_pow_of_two(shca->hca_cap_mr_pgsize);
-}
-
-static struct ehca_mr *ehca_mr_new(void)
-{
-       struct ehca_mr *me;
-
-       me = kmem_cache_zalloc(mr_cache, GFP_KERNEL);
-       if (me)
-               spin_lock_init(&me->mrlock);
-       else
-               ehca_gen_err("alloc failed");
-
-       return me;
-}
-
-static void ehca_mr_delete(struct ehca_mr *me)
-{
-       kmem_cache_free(mr_cache, me);
-}
-
-static struct ehca_mw *ehca_mw_new(void)
-{
-       struct ehca_mw *me;
-
-       me = kmem_cache_zalloc(mw_cache, GFP_KERNEL);
-       if (me)
-               spin_lock_init(&me->mwlock);
-       else
-               ehca_gen_err("alloc failed");
-
-       return me;
-}
-
-static void ehca_mw_delete(struct ehca_mw *me)
-{
-       kmem_cache_free(mw_cache, me);
-}
-
-/*----------------------------------------------------------------------*/
-
-struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
-{
-       struct ib_mr *ib_mr;
-       int ret;
-       struct ehca_mr *e_maxmr;
-       struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
-       struct ehca_shca *shca =
-               container_of(pd->device, struct ehca_shca, ib_device);
-
-       if (shca->maxmr) {
-               e_maxmr = ehca_mr_new();
-               if (!e_maxmr) {
-                       ehca_err(&shca->ib_device, "out of memory");
-                       ib_mr = ERR_PTR(-ENOMEM);
-                       goto get_dma_mr_exit0;
-               }
-
-               ret = ehca_reg_maxmr(shca, e_maxmr,
-                                    (void *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)),
-                                    mr_access_flags, e_pd,
-                                    &e_maxmr->ib.ib_mr.lkey,
-                                    &e_maxmr->ib.ib_mr.rkey);
-               if (ret) {
-                       ehca_mr_delete(e_maxmr);
-                       ib_mr = ERR_PTR(ret);
-                       goto get_dma_mr_exit0;
-               }
-               ib_mr = &e_maxmr->ib.ib_mr;
-       } else {
-               ehca_err(&shca->ib_device, "no internal max-MR exist!");
-               ib_mr = ERR_PTR(-EINVAL);
-               goto get_dma_mr_exit0;
-       }
-
-get_dma_mr_exit0:
-       if (IS_ERR(ib_mr))
-               ehca_err(&shca->ib_device, "h_ret=%li pd=%p mr_access_flags=%x",
-                        PTR_ERR(ib_mr), pd, mr_access_flags);
-       return ib_mr;
-} /* end ehca_get_dma_mr() */
-
-/*----------------------------------------------------------------------*/
-
-struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
-                              struct ib_phys_buf *phys_buf_array,
-                              int num_phys_buf,
-                              int mr_access_flags,
-                              u64 *iova_start)
-{
-       struct ib_mr *ib_mr;
-       int ret;
-       struct ehca_mr *e_mr;
-       struct ehca_shca *shca =
-               container_of(pd->device, struct ehca_shca, ib_device);
-       struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
-
-       u64 size;
-
-       if ((num_phys_buf <= 0) || !phys_buf_array) {
-               ehca_err(pd->device, "bad input values: num_phys_buf=%x "
-                        "phys_buf_array=%p", num_phys_buf, phys_buf_array);
-               ib_mr = ERR_PTR(-EINVAL);
-               goto reg_phys_mr_exit0;
-       }
-       if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
-            !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
-           ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
-            !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) {
-               /*
-                * Remote Write Access requires Local Write Access
-                * Remote Atomic Access requires Local Write Access
-                */
-               ehca_err(pd->device, "bad input values: mr_access_flags=%x",
-                        mr_access_flags);
-               ib_mr = ERR_PTR(-EINVAL);
-               goto reg_phys_mr_exit0;
-       }
-
-       /* check physical buffer list and calculate size */
-       ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array, num_phys_buf,
-                                           iova_start, &size);
-       if (ret) {
-               ib_mr = ERR_PTR(ret);
-               goto reg_phys_mr_exit0;
-       }
-       if ((size == 0) ||
-           (((u64)iova_start + size) < (u64)iova_start)) {
-               ehca_err(pd->device, "bad input values: size=%llx iova_start=%p",
-                        size, iova_start);
-               ib_mr = ERR_PTR(-EINVAL);
-               goto reg_phys_mr_exit0;
-       }
-
-       e_mr = ehca_mr_new();
-       if (!e_mr) {
-               ehca_err(pd->device, "out of memory");
-               ib_mr = ERR_PTR(-ENOMEM);
-               goto reg_phys_mr_exit0;
-       }
-
-       /* register MR on HCA */
-       if (ehca_mr_is_maxmr(size, iova_start)) {
-               e_mr->flags |= EHCA_MR_FLAG_MAXMR;
-               ret = ehca_reg_maxmr(shca, e_mr, iova_start, mr_access_flags,
-                                    e_pd, &e_mr->ib.ib_mr.lkey,
-                                    &e_mr->ib.ib_mr.rkey);
-               if (ret) {
-                       ib_mr = ERR_PTR(ret);
-                       goto reg_phys_mr_exit1;
-               }
-       } else {
-               struct ehca_mr_pginfo pginfo;
-               u32 num_kpages;
-               u32 num_hwpages;
-               u64 hw_pgsize;
-
-               num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size,
-                                       PAGE_SIZE);
-               /* for kernel space we try most possible pgsize */
-               hw_pgsize = ehca_get_max_hwpage_size(shca);
-               num_hwpages = NUM_CHUNKS(((u64)iova_start % hw_pgsize) + size,
-                                        hw_pgsize);
-               memset(&pginfo, 0, sizeof(pginfo));
-               pginfo.type = EHCA_MR_PGI_PHYS;
-               pginfo.num_kpages = num_kpages;
-               pginfo.hwpage_size = hw_pgsize;
-               pginfo.num_hwpages = num_hwpages;
-               pginfo.u.phy.num_phys_buf = num_phys_buf;
-               pginfo.u.phy.phys_buf_array = phys_buf_array;
-               pginfo.next_hwpage =
-                       ((u64)iova_start & ~PAGE_MASK) / hw_pgsize;
-
-               ret = ehca_reg_mr(shca, e_mr, iova_start, size, mr_access_flags,
-                                 e_pd, &pginfo, &e_mr->ib.ib_mr.lkey,
-                                 &e_mr->ib.ib_mr.rkey, EHCA_REG_MR);
-               if (ret) {
-                       ib_mr = ERR_PTR(ret);
-                       goto reg_phys_mr_exit1;
-               }
-       }
-
-       /* successful registration of all pages */
-       return &e_mr->ib.ib_mr;
-
-reg_phys_mr_exit1:
-       ehca_mr_delete(e_mr);
-reg_phys_mr_exit0:
-       if (IS_ERR(ib_mr))
-               ehca_err(pd->device, "h_ret=%li pd=%p phys_buf_array=%p "
-                        "num_phys_buf=%x mr_access_flags=%x iova_start=%p",
-                        PTR_ERR(ib_mr), pd, phys_buf_array,
-                        num_phys_buf, mr_access_flags, iova_start);
-       return ib_mr;
-} /* end ehca_reg_phys_mr() */
-
-/*----------------------------------------------------------------------*/
-
-struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
-                              u64 virt, int mr_access_flags,
-                              struct ib_udata *udata)
-{
-       struct ib_mr *ib_mr;
-       struct ehca_mr *e_mr;
-       struct ehca_shca *shca =
-               container_of(pd->device, struct ehca_shca, ib_device);
-       struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
-       struct ehca_mr_pginfo pginfo;
-       int ret, page_shift;
-       u32 num_kpages;
-       u32 num_hwpages;
-       u64 hwpage_size;
-
-       if (!pd) {
-               ehca_gen_err("bad pd=%p", pd);
-               return ERR_PTR(-EFAULT);
-       }
-
-       if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
-            !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
-           ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
-            !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) {
-               /*
-                * Remote Write Access requires Local Write Access
-                * Remote Atomic Access requires Local Write Access
-                */
-               ehca_err(pd->device, "bad input values: mr_access_flags=%x",
-                        mr_access_flags);
-               ib_mr = ERR_PTR(-EINVAL);
-               goto reg_user_mr_exit0;
-       }
-
-       if (length == 0 || virt + length < virt) {
-               ehca_err(pd->device, "bad input values: length=%llx "
-                        "virt_base=%llx", length, virt);
-               ib_mr = ERR_PTR(-EINVAL);
-               goto reg_user_mr_exit0;
-       }
-
-       e_mr = ehca_mr_new();
-       if (!e_mr) {
-               ehca_err(pd->device, "out of memory");
-               ib_mr = ERR_PTR(-ENOMEM);
-               goto reg_user_mr_exit0;
-       }
-
-       e_mr->umem = ib_umem_get(pd->uobject->context, start, length,
-                                mr_access_flags, 0);
-       if (IS_ERR(e_mr->umem)) {
-               ib_mr = (void *)e_mr->umem;
-               goto reg_user_mr_exit1;
-       }
-
-       if (e_mr->umem->page_size != PAGE_SIZE) {
-               ehca_err(pd->device, "page size not supported, "
-                        "e_mr->umem->page_size=%x", e_mr->umem->page_size);
-               ib_mr = ERR_PTR(-EINVAL);
-               goto reg_user_mr_exit2;
-       }
-
-       /* determine number of MR pages */
-       num_kpages = NUM_CHUNKS((virt % PAGE_SIZE) + length, PAGE_SIZE);
-       /* select proper hw_pgsize */
-       page_shift = PAGE_SHIFT;
-       if (e_mr->umem->hugetlb) {
-               /* determine page_shift, clamp between 4K and 16M */
-               page_shift = (fls64(length - 1) + 3) & ~3;
-               page_shift = min(max(page_shift, EHCA_MR_PGSHIFT4K),
-                                EHCA_MR_PGSHIFT16M);
-       }
-       hwpage_size = 1UL << page_shift;
-
-       /* now that we have the desired page size, shift until it's
-        * supported, too. 4K is always supported, so this terminates.
-        */
-       while (!(hwpage_size & shca->hca_cap_mr_pgsize))
-               hwpage_size >>= 4;
-
-reg_user_mr_fallback:
-       num_hwpages = NUM_CHUNKS((virt % hwpage_size) + length, hwpage_size);
-       /* register MR on HCA */
-       memset(&pginfo, 0, sizeof(pginfo));
-       pginfo.type = EHCA_MR_PGI_USER;
-       pginfo.hwpage_size = hwpage_size;
-       pginfo.num_kpages = num_kpages;
-       pginfo.num_hwpages = num_hwpages;
-       pginfo.u.usr.region = e_mr->umem;
-       pginfo.next_hwpage = ib_umem_offset(e_mr->umem) / hwpage_size;
-       pginfo.u.usr.next_sg = pginfo.u.usr.region->sg_head.sgl;
-       ret = ehca_reg_mr(shca, e_mr, (u64 *)virt, length, mr_access_flags,
-                         e_pd, &pginfo, &e_mr->ib.ib_mr.lkey,
-                         &e_mr->ib.ib_mr.rkey, EHCA_REG_MR);
-       if (ret == -EINVAL && pginfo.hwpage_size > PAGE_SIZE) {
-               ehca_warn(pd->device, "failed to register mr "
-                         "with hwpage_size=%llx", hwpage_size);
-               ehca_info(pd->device, "try to register mr with "
-                         "kpage_size=%lx", PAGE_SIZE);
-               /*
-                * this means kpages are not contiguous for a hw page
-                * try kernel page size as fallback solution
-                */
-               hwpage_size = PAGE_SIZE;
-               goto reg_user_mr_fallback;
-       }
-       if (ret) {
-               ib_mr = ERR_PTR(ret);
-               goto reg_user_mr_exit2;
-       }
-
-       /* successful registration of all pages */
-       return &e_mr->ib.ib_mr;
-
-reg_user_mr_exit2:
-       ib_umem_release(e_mr->umem);
-reg_user_mr_exit1:
-       ehca_mr_delete(e_mr);
-reg_user_mr_exit0:
-       if (IS_ERR(ib_mr))
-               ehca_err(pd->device, "rc=%li pd=%p mr_access_flags=%x udata=%p",
-                        PTR_ERR(ib_mr), pd, mr_access_flags, udata);
-       return ib_mr;
-} /* end ehca_reg_user_mr() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_rereg_phys_mr(struct ib_mr *mr,
-                      int mr_rereg_mask,
-                      struct ib_pd *pd,
-                      struct ib_phys_buf *phys_buf_array,
-                      int num_phys_buf,
-                      int mr_access_flags,
-                      u64 *iova_start)
-{
-       int ret;
-
-       struct ehca_shca *shca =
-               container_of(mr->device, struct ehca_shca, ib_device);
-       struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
-       u64 new_size;
-       u64 *new_start;
-       u32 new_acl;
-       struct ehca_pd *new_pd;
-       u32 tmp_lkey, tmp_rkey;
-       unsigned long sl_flags;
-       u32 num_kpages = 0;
-       u32 num_hwpages = 0;
-       struct ehca_mr_pginfo pginfo;
-
-       if (!(mr_rereg_mask & IB_MR_REREG_TRANS)) {
-               /* TODO not supported, because PHYP rereg hCall needs pages */
-               ehca_err(mr->device, "rereg without IB_MR_REREG_TRANS not "
-                        "supported yet, mr_rereg_mask=%x", mr_rereg_mask);
-               ret = -EINVAL;
-               goto rereg_phys_mr_exit0;
-       }
-
-       if (mr_rereg_mask & IB_MR_REREG_PD) {
-               if (!pd) {
-                       ehca_err(mr->device, "rereg with bad pd, pd=%p "
-                                "mr_rereg_mask=%x", pd, mr_rereg_mask);
-                       ret = -EINVAL;
-                       goto rereg_phys_mr_exit0;
-               }
-       }
-
-       if ((mr_rereg_mask &
-            ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS)) ||
-           (mr_rereg_mask == 0)) {
-               ret = -EINVAL;
-               goto rereg_phys_mr_exit0;
-       }
-
-       /* check other parameters */
-       if (e_mr == shca->maxmr) {
-               /* should be impossible, however reject to be sure */
-               ehca_err(mr->device, "rereg internal max-MR impossible, mr=%p "
-                        "shca->maxmr=%p mr->lkey=%x",
-                        mr, shca->maxmr, mr->lkey);
-               ret = -EINVAL;
-               goto rereg_phys_mr_exit0;
-       }
-       if (mr_rereg_mask & IB_MR_REREG_TRANS) { /* transl., i.e. addr/size */
-               if (e_mr->flags & EHCA_MR_FLAG_FMR) {
-                       ehca_err(mr->device, "not supported for FMR, mr=%p "
-                                "flags=%x", mr, e_mr->flags);
-                       ret = -EINVAL;
-                       goto rereg_phys_mr_exit0;
-               }
-               if (!phys_buf_array || num_phys_buf <= 0) {
-                       ehca_err(mr->device, "bad input values mr_rereg_mask=%x"
-                                " phys_buf_array=%p num_phys_buf=%x",
-                                mr_rereg_mask, phys_buf_array, num_phys_buf);
-                       ret = -EINVAL;
-                       goto rereg_phys_mr_exit0;
-               }
-       }
-       if ((mr_rereg_mask & IB_MR_REREG_ACCESS) &&     /* change ACL */
-           (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
-             !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
-            ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
-             !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)))) {
-               /*
-                * Remote Write Access requires Local Write Access
-                * Remote Atomic Access requires Local Write Access
-                */
-               ehca_err(mr->device, "bad input values: mr_rereg_mask=%x "
-                        "mr_access_flags=%x", mr_rereg_mask, mr_access_flags);
-               ret = -EINVAL;
-               goto rereg_phys_mr_exit0;
-       }
-
-       /* set requested values dependent on rereg request */
-       spin_lock_irqsave(&e_mr->mrlock, sl_flags);
-       new_start = e_mr->start;
-       new_size = e_mr->size;
-       new_acl = e_mr->acl;
-       new_pd = container_of(mr->pd, struct ehca_pd, ib_pd);
-
-       if (mr_rereg_mask & IB_MR_REREG_TRANS) {
-               u64 hw_pgsize = ehca_get_max_hwpage_size(shca);
-
-               new_start = iova_start; /* change address */
-               /* check physical buffer list and calculate size */
-               ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array,
-                                                   num_phys_buf, iova_start,
-                                                   &new_size);
-               if (ret)
-                       goto rereg_phys_mr_exit1;
-               if ((new_size == 0) ||
-                   (((u64)iova_start + new_size) < (u64)iova_start)) {
-                       ehca_err(mr->device, "bad input values: new_size=%llx "
-                                "iova_start=%p", new_size, iova_start);
-                       ret = -EINVAL;
-                       goto rereg_phys_mr_exit1;
-               }
-               num_kpages = NUM_CHUNKS(((u64)new_start % PAGE_SIZE) +
-                                       new_size, PAGE_SIZE);
-               num_hwpages = NUM_CHUNKS(((u64)new_start % hw_pgsize) +
-                                        new_size, hw_pgsize);
-               memset(&pginfo, 0, sizeof(pginfo));
-               pginfo.type = EHCA_MR_PGI_PHYS;
-               pginfo.num_kpages = num_kpages;
-               pginfo.hwpage_size = hw_pgsize;
-               pginfo.num_hwpages = num_hwpages;
-               pginfo.u.phy.num_phys_buf = num_phys_buf;
-               pginfo.u.phy.phys_buf_array = phys_buf_array;
-               pginfo.next_hwpage =
-                       ((u64)iova_start & ~PAGE_MASK) / hw_pgsize;
-       }
-       if (mr_rereg_mask & IB_MR_REREG_ACCESS)
-               new_acl = mr_access_flags;
-       if (mr_rereg_mask & IB_MR_REREG_PD)
-               new_pd = container_of(pd, struct ehca_pd, ib_pd);
-
-       ret = ehca_rereg_mr(shca, e_mr, new_start, new_size, new_acl,
-                           new_pd, &pginfo, &tmp_lkey, &tmp_rkey);
-       if (ret)
-               goto rereg_phys_mr_exit1;
-
-       /* successful reregistration */
-       if (mr_rereg_mask & IB_MR_REREG_PD)
-               mr->pd = pd;
-       mr->lkey = tmp_lkey;
-       mr->rkey = tmp_rkey;
-
-rereg_phys_mr_exit1:
-       spin_unlock_irqrestore(&e_mr->mrlock, sl_flags);
-rereg_phys_mr_exit0:
-       if (ret)
-               ehca_err(mr->device, "ret=%i mr=%p mr_rereg_mask=%x pd=%p "
-                        "phys_buf_array=%p num_phys_buf=%x mr_access_flags=%x "
-                        "iova_start=%p",
-                        ret, mr, mr_rereg_mask, pd, phys_buf_array,
-                        num_phys_buf, mr_access_flags, iova_start);
-       return ret;
-} /* end ehca_rereg_phys_mr() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr)
-{
-       int ret = 0;
-       u64 h_ret;
-       struct ehca_shca *shca =
-               container_of(mr->device, struct ehca_shca, ib_device);
-       struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
-       unsigned long sl_flags;
-       struct ehca_mr_hipzout_parms hipzout;
-
-       if ((e_mr->flags & EHCA_MR_FLAG_FMR)) {
-               ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p "
-                        "e_mr->flags=%x", mr, e_mr, e_mr->flags);
-               ret = -EINVAL;
-               goto query_mr_exit0;
-       }
-
-       memset(mr_attr, 0, sizeof(struct ib_mr_attr));
-       spin_lock_irqsave(&e_mr->mrlock, sl_flags);
-
-       h_ret = hipz_h_query_mr(shca->ipz_hca_handle, e_mr, &hipzout);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(mr->device, "hipz_mr_query failed, h_ret=%lli mr=%p "
-                        "hca_hndl=%llx mr_hndl=%llx lkey=%x",
-                        h_ret, mr, shca->ipz_hca_handle.handle,
-                        e_mr->ipz_mr_handle.handle, mr->lkey);
-               ret = ehca2ib_return_code(h_ret);
-               goto query_mr_exit1;
-       }
-       mr_attr->pd = mr->pd;
-       mr_attr->device_virt_addr = hipzout.vaddr;
-       mr_attr->size = hipzout.len;
-       mr_attr->lkey = hipzout.lkey;
-       mr_attr->rkey = hipzout.rkey;
-       ehca_mrmw_reverse_map_acl(&hipzout.acl, &mr_attr->mr_access_flags);
-
-query_mr_exit1:
-       spin_unlock_irqrestore(&e_mr->mrlock, sl_flags);
-query_mr_exit0:
-       if (ret)
-               ehca_err(mr->device, "ret=%i mr=%p mr_attr=%p",
-                        ret, mr, mr_attr);
-       return ret;
-} /* end ehca_query_mr() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_dereg_mr(struct ib_mr *mr)
-{
-       int ret = 0;
-       u64 h_ret;
-       struct ehca_shca *shca =
-               container_of(mr->device, struct ehca_shca, ib_device);
-       struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
-
-       if ((e_mr->flags & EHCA_MR_FLAG_FMR)) {
-               ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p "
-                        "e_mr->flags=%x", mr, e_mr, e_mr->flags);
-               ret = -EINVAL;
-               goto dereg_mr_exit0;
-       } else if (e_mr == shca->maxmr) {
-               /* should be impossible, however reject to be sure */
-               ehca_err(mr->device, "dereg internal max-MR impossible, mr=%p "
-                        "shca->maxmr=%p mr->lkey=%x",
-                        mr, shca->maxmr, mr->lkey);
-               ret = -EINVAL;
-               goto dereg_mr_exit0;
-       }
-
-       /* TODO: BUSY: MR still has bound window(s) */
-       h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(mr->device, "hipz_free_mr failed, h_ret=%lli shca=%p "
-                        "e_mr=%p hca_hndl=%llx mr_hndl=%llx mr->lkey=%x",
-                        h_ret, shca, e_mr, shca->ipz_hca_handle.handle,
-                        e_mr->ipz_mr_handle.handle, mr->lkey);
-               ret = ehca2ib_return_code(h_ret);
-               goto dereg_mr_exit0;
-       }
-
-       if (e_mr->umem)
-               ib_umem_release(e_mr->umem);
-
-       /* successful deregistration */
-       ehca_mr_delete(e_mr);
-
-dereg_mr_exit0:
-       if (ret)
-               ehca_err(mr->device, "ret=%i mr=%p", ret, mr);
-       return ret;
-} /* end ehca_dereg_mr() */
-
-/*----------------------------------------------------------------------*/
-
-struct ib_mw *ehca_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
-{
-       struct ib_mw *ib_mw;
-       u64 h_ret;
-       struct ehca_mw *e_mw;
-       struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
-       struct ehca_shca *shca =
-               container_of(pd->device, struct ehca_shca, ib_device);
-       struct ehca_mw_hipzout_parms hipzout;
-
-       if (type != IB_MW_TYPE_1)
-               return ERR_PTR(-EINVAL);
-
-       e_mw = ehca_mw_new();
-       if (!e_mw) {
-               ib_mw = ERR_PTR(-ENOMEM);
-               goto alloc_mw_exit0;
-       }
-
-       h_ret = hipz_h_alloc_resource_mw(shca->ipz_hca_handle, e_mw,
-                                        e_pd->fw_pd, &hipzout);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(pd->device, "hipz_mw_allocate failed, h_ret=%lli "
-                        "shca=%p hca_hndl=%llx mw=%p",
-                        h_ret, shca, shca->ipz_hca_handle.handle, e_mw);
-               ib_mw = ERR_PTR(ehca2ib_return_code(h_ret));
-               goto alloc_mw_exit1;
-       }
-       /* successful MW allocation */
-       e_mw->ipz_mw_handle = hipzout.handle;
-       e_mw->ib_mw.rkey    = hipzout.rkey;
-       return &e_mw->ib_mw;
-
-alloc_mw_exit1:
-       ehca_mw_delete(e_mw);
-alloc_mw_exit0:
-       if (IS_ERR(ib_mw))
-               ehca_err(pd->device, "h_ret=%li pd=%p", PTR_ERR(ib_mw), pd);
-       return ib_mw;
-} /* end ehca_alloc_mw() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_bind_mw(struct ib_qp *qp,
-                struct ib_mw *mw,
-                struct ib_mw_bind *mw_bind)
-{
-       /* TODO: not supported up to now */
-       ehca_gen_err("bind MW currently not supported by HCAD");
-
-       return -EPERM;
-} /* end ehca_bind_mw() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_dealloc_mw(struct ib_mw *mw)
-{
-       u64 h_ret;
-       struct ehca_shca *shca =
-               container_of(mw->device, struct ehca_shca, ib_device);
-       struct ehca_mw *e_mw = container_of(mw, struct ehca_mw, ib_mw);
-
-       h_ret = hipz_h_free_resource_mw(shca->ipz_hca_handle, e_mw);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(mw->device, "hipz_free_mw failed, h_ret=%lli shca=%p "
-                        "mw=%p rkey=%x hca_hndl=%llx mw_hndl=%llx",
-                        h_ret, shca, mw, mw->rkey, shca->ipz_hca_handle.handle,
-                        e_mw->ipz_mw_handle.handle);
-               return ehca2ib_return_code(h_ret);
-       }
-       /* successful deallocation */
-       ehca_mw_delete(e_mw);
-       return 0;
-} /* end ehca_dealloc_mw() */
-
-/*----------------------------------------------------------------------*/
-
-struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
-                             int mr_access_flags,
-                             struct ib_fmr_attr *fmr_attr)
-{
-       struct ib_fmr *ib_fmr;
-       struct ehca_shca *shca =
-               container_of(pd->device, struct ehca_shca, ib_device);
-       struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
-       struct ehca_mr *e_fmr;
-       int ret;
-       u32 tmp_lkey, tmp_rkey;
-       struct ehca_mr_pginfo pginfo;
-       u64 hw_pgsize;
-
-       /* check other parameters */
-       if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
-            !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
-           ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
-            !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) {
-               /*
-                * Remote Write Access requires Local Write Access
-                * Remote Atomic Access requires Local Write Access
-                */
-               ehca_err(pd->device, "bad input values: mr_access_flags=%x",
-                        mr_access_flags);
-               ib_fmr = ERR_PTR(-EINVAL);
-               goto alloc_fmr_exit0;
-       }
-       if (mr_access_flags & IB_ACCESS_MW_BIND) {
-               ehca_err(pd->device, "bad input values: mr_access_flags=%x",
-                        mr_access_flags);
-               ib_fmr = ERR_PTR(-EINVAL);
-               goto alloc_fmr_exit0;
-       }
-       if ((fmr_attr->max_pages == 0) || (fmr_attr->max_maps == 0)) {
-               ehca_err(pd->device, "bad input values: fmr_attr->max_pages=%x "
-                        "fmr_attr->max_maps=%x fmr_attr->page_shift=%x",
-                        fmr_attr->max_pages, fmr_attr->max_maps,
-                        fmr_attr->page_shift);
-               ib_fmr = ERR_PTR(-EINVAL);
-               goto alloc_fmr_exit0;
-       }
-
-       hw_pgsize = 1 << fmr_attr->page_shift;
-       if (!(hw_pgsize & shca->hca_cap_mr_pgsize)) {
-               ehca_err(pd->device, "unsupported fmr_attr->page_shift=%x",
-                        fmr_attr->page_shift);
-               ib_fmr = ERR_PTR(-EINVAL);
-               goto alloc_fmr_exit0;
-       }
-
-       e_fmr = ehca_mr_new();
-       if (!e_fmr) {
-               ib_fmr = ERR_PTR(-ENOMEM);
-               goto alloc_fmr_exit0;
-       }
-       e_fmr->flags |= EHCA_MR_FLAG_FMR;
-
-       /* register MR on HCA */
-       memset(&pginfo, 0, sizeof(pginfo));
-       pginfo.hwpage_size = hw_pgsize;
-       /*
-        * pginfo.num_hwpages==0, ie register_rpages() will not be called
-        * but deferred to map_phys_fmr()
-        */
-       ret = ehca_reg_mr(shca, e_fmr, NULL,
-                         fmr_attr->max_pages * (1 << fmr_attr->page_shift),
-                         mr_access_flags, e_pd, &pginfo,
-                         &tmp_lkey, &tmp_rkey, EHCA_REG_MR);
-       if (ret) {
-               ib_fmr = ERR_PTR(ret);
-               goto alloc_fmr_exit1;
-       }
-
-       /* successful */
-       e_fmr->hwpage_size = hw_pgsize;
-       e_fmr->fmr_page_size = 1 << fmr_attr->page_shift;
-       e_fmr->fmr_max_pages = fmr_attr->max_pages;
-       e_fmr->fmr_max_maps = fmr_attr->max_maps;
-       e_fmr->fmr_map_cnt = 0;
-       return &e_fmr->ib.ib_fmr;
-
-alloc_fmr_exit1:
-       ehca_mr_delete(e_fmr);
-alloc_fmr_exit0:
-       return ib_fmr;
-} /* end ehca_alloc_fmr() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_map_phys_fmr(struct ib_fmr *fmr,
-                     u64 *page_list,
-                     int list_len,
-                     u64 iova)
-{
-       int ret;
-       struct ehca_shca *shca =
-               container_of(fmr->device, struct ehca_shca, ib_device);
-       struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr);
-       struct ehca_pd *e_pd = container_of(fmr->pd, struct ehca_pd, ib_pd);
-       struct ehca_mr_pginfo pginfo;
-       u32 tmp_lkey, tmp_rkey;
-
-       if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) {
-               ehca_err(fmr->device, "not a FMR, e_fmr=%p e_fmr->flags=%x",
-                        e_fmr, e_fmr->flags);
-               ret = -EINVAL;
-               goto map_phys_fmr_exit0;
-       }
-       ret = ehca_fmr_check_page_list(e_fmr, page_list, list_len);
-       if (ret)
-               goto map_phys_fmr_exit0;
-       if (iova % e_fmr->fmr_page_size) {
-               /* only whole-numbered pages */
-               ehca_err(fmr->device, "bad iova, iova=%llx fmr_page_size=%x",
-                        iova, e_fmr->fmr_page_size);
-               ret = -EINVAL;
-               goto map_phys_fmr_exit0;
-       }
-       if (e_fmr->fmr_map_cnt >= e_fmr->fmr_max_maps) {
-               /* HCAD does not limit the maps, however trace this anyway */
-               ehca_info(fmr->device, "map limit exceeded, fmr=%p "
-                         "e_fmr->fmr_map_cnt=%x e_fmr->fmr_max_maps=%x",
-                         fmr, e_fmr->fmr_map_cnt, e_fmr->fmr_max_maps);
-       }
-
-       memset(&pginfo, 0, sizeof(pginfo));
-       pginfo.type = EHCA_MR_PGI_FMR;
-       pginfo.num_kpages = list_len;
-       pginfo.hwpage_size = e_fmr->hwpage_size;
-       pginfo.num_hwpages =
-               list_len * e_fmr->fmr_page_size / pginfo.hwpage_size;
-       pginfo.u.fmr.page_list = page_list;
-       pginfo.next_hwpage =
-               (iova & (e_fmr->fmr_page_size-1)) / pginfo.hwpage_size;
-       pginfo.u.fmr.fmr_pgsize = e_fmr->fmr_page_size;
-
-       ret = ehca_rereg_mr(shca, e_fmr, (u64 *)iova,
-                           list_len * e_fmr->fmr_page_size,
-                           e_fmr->acl, e_pd, &pginfo, &tmp_lkey, &tmp_rkey);
-       if (ret)
-               goto map_phys_fmr_exit0;
-
-       /* successful reregistration */
-       e_fmr->fmr_map_cnt++;
-       e_fmr->ib.ib_fmr.lkey = tmp_lkey;
-       e_fmr->ib.ib_fmr.rkey = tmp_rkey;
-       return 0;
-
-map_phys_fmr_exit0:
-       if (ret)
-               ehca_err(fmr->device, "ret=%i fmr=%p page_list=%p list_len=%x "
-                        "iova=%llx", ret, fmr, page_list, list_len, iova);
-       return ret;
-} /* end ehca_map_phys_fmr() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_unmap_fmr(struct list_head *fmr_list)
-{
-       int ret = 0;
-       struct ib_fmr *ib_fmr;
-       struct ehca_shca *shca = NULL;
-       struct ehca_shca *prev_shca;
-       struct ehca_mr *e_fmr;
-       u32 num_fmr = 0;
-       u32 unmap_fmr_cnt = 0;
-
-       /* check all FMR belong to same SHCA, and check internal flag */
-       list_for_each_entry(ib_fmr, fmr_list, list) {
-               prev_shca = shca;
-               shca = container_of(ib_fmr->device, struct ehca_shca,
-                                   ib_device);
-               e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr);
-               if ((shca != prev_shca) && prev_shca) {
-                       ehca_err(&shca->ib_device, "SHCA mismatch, shca=%p "
-                                "prev_shca=%p e_fmr=%p",
-                                shca, prev_shca, e_fmr);
-                       ret = -EINVAL;
-                       goto unmap_fmr_exit0;
-               }
-               if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) {
-                       ehca_err(&shca->ib_device, "not a FMR, e_fmr=%p "
-                                "e_fmr->flags=%x", e_fmr, e_fmr->flags);
-                       ret = -EINVAL;
-                       goto unmap_fmr_exit0;
-               }
-               num_fmr++;
-       }
-
-       /* loop over all FMRs to unmap */
-       list_for_each_entry(ib_fmr, fmr_list, list) {
-               unmap_fmr_cnt++;
-               e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr);
-               shca = container_of(ib_fmr->device, struct ehca_shca,
-                                   ib_device);
-               ret = ehca_unmap_one_fmr(shca, e_fmr);
-               if (ret) {
-                       /* unmap failed, stop unmapping of rest of FMRs */
-                       ehca_err(&shca->ib_device, "unmap of one FMR failed, "
-                                "stop rest, e_fmr=%p num_fmr=%x "
-                                "unmap_fmr_cnt=%x lkey=%x", e_fmr, num_fmr,
-                                unmap_fmr_cnt, e_fmr->ib.ib_fmr.lkey);
-                       goto unmap_fmr_exit0;
-               }
-       }
-
-unmap_fmr_exit0:
-       if (ret)
-               ehca_gen_err("ret=%i fmr_list=%p num_fmr=%x unmap_fmr_cnt=%x",
-                            ret, fmr_list, num_fmr, unmap_fmr_cnt);
-       return ret;
-} /* end ehca_unmap_fmr() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_dealloc_fmr(struct ib_fmr *fmr)
-{
-       int ret;
-       u64 h_ret;
-       struct ehca_shca *shca =
-               container_of(fmr->device, struct ehca_shca, ib_device);
-       struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr);
-
-       if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) {
-               ehca_err(fmr->device, "not a FMR, e_fmr=%p e_fmr->flags=%x",
-                        e_fmr, e_fmr->flags);
-               ret = -EINVAL;
-               goto free_fmr_exit0;
-       }
-
-       h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(fmr->device, "hipz_free_mr failed, h_ret=%lli e_fmr=%p "
-                        "hca_hndl=%llx fmr_hndl=%llx fmr->lkey=%x",
-                        h_ret, e_fmr, shca->ipz_hca_handle.handle,
-                        e_fmr->ipz_mr_handle.handle, fmr->lkey);
-               ret = ehca2ib_return_code(h_ret);
-               goto free_fmr_exit0;
-       }
-       /* successful deregistration */
-       ehca_mr_delete(e_fmr);
-       return 0;
-
-free_fmr_exit0:
-       if (ret)
-               ehca_err(&shca->ib_device, "ret=%i fmr=%p", ret, fmr);
-       return ret;
-} /* end ehca_dealloc_fmr() */
-
-/*----------------------------------------------------------------------*/
-
-static int ehca_reg_bmap_mr_rpages(struct ehca_shca *shca,
-                                  struct ehca_mr *e_mr,
-                                  struct ehca_mr_pginfo *pginfo);
-
-int ehca_reg_mr(struct ehca_shca *shca,
-               struct ehca_mr *e_mr,
-               u64 *iova_start,
-               u64 size,
-               int acl,
-               struct ehca_pd *e_pd,
-               struct ehca_mr_pginfo *pginfo,
-               u32 *lkey, /*OUT*/
-               u32 *rkey, /*OUT*/
-               enum ehca_reg_type reg_type)
-{
-       int ret;
-       u64 h_ret;
-       u32 hipz_acl;
-       struct ehca_mr_hipzout_parms hipzout;
-
-       ehca_mrmw_map_acl(acl, &hipz_acl);
-       ehca_mrmw_set_pgsize_hipz_acl(pginfo->hwpage_size, &hipz_acl);
-       if (ehca_use_hp_mr == 1)
-               hipz_acl |= 0x00000001;
-
-       h_ret = hipz_h_alloc_resource_mr(shca->ipz_hca_handle, e_mr,
-                                        (u64)iova_start, size, hipz_acl,
-                                        e_pd->fw_pd, &hipzout);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(&shca->ib_device, "hipz_alloc_mr failed, h_ret=%lli "
-                        "hca_hndl=%llx", h_ret, shca->ipz_hca_handle.handle);
-               ret = ehca2ib_return_code(h_ret);
-               goto ehca_reg_mr_exit0;
-       }
-
-       e_mr->ipz_mr_handle = hipzout.handle;
-
-       if (reg_type == EHCA_REG_BUSMAP_MR)
-               ret = ehca_reg_bmap_mr_rpages(shca, e_mr, pginfo);
-       else if (reg_type == EHCA_REG_MR)
-               ret = ehca_reg_mr_rpages(shca, e_mr, pginfo);
-       else
-               ret = -EINVAL;
-
-       if (ret)
-               goto ehca_reg_mr_exit1;
-
-       /* successful registration */
-       e_mr->num_kpages = pginfo->num_kpages;
-       e_mr->num_hwpages = pginfo->num_hwpages;
-       e_mr->hwpage_size = pginfo->hwpage_size;
-       e_mr->start = iova_start;
-       e_mr->size = size;
-       e_mr->acl = acl;
-       *lkey = hipzout.lkey;
-       *rkey = hipzout.rkey;
-       return 0;
-
-ehca_reg_mr_exit1:
-       h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(&shca->ib_device, "h_ret=%lli shca=%p e_mr=%p "
-                        "iova_start=%p size=%llx acl=%x e_pd=%p lkey=%x "
-                        "pginfo=%p num_kpages=%llx num_hwpages=%llx ret=%i",
-                        h_ret, shca, e_mr, iova_start, size, acl, e_pd,
-                        hipzout.lkey, pginfo, pginfo->num_kpages,
-                        pginfo->num_hwpages, ret);
-               ehca_err(&shca->ib_device, "internal error in ehca_reg_mr, "
-                        "not recoverable");
-       }
-ehca_reg_mr_exit0:
-       if (ret)
-               ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p "
-                        "iova_start=%p size=%llx acl=%x e_pd=%p pginfo=%p "
-                        "num_kpages=%llx num_hwpages=%llx",
-                        ret, shca, e_mr, iova_start, size, acl, e_pd, pginfo,
-                        pginfo->num_kpages, pginfo->num_hwpages);
-       return ret;
-} /* end ehca_reg_mr() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_reg_mr_rpages(struct ehca_shca *shca,
-                      struct ehca_mr *e_mr,
-                      struct ehca_mr_pginfo *pginfo)
-{
-       int ret = 0;
-       u64 h_ret;
-       u32 rnum;
-       u64 rpage;
-       u32 i;
-       u64 *kpage;
-
-       if (!pginfo->num_hwpages) /* in case of fmr */
-               return 0;
-
-       kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-       if (!kpage) {
-               ehca_err(&shca->ib_device, "kpage alloc failed");
-               ret = -ENOMEM;
-               goto ehca_reg_mr_rpages_exit0;
-       }
-
-       /* max MAX_RPAGES ehca mr pages per register call */
-       for (i = 0; i < NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES); i++) {
-
-               if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) {
-                       rnum = pginfo->num_hwpages % MAX_RPAGES; /* last shot */
-                       if (rnum == 0)
-                               rnum = MAX_RPAGES;      /* last shot is full */
-               } else
-                       rnum = MAX_RPAGES;
-
-               ret = ehca_set_pagebuf(pginfo, rnum, kpage);
-               if (ret) {
-                       ehca_err(&shca->ib_device, "ehca_set_pagebuf "
-                                "bad rc, ret=%i rnum=%x kpage=%p",
-                                ret, rnum, kpage);
-                       goto ehca_reg_mr_rpages_exit1;
-               }
-
-               if (rnum > 1) {
-                       rpage = __pa(kpage);
-                       if (!rpage) {
-                               ehca_err(&shca->ib_device, "kpage=%p i=%x",
-                                        kpage, i);
-                               ret = -EFAULT;
-                               goto ehca_reg_mr_rpages_exit1;
-                       }
-               } else
-                       rpage = *kpage;
-
-               h_ret = hipz_h_register_rpage_mr(
-                       shca->ipz_hca_handle, e_mr,
-                       ehca_encode_hwpage_size(pginfo->hwpage_size),
-                       0, rpage, rnum);
-
-               if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) {
-                       /*
-                        * check for 'registration complete'==H_SUCCESS
-                        * and for 'page registered'==H_PAGE_REGISTERED
-                        */
-                       if (h_ret != H_SUCCESS) {
-                               ehca_err(&shca->ib_device, "last "
-                                        "hipz_reg_rpage_mr failed, h_ret=%lli "
-                                        "e_mr=%p i=%x hca_hndl=%llx mr_hndl=%llx"
-                                        " lkey=%x", h_ret, e_mr, i,
-                                        shca->ipz_hca_handle.handle,
-                                        e_mr->ipz_mr_handle.handle,
-                                        e_mr->ib.ib_mr.lkey);
-                               ret = ehca2ib_return_code(h_ret);
-                               break;
-                       } else
-                               ret = 0;
-               } else if (h_ret != H_PAGE_REGISTERED) {
-                       ehca_err(&shca->ib_device, "hipz_reg_rpage_mr failed, "
-                                "h_ret=%lli e_mr=%p i=%x lkey=%x hca_hndl=%llx "
-                                "mr_hndl=%llx", h_ret, e_mr, i,
-                                e_mr->ib.ib_mr.lkey,
-                                shca->ipz_hca_handle.handle,
-                                e_mr->ipz_mr_handle.handle);
-                       ret = ehca2ib_return_code(h_ret);
-                       break;
-               } else
-                       ret = 0;
-       } /* end for(i) */
-
-
-ehca_reg_mr_rpages_exit1:
-       ehca_free_fw_ctrlblock(kpage);
-ehca_reg_mr_rpages_exit0:
-       if (ret)
-               ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p pginfo=%p "
-                        "num_kpages=%llx num_hwpages=%llx", ret, shca, e_mr,
-                        pginfo, pginfo->num_kpages, pginfo->num_hwpages);
-       return ret;
-} /* end ehca_reg_mr_rpages() */
-
-/*----------------------------------------------------------------------*/
-
-inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca,
-                               struct ehca_mr *e_mr,
-                               u64 *iova_start,
-                               u64 size,
-                               u32 acl,
-                               struct ehca_pd *e_pd,
-                               struct ehca_mr_pginfo *pginfo,
-                               u32 *lkey, /*OUT*/
-                               u32 *rkey) /*OUT*/
-{
-       int ret;
-       u64 h_ret;
-       u32 hipz_acl;
-       u64 *kpage;
-       u64 rpage;
-       struct ehca_mr_pginfo pginfo_save;
-       struct ehca_mr_hipzout_parms hipzout;
-
-       ehca_mrmw_map_acl(acl, &hipz_acl);
-       ehca_mrmw_set_pgsize_hipz_acl(pginfo->hwpage_size, &hipz_acl);
-
-       kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-       if (!kpage) {
-               ehca_err(&shca->ib_device, "kpage alloc failed");
-               ret = -ENOMEM;
-               goto ehca_rereg_mr_rereg1_exit0;
-       }
-
-       pginfo_save = *pginfo;
-       ret = ehca_set_pagebuf(pginfo, pginfo->num_hwpages, kpage);
-       if (ret) {
-               ehca_err(&shca->ib_device, "set pagebuf failed, e_mr=%p "
-                        "pginfo=%p type=%x num_kpages=%llx num_hwpages=%llx "
-                        "kpage=%p", e_mr, pginfo, pginfo->type,
-                        pginfo->num_kpages, pginfo->num_hwpages, kpage);
-               goto ehca_rereg_mr_rereg1_exit1;
-       }
-       rpage = __pa(kpage);
-       if (!rpage) {
-               ehca_err(&shca->ib_device, "kpage=%p", kpage);
-               ret = -EFAULT;
-               goto ehca_rereg_mr_rereg1_exit1;
-       }
-       h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_mr,
-                                     (u64)iova_start, size, hipz_acl,
-                                     e_pd->fw_pd, rpage, &hipzout);
-       if (h_ret != H_SUCCESS) {
-               /*
-                * reregistration unsuccessful, try it again with the 3 hCalls,
-                * e.g. this is required in case H_MR_CONDITION
-                * (MW bound or MR is shared)
-                */
-               ehca_warn(&shca->ib_device, "hipz_h_reregister_pmr failed "
-                         "(Rereg1), h_ret=%lli e_mr=%p", h_ret, e_mr);
-               *pginfo = pginfo_save;
-               ret = -EAGAIN;
-       } else if ((u64 *)hipzout.vaddr != iova_start) {
-               ehca_err(&shca->ib_device, "PHYP changed iova_start in "
-                        "rereg_pmr, iova_start=%p iova_start_out=%llx e_mr=%p "
-                        "mr_handle=%llx lkey=%x lkey_out=%x", iova_start,
-                        hipzout.vaddr, e_mr, e_mr->ipz_mr_handle.handle,
-                        e_mr->ib.ib_mr.lkey, hipzout.lkey);
-               ret = -EFAULT;
-       } else {
-               /*
-                * successful reregistration
-                * note: start and start_out are identical for eServer HCAs
-                */
-               e_mr->num_kpages = pginfo->num_kpages;
-               e_mr->num_hwpages = pginfo->num_hwpages;
-               e_mr->hwpage_size = pginfo->hwpage_size;
-               e_mr->start = iova_start;
-               e_mr->size = size;
-               e_mr->acl = acl;
-               *lkey = hipzout.lkey;
-               *rkey = hipzout.rkey;
-       }
-
-ehca_rereg_mr_rereg1_exit1:
-       ehca_free_fw_ctrlblock(kpage);
-ehca_rereg_mr_rereg1_exit0:
-       if ( ret && (ret != -EAGAIN) )
-               ehca_err(&shca->ib_device, "ret=%i lkey=%x rkey=%x "
-                        "pginfo=%p num_kpages=%llx num_hwpages=%llx",
-                        ret, *lkey, *rkey, pginfo, pginfo->num_kpages,
-                        pginfo->num_hwpages);
-       return ret;
-} /* end ehca_rereg_mr_rereg1() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_rereg_mr(struct ehca_shca *shca,
-                 struct ehca_mr *e_mr,
-                 u64 *iova_start,
-                 u64 size,
-                 int acl,
-                 struct ehca_pd *e_pd,
-                 struct ehca_mr_pginfo *pginfo,
-                 u32 *lkey,
-                 u32 *rkey)
-{
-       int ret = 0;
-       u64 h_ret;
-       int rereg_1_hcall = 1; /* 1: use hipz_h_reregister_pmr directly */
-       int rereg_3_hcall = 0; /* 1: use 3 hipz calls for reregistration */
-
-       /* first determine reregistration hCall(s) */
-       if ((pginfo->num_hwpages > MAX_RPAGES) ||
-           (e_mr->num_hwpages > MAX_RPAGES) ||
-           (pginfo->num_hwpages > e_mr->num_hwpages)) {
-               ehca_dbg(&shca->ib_device, "Rereg3 case, "
-                        "pginfo->num_hwpages=%llx e_mr->num_hwpages=%x",
-                        pginfo->num_hwpages, e_mr->num_hwpages);
-               rereg_1_hcall = 0;
-               rereg_3_hcall = 1;
-       }
-
-       if (e_mr->flags & EHCA_MR_FLAG_MAXMR) { /* check for max-MR */
-               rereg_1_hcall = 0;
-               rereg_3_hcall = 1;
-               e_mr->flags &= ~EHCA_MR_FLAG_MAXMR;
-               ehca_err(&shca->ib_device, "Rereg MR for max-MR! e_mr=%p",
-                        e_mr);
-       }
-
-       if (rereg_1_hcall) {
-               ret = ehca_rereg_mr_rereg1(shca, e_mr, iova_start, size,
-                                          acl, e_pd, pginfo, lkey, rkey);
-               if (ret) {
-                       if (ret == -EAGAIN)
-                               rereg_3_hcall = 1;
-                       else
-                               goto ehca_rereg_mr_exit0;
-               }
-       }
-
-       if (rereg_3_hcall) {
-               struct ehca_mr save_mr;
-
-               /* first deregister old MR */
-               h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
-               if (h_ret != H_SUCCESS) {
-                       ehca_err(&shca->ib_device, "hipz_free_mr failed, "
-                                "h_ret=%lli e_mr=%p hca_hndl=%llx mr_hndl=%llx "
-                                "mr->lkey=%x",
-                                h_ret, e_mr, shca->ipz_hca_handle.handle,
-                                e_mr->ipz_mr_handle.handle,
-                                e_mr->ib.ib_mr.lkey);
-                       ret = ehca2ib_return_code(h_ret);
-                       goto ehca_rereg_mr_exit0;
-               }
-               /* clean ehca_mr_t, without changing struct ib_mr and lock */
-               save_mr = *e_mr;
-               ehca_mr_deletenew(e_mr);
-
-               /* set some MR values */
-               e_mr->flags = save_mr.flags;
-               e_mr->hwpage_size = save_mr.hwpage_size;
-               e_mr->fmr_page_size = save_mr.fmr_page_size;
-               e_mr->fmr_max_pages = save_mr.fmr_max_pages;
-               e_mr->fmr_max_maps = save_mr.fmr_max_maps;
-               e_mr->fmr_map_cnt = save_mr.fmr_map_cnt;
-
-               ret = ehca_reg_mr(shca, e_mr, iova_start, size, acl,
-                                 e_pd, pginfo, lkey, rkey, EHCA_REG_MR);
-               if (ret) {
-                       u32 offset = (u64)(&e_mr->flags) - (u64)e_mr;
-                       memcpy(&e_mr->flags, &(save_mr.flags),
-                              sizeof(struct ehca_mr) - offset);
-                       goto ehca_rereg_mr_exit0;
-               }
-       }
-
-ehca_rereg_mr_exit0:
-       if (ret)
-               ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p "
-                        "iova_start=%p size=%llx acl=%x e_pd=%p pginfo=%p "
-                        "num_kpages=%llx lkey=%x rkey=%x rereg_1_hcall=%x "
-                        "rereg_3_hcall=%x", ret, shca, e_mr, iova_start, size,
-                        acl, e_pd, pginfo, pginfo->num_kpages, *lkey, *rkey,
-                        rereg_1_hcall, rereg_3_hcall);
-       return ret;
-} /* end ehca_rereg_mr() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_unmap_one_fmr(struct ehca_shca *shca,
-                      struct ehca_mr *e_fmr)
-{
-       int ret = 0;
-       u64 h_ret;
-       struct ehca_pd *e_pd =
-               container_of(e_fmr->ib.ib_fmr.pd, struct ehca_pd, ib_pd);
-       struct ehca_mr save_fmr;
-       u32 tmp_lkey, tmp_rkey;
-       struct ehca_mr_pginfo pginfo;
-       struct ehca_mr_hipzout_parms hipzout;
-       struct ehca_mr save_mr;
-
-       if (e_fmr->fmr_max_pages <= MAX_RPAGES) {
-               /*
-                * note: after using rereg hcall with len=0,
-                * rereg hcall must be used again for registering pages
-                */
-               h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_fmr, 0,
-                                             0, 0, e_pd->fw_pd, 0, &hipzout);
-               if (h_ret == H_SUCCESS) {
-                       /* successful reregistration */
-                       e_fmr->start = NULL;
-                       e_fmr->size = 0;
-                       tmp_lkey = hipzout.lkey;
-                       tmp_rkey = hipzout.rkey;
-                       return 0;
-               }
-               /*
-                * should not happen, because length checked above,
-                * FMRs are not shared and no MW bound to FMRs
-                */
-               ehca_err(&shca->ib_device, "hipz_reregister_pmr failed "
-                        "(Rereg1), h_ret=%lli e_fmr=%p hca_hndl=%llx "
-                        "mr_hndl=%llx lkey=%x lkey_out=%x",
-                        h_ret, e_fmr, shca->ipz_hca_handle.handle,
-                        e_fmr->ipz_mr_handle.handle,
-                        e_fmr->ib.ib_fmr.lkey, hipzout.lkey);
-               /* try free and rereg */
-       }
-
-       /* first free old FMR */
-       h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(&shca->ib_device, "hipz_free_mr failed, "
-                        "h_ret=%lli e_fmr=%p hca_hndl=%llx mr_hndl=%llx "
-                        "lkey=%x",
-                        h_ret, e_fmr, shca->ipz_hca_handle.handle,
-                        e_fmr->ipz_mr_handle.handle,
-                        e_fmr->ib.ib_fmr.lkey);
-               ret = ehca2ib_return_code(h_ret);
-               goto ehca_unmap_one_fmr_exit0;
-       }
-       /* clean ehca_mr_t, without changing lock */
-       save_fmr = *e_fmr;
-       ehca_mr_deletenew(e_fmr);
-
-       /* set some MR values */
-       e_fmr->flags = save_fmr.flags;
-       e_fmr->hwpage_size = save_fmr.hwpage_size;
-       e_fmr->fmr_page_size = save_fmr.fmr_page_size;
-       e_fmr->fmr_max_pages = save_fmr.fmr_max_pages;
-       e_fmr->fmr_max_maps = save_fmr.fmr_max_maps;
-       e_fmr->fmr_map_cnt = save_fmr.fmr_map_cnt;
-       e_fmr->acl = save_fmr.acl;
-
-       memset(&pginfo, 0, sizeof(pginfo));
-       pginfo.type = EHCA_MR_PGI_FMR;
-       ret = ehca_reg_mr(shca, e_fmr, NULL,
-                         (e_fmr->fmr_max_pages * e_fmr->fmr_page_size),
-                         e_fmr->acl, e_pd, &pginfo, &tmp_lkey,
-                         &tmp_rkey, EHCA_REG_MR);
-       if (ret) {
-               u32 offset = (u64)(&e_fmr->flags) - (u64)e_fmr;
-               memcpy(&e_fmr->flags, &(save_mr.flags),
-                      sizeof(struct ehca_mr) - offset);
-       }
-
-ehca_unmap_one_fmr_exit0:
-       if (ret)
-               ehca_err(&shca->ib_device, "ret=%i tmp_lkey=%x tmp_rkey=%x "
-                        "fmr_max_pages=%x",
-                        ret, tmp_lkey, tmp_rkey, e_fmr->fmr_max_pages);
-       return ret;
-} /* end ehca_unmap_one_fmr() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_reg_smr(struct ehca_shca *shca,
-                struct ehca_mr *e_origmr,
-                struct ehca_mr *e_newmr,
-                u64 *iova_start,
-                int acl,
-                struct ehca_pd *e_pd,
-                u32 *lkey, /*OUT*/
-                u32 *rkey) /*OUT*/
-{
-       int ret = 0;
-       u64 h_ret;
-       u32 hipz_acl;
-       struct ehca_mr_hipzout_parms hipzout;
-
-       ehca_mrmw_map_acl(acl, &hipz_acl);
-       ehca_mrmw_set_pgsize_hipz_acl(e_origmr->hwpage_size, &hipz_acl);
-
-       h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr,
-                                   (u64)iova_start, hipz_acl, e_pd->fw_pd,
-                                   &hipzout);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lli "
-                        "shca=%p e_origmr=%p e_newmr=%p iova_start=%p acl=%x "
-                        "e_pd=%p hca_hndl=%llx mr_hndl=%llx lkey=%x",
-                        h_ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd,
-                        shca->ipz_hca_handle.handle,
-                        e_origmr->ipz_mr_handle.handle,
-                        e_origmr->ib.ib_mr.lkey);
-               ret = ehca2ib_return_code(h_ret);
-               goto ehca_reg_smr_exit0;
-       }
-       /* successful registration */
-       e_newmr->num_kpages = e_origmr->num_kpages;
-       e_newmr->num_hwpages = e_origmr->num_hwpages;
-       e_newmr->hwpage_size   = e_origmr->hwpage_size;
-       e_newmr->start = iova_start;
-       e_newmr->size = e_origmr->size;
-       e_newmr->acl = acl;
-       e_newmr->ipz_mr_handle = hipzout.handle;
-       *lkey = hipzout.lkey;
-       *rkey = hipzout.rkey;
-       return 0;
-
-ehca_reg_smr_exit0:
-       if (ret)
-               ehca_err(&shca->ib_device, "ret=%i shca=%p e_origmr=%p "
-                        "e_newmr=%p iova_start=%p acl=%x e_pd=%p",
-                        ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd);
-       return ret;
-} /* end ehca_reg_smr() */
-
-/*----------------------------------------------------------------------*/
-static inline void *ehca_calc_sectbase(int top, int dir, int idx)
-{
-       unsigned long ret = idx;
-       ret |= dir << EHCA_DIR_INDEX_SHIFT;
-       ret |= top << EHCA_TOP_INDEX_SHIFT;
-       return __va(ret << SECTION_SIZE_BITS);
-}
-
-#define ehca_bmap_valid(entry) \
-       ((u64)entry != (u64)EHCA_INVAL_ADDR)
-
-static u64 ehca_reg_mr_section(int top, int dir, int idx, u64 *kpage,
-                              struct ehca_shca *shca, struct ehca_mr *mr,
-                              struct ehca_mr_pginfo *pginfo)
-{
-       u64 h_ret = 0;
-       unsigned long page = 0;
-       u64 rpage = __pa(kpage);
-       int page_count;
-
-       void *sectbase = ehca_calc_sectbase(top, dir, idx);
-       if ((unsigned long)sectbase & (pginfo->hwpage_size - 1)) {
-               ehca_err(&shca->ib_device, "reg_mr_section will probably fail:"
-                                          "hwpage_size does not fit to "
-                                          "section start address");
-       }
-       page_count = EHCA_SECTSIZE / pginfo->hwpage_size;
-
-       while (page < page_count) {
-               u64 rnum;
-               for (rnum = 0; (rnum < MAX_RPAGES) && (page < page_count);
-                    rnum++) {
-                       void *pg = sectbase + ((page++) * pginfo->hwpage_size);
-                       kpage[rnum] = __pa(pg);
-               }
-
-               h_ret = hipz_h_register_rpage_mr(shca->ipz_hca_handle, mr,
-                       ehca_encode_hwpage_size(pginfo->hwpage_size),
-                       0, rpage, rnum);
-
-               if ((h_ret != H_SUCCESS) && (h_ret != H_PAGE_REGISTERED)) {
-                       ehca_err(&shca->ib_device, "register_rpage_mr failed");
-                       return h_ret;
-               }
-       }
-       return h_ret;
-}
-
-static u64 ehca_reg_mr_sections(int top, int dir, u64 *kpage,
-                               struct ehca_shca *shca, struct ehca_mr *mr,
-                               struct ehca_mr_pginfo *pginfo)
-{
-       u64 hret = H_SUCCESS;
-       int idx;
-
-       for (idx = 0; idx < EHCA_MAP_ENTRIES; idx++) {
-               if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]->ent[idx]))
-                       continue;
-
-               hret = ehca_reg_mr_section(top, dir, idx, kpage, shca, mr,
-                                          pginfo);
-               if ((hret != H_SUCCESS) && (hret != H_PAGE_REGISTERED))
-                               return hret;
-       }
-       return hret;
-}
-
-static u64 ehca_reg_mr_dir_sections(int top, u64 *kpage, struct ehca_shca *shca,
-                                   struct ehca_mr *mr,
-                                   struct ehca_mr_pginfo *pginfo)
-{
-       u64 hret = H_SUCCESS;
-       int dir;
-
-       for (dir = 0; dir < EHCA_MAP_ENTRIES; dir++) {
-               if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]))
-                       continue;
-
-               hret = ehca_reg_mr_sections(top, dir, kpage, shca, mr, pginfo);
-               if ((hret != H_SUCCESS) && (hret != H_PAGE_REGISTERED))
-                               return hret;
-       }
-       return hret;
-}
-
-/* register internal max-MR to internal SHCA */
-int ehca_reg_internal_maxmr(
-       struct ehca_shca *shca,
-       struct ehca_pd *e_pd,
-       struct ehca_mr **e_maxmr)  /*OUT*/
-{
-       int ret;
-       struct ehca_mr *e_mr;
-       u64 *iova_start;
-       u64 size_maxmr;
-       struct ehca_mr_pginfo pginfo;
-       struct ib_phys_buf ib_pbuf;
-       u32 num_kpages;
-       u32 num_hwpages;
-       u64 hw_pgsize;
-
-       if (!ehca_bmap) {
-               ret = -EFAULT;
-               goto ehca_reg_internal_maxmr_exit0;
-       }
-
-       e_mr = ehca_mr_new();
-       if (!e_mr) {
-               ehca_err(&shca->ib_device, "out of memory");
-               ret = -ENOMEM;
-               goto ehca_reg_internal_maxmr_exit0;
-       }
-       e_mr->flags |= EHCA_MR_FLAG_MAXMR;
-
-       /* register internal max-MR on HCA */
-       size_maxmr = ehca_mr_len;
-       iova_start = (u64 *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START));
-       ib_pbuf.addr = 0;
-       ib_pbuf.size = size_maxmr;
-       num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size_maxmr,
-                               PAGE_SIZE);
-       hw_pgsize = ehca_get_max_hwpage_size(shca);
-       num_hwpages = NUM_CHUNKS(((u64)iova_start % hw_pgsize) + size_maxmr,
-                                hw_pgsize);
-
-       memset(&pginfo, 0, sizeof(pginfo));
-       pginfo.type = EHCA_MR_PGI_PHYS;
-       pginfo.num_kpages = num_kpages;
-       pginfo.num_hwpages = num_hwpages;
-       pginfo.hwpage_size = hw_pgsize;
-       pginfo.u.phy.num_phys_buf = 1;
-       pginfo.u.phy.phys_buf_array = &ib_pbuf;
-
-       ret = ehca_reg_mr(shca, e_mr, iova_start, size_maxmr, 0, e_pd,
-                         &pginfo, &e_mr->ib.ib_mr.lkey,
-                         &e_mr->ib.ib_mr.rkey, EHCA_REG_BUSMAP_MR);
-       if (ret) {
-               ehca_err(&shca->ib_device, "reg of internal max MR failed, "
-                        "e_mr=%p iova_start=%p size_maxmr=%llx num_kpages=%x "
-                        "num_hwpages=%x", e_mr, iova_start, size_maxmr,
-                        num_kpages, num_hwpages);
-               goto ehca_reg_internal_maxmr_exit1;
-       }
-
-       /* successful registration of all pages */
-       e_mr->ib.ib_mr.device = e_pd->ib_pd.device;
-       e_mr->ib.ib_mr.pd = &e_pd->ib_pd;
-       e_mr->ib.ib_mr.uobject = NULL;
-       atomic_inc(&(e_pd->ib_pd.usecnt));
-       atomic_set(&(e_mr->ib.ib_mr.usecnt), 0);
-       *e_maxmr = e_mr;
-       return 0;
-
-ehca_reg_internal_maxmr_exit1:
-       ehca_mr_delete(e_mr);
-ehca_reg_internal_maxmr_exit0:
-       if (ret)
-               ehca_err(&shca->ib_device, "ret=%i shca=%p e_pd=%p e_maxmr=%p",
-                        ret, shca, e_pd, e_maxmr);
-       return ret;
-} /* end ehca_reg_internal_maxmr() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_reg_maxmr(struct ehca_shca *shca,
-                  struct ehca_mr *e_newmr,
-                  u64 *iova_start,
-                  int acl,
-                  struct ehca_pd *e_pd,
-                  u32 *lkey,
-                  u32 *rkey)
-{
-       u64 h_ret;
-       struct ehca_mr *e_origmr = shca->maxmr;
-       u32 hipz_acl;
-       struct ehca_mr_hipzout_parms hipzout;
-
-       ehca_mrmw_map_acl(acl, &hipz_acl);
-       ehca_mrmw_set_pgsize_hipz_acl(e_origmr->hwpage_size, &hipz_acl);
-
-       h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr,
-                                   (u64)iova_start, hipz_acl, e_pd->fw_pd,
-                                   &hipzout);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lli "
-                        "e_origmr=%p hca_hndl=%llx mr_hndl=%llx lkey=%x",
-                        h_ret, e_origmr, shca->ipz_hca_handle.handle,
-                        e_origmr->ipz_mr_handle.handle,
-                        e_origmr->ib.ib_mr.lkey);
-               return ehca2ib_return_code(h_ret);
-       }
-       /* successful registration */
-       e_newmr->num_kpages = e_origmr->num_kpages;
-       e_newmr->num_hwpages = e_origmr->num_hwpages;
-       e_newmr->hwpage_size = e_origmr->hwpage_size;
-       e_newmr->start = iova_start;
-       e_newmr->size = e_origmr->size;
-       e_newmr->acl = acl;
-       e_newmr->ipz_mr_handle = hipzout.handle;
-       *lkey = hipzout.lkey;
-       *rkey = hipzout.rkey;
-       return 0;
-} /* end ehca_reg_maxmr() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_dereg_internal_maxmr(struct ehca_shca *shca)
-{
-       int ret;
-       struct ehca_mr *e_maxmr;
-       struct ib_pd *ib_pd;
-
-       if (!shca->maxmr) {
-               ehca_err(&shca->ib_device, "bad call, shca=%p", shca);
-               ret = -EINVAL;
-               goto ehca_dereg_internal_maxmr_exit0;
-       }
-
-       e_maxmr = shca->maxmr;
-       ib_pd = e_maxmr->ib.ib_mr.pd;
-       shca->maxmr = NULL; /* remove internal max-MR indication from SHCA */
-
-       ret = ehca_dereg_mr(&e_maxmr->ib.ib_mr);
-       if (ret) {
-               ehca_err(&shca->ib_device, "dereg internal max-MR failed, "
-                        "ret=%i e_maxmr=%p shca=%p lkey=%x",
-                        ret, e_maxmr, shca, e_maxmr->ib.ib_mr.lkey);
-               shca->maxmr = e_maxmr;
-               goto ehca_dereg_internal_maxmr_exit0;
-       }
-
-       atomic_dec(&ib_pd->usecnt);
-
-ehca_dereg_internal_maxmr_exit0:
-       if (ret)
-               ehca_err(&shca->ib_device, "ret=%i shca=%p shca->maxmr=%p",
-                        ret, shca, shca->maxmr);
-       return ret;
-} /* end ehca_dereg_internal_maxmr() */
-
-/*----------------------------------------------------------------------*/
-
-/*
- * check physical buffer array of MR verbs for validness and
- * calculates MR size
- */
-int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array,
-                                 int num_phys_buf,
-                                 u64 *iova_start,
-                                 u64 *size)
-{
-       struct ib_phys_buf *pbuf = phys_buf_array;
-       u64 size_count = 0;
-       u32 i;
-
-       if (num_phys_buf == 0) {
-               ehca_gen_err("bad phys buf array len, num_phys_buf=0");
-               return -EINVAL;
-       }
-       /* check first buffer */
-       if (((u64)iova_start & ~PAGE_MASK) != (pbuf->addr & ~PAGE_MASK)) {
-               ehca_gen_err("iova_start/addr mismatch, iova_start=%p "
-                            "pbuf->addr=%llx pbuf->size=%llx",
-                            iova_start, pbuf->addr, pbuf->size);
-               return -EINVAL;
-       }
-       if (((pbuf->addr + pbuf->size) % PAGE_SIZE) &&
-           (num_phys_buf > 1)) {
-               ehca_gen_err("addr/size mismatch in 1st buf, pbuf->addr=%llx "
-                            "pbuf->size=%llx", pbuf->addr, pbuf->size);
-               return -EINVAL;
-       }
-
-       for (i = 0; i < num_phys_buf; i++) {
-               if ((i > 0) && (pbuf->addr % PAGE_SIZE)) {
-                       ehca_gen_err("bad address, i=%x pbuf->addr=%llx "
-                                    "pbuf->size=%llx",
-                                    i, pbuf->addr, pbuf->size);
-                       return -EINVAL;
-               }
-               if (((i > 0) && /* not 1st */
-                    (i < (num_phys_buf - 1)) &&        /* not last */
-                    (pbuf->size % PAGE_SIZE)) || (pbuf->size == 0)) {
-                       ehca_gen_err("bad size, i=%x pbuf->size=%llx",
-                                    i, pbuf->size);
-                       return -EINVAL;
-               }
-               size_count += pbuf->size;
-               pbuf++;
-       }
-
-       *size = size_count;
-       return 0;
-} /* end ehca_mr_chk_buf_and_calc_size() */
-
-/*----------------------------------------------------------------------*/
-
-/* check page list of map FMR verb for validness */
-int ehca_fmr_check_page_list(struct ehca_mr *e_fmr,
-                            u64 *page_list,
-                            int list_len)
-{
-       u32 i;
-       u64 *page;
-
-       if ((list_len == 0) || (list_len > e_fmr->fmr_max_pages)) {
-               ehca_gen_err("bad list_len, list_len=%x "
-                            "e_fmr->fmr_max_pages=%x fmr=%p",
-                            list_len, e_fmr->fmr_max_pages, e_fmr);
-               return -EINVAL;
-       }
-
-       /* each page must be aligned */
-       page = page_list;
-       for (i = 0; i < list_len; i++) {
-               if (*page % e_fmr->fmr_page_size) {
-                       ehca_gen_err("bad page, i=%x *page=%llx page=%p fmr=%p "
-                                    "fmr_page_size=%x", i, *page, page, e_fmr,
-                                    e_fmr->fmr_page_size);
-                       return -EINVAL;
-               }
-               page++;
-       }
-
-       return 0;
-} /* end ehca_fmr_check_page_list() */
-
-/*----------------------------------------------------------------------*/
-
-/* PAGE_SIZE >= pginfo->hwpage_size */
-static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo,
-                                 u32 number,
-                                 u64 *kpage)
-{
-       int ret = 0;
-       u64 pgaddr;
-       u32 j = 0;
-       int hwpages_per_kpage = PAGE_SIZE / pginfo->hwpage_size;
-       struct scatterlist **sg = &pginfo->u.usr.next_sg;
-
-       while (*sg != NULL) {
-               pgaddr = page_to_pfn(sg_page(*sg))
-                       << PAGE_SHIFT;
-               *kpage = pgaddr + (pginfo->next_hwpage *
-                                  pginfo->hwpage_size);
-               if (!(*kpage)) {
-                       ehca_gen_err("pgaddr=%llx "
-                                    "sg_dma_address=%llx "
-                                    "entry=%llx next_hwpage=%llx",
-                                    pgaddr, (u64)sg_dma_address(*sg),
-                                    pginfo->u.usr.next_nmap,
-                                    pginfo->next_hwpage);
-                       return -EFAULT;
-               }
-               (pginfo->hwpage_cnt)++;
-               (pginfo->next_hwpage)++;
-               kpage++;
-               if (pginfo->next_hwpage % hwpages_per_kpage == 0) {
-                       (pginfo->kpage_cnt)++;
-                       (pginfo->u.usr.next_nmap)++;
-                       pginfo->next_hwpage = 0;
-                       *sg = sg_next(*sg);
-               }
-               j++;
-               if (j >= number)
-                       break;
-       }
-
-       return ret;
-}
-
-/*
- * check given pages for contiguous layout
- * last page addr is returned in prev_pgaddr for further check
- */
-static int ehca_check_kpages_per_ate(struct scatterlist **sg,
-                                    int num_pages,
-                                    u64 *prev_pgaddr)
-{
-       for (; *sg && num_pages > 0; *sg = sg_next(*sg), num_pages--) {
-               u64 pgaddr = page_to_pfn(sg_page(*sg)) << PAGE_SHIFT;
-               if (ehca_debug_level >= 3)
-                       ehca_gen_dbg("chunk_page=%llx value=%016llx", pgaddr,
-                                    *(u64 *)__va(pgaddr));
-               if (pgaddr - PAGE_SIZE != *prev_pgaddr) {
-                       ehca_gen_err("uncontiguous page found pgaddr=%llx "
-                                    "prev_pgaddr=%llx entries_left_in_hwpage=%x",
-                                    pgaddr, *prev_pgaddr, num_pages);
-                       return -EINVAL;
-               }
-               *prev_pgaddr = pgaddr;
-       }
-       return 0;
-}
-
-/* PAGE_SIZE < pginfo->hwpage_size */
-static int ehca_set_pagebuf_user2(struct ehca_mr_pginfo *pginfo,
-                                 u32 number,
-                                 u64 *kpage)
-{
-       int ret = 0;
-       u64 pgaddr, prev_pgaddr;
-       u32 j = 0;
-       int kpages_per_hwpage = pginfo->hwpage_size / PAGE_SIZE;
-       int nr_kpages = kpages_per_hwpage;
-       struct scatterlist **sg = &pginfo->u.usr.next_sg;
-
-       while (*sg != NULL) {
-
-               if (nr_kpages == kpages_per_hwpage) {
-                       pgaddr = (page_to_pfn(sg_page(*sg))
-                                  << PAGE_SHIFT);
-                       *kpage = pgaddr;
-                       if (!(*kpage)) {
-                               ehca_gen_err("pgaddr=%llx entry=%llx",
-                                            pgaddr, pginfo->u.usr.next_nmap);
-                               ret = -EFAULT;
-                               return ret;
-                       }
-                       /*
-                        * The first page in a hwpage must be aligned;
-                        * the first MR page is exempt from this rule.
-                        */
-                       if (pgaddr & (pginfo->hwpage_size - 1)) {
-                               if (pginfo->hwpage_cnt) {
-                                       ehca_gen_err(
-                                               "invalid alignment "
-                                               "pgaddr=%llx entry=%llx "
-                                               "mr_pgsize=%llx",
-                                               pgaddr, pginfo->u.usr.next_nmap,
-                                               pginfo->hwpage_size);
-                                       ret = -EFAULT;
-                                       return ret;
-                               }
-                               /* first MR page */
-                               pginfo->kpage_cnt =
-                                       (pgaddr &
-                                        (pginfo->hwpage_size - 1)) >>
-                                       PAGE_SHIFT;
-                               nr_kpages -= pginfo->kpage_cnt;
-                               *kpage = pgaddr &
-                                        ~(pginfo->hwpage_size - 1);
-                       }
-                       if (ehca_debug_level >= 3) {
-                               u64 val = *(u64 *)__va(pgaddr);
-                               ehca_gen_dbg("kpage=%llx page=%llx "
-                                            "value=%016llx",
-                                            *kpage, pgaddr, val);
-                       }
-                       prev_pgaddr = pgaddr;
-                       *sg = sg_next(*sg);
-                       pginfo->kpage_cnt++;
-                       pginfo->u.usr.next_nmap++;
-                       nr_kpages--;
-                       if (!nr_kpages)
-                               goto next_kpage;
-                       continue;
-               }
-
-               ret = ehca_check_kpages_per_ate(sg, nr_kpages,
-                                               &prev_pgaddr);
-               if (ret)
-                       return ret;
-               pginfo->kpage_cnt += nr_kpages;
-               pginfo->u.usr.next_nmap += nr_kpages;
-
-next_kpage:
-               nr_kpages = kpages_per_hwpage;
-               (pginfo->hwpage_cnt)++;
-               kpage++;
-               j++;
-               if (j >= number)
-                       break;
-       }
-
-       return ret;
-}
-
-static int ehca_set_pagebuf_phys(struct ehca_mr_pginfo *pginfo,
-                                u32 number, u64 *kpage)
-{
-       int ret = 0;
-       struct ib_phys_buf *pbuf;
-       u64 num_hw, offs_hw;
-       u32 i = 0;
-
-       /* loop over desired phys_buf_array entries */
-       while (i < number) {
-               pbuf   = pginfo->u.phy.phys_buf_array + pginfo->u.phy.next_buf;
-               num_hw  = NUM_CHUNKS((pbuf->addr % pginfo->hwpage_size) +
-                                    pbuf->size, pginfo->hwpage_size);
-               offs_hw = (pbuf->addr & ~(pginfo->hwpage_size - 1)) /
-                       pginfo->hwpage_size;
-               while (pginfo->next_hwpage < offs_hw + num_hw) {
-                       /* sanity check */
-                       if ((pginfo->kpage_cnt >= pginfo->num_kpages) ||
-                           (pginfo->hwpage_cnt >= pginfo->num_hwpages)) {
-                               ehca_gen_err("kpage_cnt >= num_kpages, "
-                                            "kpage_cnt=%llx num_kpages=%llx "
-                                            "hwpage_cnt=%llx "
-                                            "num_hwpages=%llx i=%x",
-                                            pginfo->kpage_cnt,
-                                            pginfo->num_kpages,
-                                            pginfo->hwpage_cnt,
-                                            pginfo->num_hwpages, i);
-                               return -EFAULT;
-                       }
-                       *kpage = (pbuf->addr & ~(pginfo->hwpage_size - 1)) +
-                                (pginfo->next_hwpage * pginfo->hwpage_size);
-                       if ( !(*kpage) && pbuf->addr ) {
-                               ehca_gen_err("pbuf->addr=%llx pbuf->size=%llx "
-                                            "next_hwpage=%llx", pbuf->addr,
-                                            pbuf->size, pginfo->next_hwpage);
-                               return -EFAULT;
-                       }
-                       (pginfo->hwpage_cnt)++;
-                       (pginfo->next_hwpage)++;
-                       if (PAGE_SIZE >= pginfo->hwpage_size) {
-                               if (pginfo->next_hwpage %
-                                   (PAGE_SIZE / pginfo->hwpage_size) == 0)
-                                       (pginfo->kpage_cnt)++;
-                       } else
-                               pginfo->kpage_cnt += pginfo->hwpage_size /
-                                       PAGE_SIZE;
-                       kpage++;
-                       i++;
-                       if (i >= number) break;
-               }
-               if (pginfo->next_hwpage >= offs_hw + num_hw) {
-                       (pginfo->u.phy.next_buf)++;
-                       pginfo->next_hwpage = 0;
-               }
-       }
-       return ret;
-}
-
-static int ehca_set_pagebuf_fmr(struct ehca_mr_pginfo *pginfo,
-                               u32 number, u64 *kpage)
-{
-       int ret = 0;
-       u64 *fmrlist;
-       u32 i;
-
-       /* loop over desired page_list entries */
-       fmrlist = pginfo->u.fmr.page_list + pginfo->u.fmr.next_listelem;
-       for (i = 0; i < number; i++) {
-               *kpage = (*fmrlist & ~(pginfo->hwpage_size - 1)) +
-                          pginfo->next_hwpage * pginfo->hwpage_size;
-               if ( !(*kpage) ) {
-                       ehca_gen_err("*fmrlist=%llx fmrlist=%p "
-                                    "next_listelem=%llx next_hwpage=%llx",
-                                    *fmrlist, fmrlist,
-                                    pginfo->u.fmr.next_listelem,
-                                    pginfo->next_hwpage);
-                       return -EFAULT;
-               }
-               (pginfo->hwpage_cnt)++;
-               if (pginfo->u.fmr.fmr_pgsize >= pginfo->hwpage_size) {
-                       if (pginfo->next_hwpage %
-                           (pginfo->u.fmr.fmr_pgsize /
-                            pginfo->hwpage_size) == 0) {
-                               (pginfo->kpage_cnt)++;
-                               (pginfo->u.fmr.next_listelem)++;
-                               fmrlist++;
-                               pginfo->next_hwpage = 0;
-                       } else
-                               (pginfo->next_hwpage)++;
-               } else {
-                       unsigned int cnt_per_hwpage = pginfo->hwpage_size /
-                               pginfo->u.fmr.fmr_pgsize;
-                       unsigned int j;
-                       u64 prev = *kpage;
-                       /* check if adrs are contiguous */
-                       for (j = 1; j < cnt_per_hwpage; j++) {
-                               u64 p = fmrlist[j] & ~(pginfo->hwpage_size - 1);
-                               if (prev + pginfo->u.fmr.fmr_pgsize != p) {
-                                       ehca_gen_err("uncontiguous fmr pages "
-                                                    "found prev=%llx p=%llx "
-                                                    "idx=%x", prev, p, i + j);
-                                       return -EINVAL;
-                               }
-                               prev = p;
-                       }
-                       pginfo->kpage_cnt += cnt_per_hwpage;
-                       pginfo->u.fmr.next_listelem += cnt_per_hwpage;
-                       fmrlist += cnt_per_hwpage;
-               }
-               kpage++;
-       }
-       return ret;
-}
-
-/* setup page buffer from page info */
-int ehca_set_pagebuf(struct ehca_mr_pginfo *pginfo,
-                    u32 number,
-                    u64 *kpage)
-{
-       int ret;
-
-       switch (pginfo->type) {
-       case EHCA_MR_PGI_PHYS:
-               ret = ehca_set_pagebuf_phys(pginfo, number, kpage);
-               break;
-       case EHCA_MR_PGI_USER:
-               ret = PAGE_SIZE >= pginfo->hwpage_size ?
-                       ehca_set_pagebuf_user1(pginfo, number, kpage) :
-                       ehca_set_pagebuf_user2(pginfo, number, kpage);
-               break;
-       case EHCA_MR_PGI_FMR:
-               ret = ehca_set_pagebuf_fmr(pginfo, number, kpage);
-               break;
-       default:
-               ehca_gen_err("bad pginfo->type=%x", pginfo->type);
-               ret = -EFAULT;
-               break;
-       }
-       return ret;
-} /* end ehca_set_pagebuf() */
-
-/*----------------------------------------------------------------------*/
-
-/*
- * check MR if it is a max-MR, i.e. uses whole memory
- * in case it's a max-MR 1 is returned, else 0
- */
-int ehca_mr_is_maxmr(u64 size,
-                    u64 *iova_start)
-{
-       /* a MR is treated as max-MR only if it fits following: */
-       if ((size == ehca_mr_len) &&
-           (iova_start == (void *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)))) {
-               ehca_gen_dbg("this is a max-MR");
-               return 1;
-       } else
-               return 0;
-} /* end ehca_mr_is_maxmr() */
-
-/*----------------------------------------------------------------------*/
-
-/* map access control for MR/MW. This routine is used for MR and MW. */
-void ehca_mrmw_map_acl(int ib_acl,
-                      u32 *hipz_acl)
-{
-       *hipz_acl = 0;
-       if (ib_acl & IB_ACCESS_REMOTE_READ)
-               *hipz_acl |= HIPZ_ACCESSCTRL_R_READ;
-       if (ib_acl & IB_ACCESS_REMOTE_WRITE)
-               *hipz_acl |= HIPZ_ACCESSCTRL_R_WRITE;
-       if (ib_acl & IB_ACCESS_REMOTE_ATOMIC)
-               *hipz_acl |= HIPZ_ACCESSCTRL_R_ATOMIC;
-       if (ib_acl & IB_ACCESS_LOCAL_WRITE)
-               *hipz_acl |= HIPZ_ACCESSCTRL_L_WRITE;
-       if (ib_acl & IB_ACCESS_MW_BIND)
-               *hipz_acl |= HIPZ_ACCESSCTRL_MW_BIND;
-} /* end ehca_mrmw_map_acl() */
-
-/*----------------------------------------------------------------------*/
-
-/* sets page size in hipz access control for MR/MW. */
-void ehca_mrmw_set_pgsize_hipz_acl(u32 pgsize, u32 *hipz_acl) /*INOUT*/
-{
-       *hipz_acl |= (ehca_encode_hwpage_size(pgsize) << 24);
-} /* end ehca_mrmw_set_pgsize_hipz_acl() */
-
-/*----------------------------------------------------------------------*/
-
-/*
- * reverse map access control for MR/MW.
- * This routine is used for MR and MW.
- */
-void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl,
-                              int *ib_acl) /*OUT*/
-{
-       *ib_acl = 0;
-       if (*hipz_acl & HIPZ_ACCESSCTRL_R_READ)
-               *ib_acl |= IB_ACCESS_REMOTE_READ;
-       if (*hipz_acl & HIPZ_ACCESSCTRL_R_WRITE)
-               *ib_acl |= IB_ACCESS_REMOTE_WRITE;
-       if (*hipz_acl & HIPZ_ACCESSCTRL_R_ATOMIC)
-               *ib_acl |= IB_ACCESS_REMOTE_ATOMIC;
-       if (*hipz_acl & HIPZ_ACCESSCTRL_L_WRITE)
-               *ib_acl |= IB_ACCESS_LOCAL_WRITE;
-       if (*hipz_acl & HIPZ_ACCESSCTRL_MW_BIND)
-               *ib_acl |= IB_ACCESS_MW_BIND;
-} /* end ehca_mrmw_reverse_map_acl() */
-
-
-/*----------------------------------------------------------------------*/
-
-/*
- * MR destructor and constructor
- * used in Reregister MR verb, sets all fields in ehca_mr_t to 0,
- * except struct ib_mr and spinlock
- */
-void ehca_mr_deletenew(struct ehca_mr *mr)
-{
-       mr->flags = 0;
-       mr->num_kpages = 0;
-       mr->num_hwpages = 0;
-       mr->acl = 0;
-       mr->start = NULL;
-       mr->fmr_page_size = 0;
-       mr->fmr_max_pages = 0;
-       mr->fmr_max_maps = 0;
-       mr->fmr_map_cnt = 0;
-       memset(&mr->ipz_mr_handle, 0, sizeof(mr->ipz_mr_handle));
-       memset(&mr->galpas, 0, sizeof(mr->galpas));
-} /* end ehca_mr_deletenew() */
-
-int ehca_init_mrmw_cache(void)
-{
-       mr_cache = kmem_cache_create("ehca_cache_mr",
-                                    sizeof(struct ehca_mr), 0,
-                                    SLAB_HWCACHE_ALIGN,
-                                    NULL);
-       if (!mr_cache)
-               return -ENOMEM;
-       mw_cache = kmem_cache_create("ehca_cache_mw",
-                                    sizeof(struct ehca_mw), 0,
-                                    SLAB_HWCACHE_ALIGN,
-                                    NULL);
-       if (!mw_cache) {
-               kmem_cache_destroy(mr_cache);
-               mr_cache = NULL;
-               return -ENOMEM;
-       }
-       return 0;
-}
-
-void ehca_cleanup_mrmw_cache(void)
-{
-       kmem_cache_destroy(mr_cache);
-       kmem_cache_destroy(mw_cache);
-}
-
-static inline int ehca_init_top_bmap(struct ehca_top_bmap *ehca_top_bmap,
-                                    int dir)
-{
-       if (!ehca_bmap_valid(ehca_top_bmap->dir[dir])) {
-               ehca_top_bmap->dir[dir] =
-                       kmalloc(sizeof(struct ehca_dir_bmap), GFP_KERNEL);
-               if (!ehca_top_bmap->dir[dir])
-                       return -ENOMEM;
-               /* Set map block to 0xFF according to EHCA_INVAL_ADDR */
-               memset(ehca_top_bmap->dir[dir], 0xFF, EHCA_ENT_MAP_SIZE);
-       }
-       return 0;
-}
-
-static inline int ehca_init_bmap(struct ehca_bmap *ehca_bmap, int top, int dir)
-{
-       if (!ehca_bmap_valid(ehca_bmap->top[top])) {
-               ehca_bmap->top[top] =
-                       kmalloc(sizeof(struct ehca_top_bmap), GFP_KERNEL);
-               if (!ehca_bmap->top[top])
-                       return -ENOMEM;
-               /* Set map block to 0xFF according to EHCA_INVAL_ADDR */
-               memset(ehca_bmap->top[top], 0xFF, EHCA_DIR_MAP_SIZE);
-       }
-       return ehca_init_top_bmap(ehca_bmap->top[top], dir);
-}
-
-static inline int ehca_calc_index(unsigned long i, unsigned long s)
-{
-       return (i >> s) & EHCA_INDEX_MASK;
-}
-
-void ehca_destroy_busmap(void)
-{
-       int top, dir;
-
-       if (!ehca_bmap)
-               return;
-
-       for (top = 0; top < EHCA_MAP_ENTRIES; top++) {
-               if (!ehca_bmap_valid(ehca_bmap->top[top]))
-                       continue;
-               for (dir = 0; dir < EHCA_MAP_ENTRIES; dir++) {
-                       if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]))
-                               continue;
-
-                       kfree(ehca_bmap->top[top]->dir[dir]);
-               }
-
-               kfree(ehca_bmap->top[top]);
-       }
-
-       kfree(ehca_bmap);
-       ehca_bmap = NULL;
-}
-
-static int ehca_update_busmap(unsigned long pfn, unsigned long nr_pages)
-{
-       unsigned long i, start_section, end_section;
-       int top, dir, idx;
-
-       if (!nr_pages)
-               return 0;
-
-       if (!ehca_bmap) {
-               ehca_bmap = kmalloc(sizeof(struct ehca_bmap), GFP_KERNEL);
-               if (!ehca_bmap)
-                       return -ENOMEM;
-               /* Set map block to 0xFF according to EHCA_INVAL_ADDR */
-               memset(ehca_bmap, 0xFF, EHCA_TOP_MAP_SIZE);
-       }
-
-       start_section = (pfn * PAGE_SIZE) / EHCA_SECTSIZE;
-       end_section = ((pfn + nr_pages) * PAGE_SIZE) / EHCA_SECTSIZE;
-       for (i = start_section; i < end_section; i++) {
-               int ret;
-               top = ehca_calc_index(i, EHCA_TOP_INDEX_SHIFT);
-               dir = ehca_calc_index(i, EHCA_DIR_INDEX_SHIFT);
-               idx = i & EHCA_INDEX_MASK;
-
-               ret = ehca_init_bmap(ehca_bmap, top, dir);
-               if (ret) {
-                       ehca_destroy_busmap();
-                       return ret;
-               }
-               ehca_bmap->top[top]->dir[dir]->ent[idx] = ehca_mr_len;
-               ehca_mr_len += EHCA_SECTSIZE;
-       }
-       return 0;
-}
-
-static int ehca_is_hugepage(unsigned long pfn)
-{
-       int page_order;
-
-       if (pfn & EHCA_HUGEPAGE_PFN_MASK)
-               return 0;
-
-       page_order = compound_order(pfn_to_page(pfn));
-       if (page_order + PAGE_SHIFT != EHCA_HUGEPAGESHIFT)
-               return 0;
-
-       return 1;
-}
-
-static int ehca_create_busmap_callback(unsigned long initial_pfn,
-                                      unsigned long total_nr_pages, void *arg)
-{
-       int ret;
-       unsigned long pfn, start_pfn, end_pfn, nr_pages;
-
-       if ((total_nr_pages * PAGE_SIZE) < EHCA_HUGEPAGE_SIZE)
-               return ehca_update_busmap(initial_pfn, total_nr_pages);
-
-       /* Given chunk is >= 16GB -> check for hugepages */
-       start_pfn = initial_pfn;
-       end_pfn = initial_pfn + total_nr_pages;
-       pfn = start_pfn;
-
-       while (pfn < end_pfn) {
-               if (ehca_is_hugepage(pfn)) {
-                       /* Add mem found in front of the hugepage */
-                       nr_pages = pfn - start_pfn;
-                       ret = ehca_update_busmap(start_pfn, nr_pages);
-                       if (ret)
-                               return ret;
-                       /* Skip the hugepage */
-                       pfn += (EHCA_HUGEPAGE_SIZE / PAGE_SIZE);
-                       start_pfn = pfn;
-               } else
-                       pfn += (EHCA_SECTSIZE / PAGE_SIZE);
-       }
-
-       /* Add mem found behind the hugepage(s)  */
-       nr_pages = pfn - start_pfn;
-       return ehca_update_busmap(start_pfn, nr_pages);
-}
-
-int ehca_create_busmap(void)
-{
-       int ret;
-
-       ehca_mr_len = 0;
-       ret = walk_system_ram_range(0, 1ULL << MAX_PHYSMEM_BITS, NULL,
-                                  ehca_create_busmap_callback);
-       return ret;
-}
-
-static int ehca_reg_bmap_mr_rpages(struct ehca_shca *shca,
-                                  struct ehca_mr *e_mr,
-                                  struct ehca_mr_pginfo *pginfo)
-{
-       int top;
-       u64 hret, *kpage;
-
-       kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-       if (!kpage) {
-               ehca_err(&shca->ib_device, "kpage alloc failed");
-               return -ENOMEM;
-       }
-       for (top = 0; top < EHCA_MAP_ENTRIES; top++) {
-               if (!ehca_bmap_valid(ehca_bmap->top[top]))
-                       continue;
-               hret = ehca_reg_mr_dir_sections(top, kpage, shca, e_mr, pginfo);
-               if ((hret != H_PAGE_REGISTERED) && (hret != H_SUCCESS))
-                       break;
-       }
-
-       ehca_free_fw_ctrlblock(kpage);
-
-       if (hret == H_SUCCESS)
-               return 0; /* Everything is fine */
-       else {
-               ehca_err(&shca->ib_device, "ehca_reg_bmap_mr_rpages failed, "
-                                "h_ret=%lli e_mr=%p top=%x lkey=%x "
-                                "hca_hndl=%llx mr_hndl=%llx", hret, e_mr, top,
-                                e_mr->ib.ib_mr.lkey,
-                                shca->ipz_hca_handle.handle,
-                                e_mr->ipz_mr_handle.handle);
-               return ehca2ib_return_code(hret);
-       }
-}
-
-static u64 ehca_map_vaddr(void *caddr)
-{
-       int top, dir, idx;
-       unsigned long abs_addr, offset;
-       u64 entry;
-
-       if (!ehca_bmap)
-               return EHCA_INVAL_ADDR;
-
-       abs_addr = __pa(caddr);
-       top = ehca_calc_index(abs_addr, EHCA_TOP_INDEX_SHIFT + EHCA_SECTSHIFT);
-       if (!ehca_bmap_valid(ehca_bmap->top[top]))
-               return EHCA_INVAL_ADDR;
-
-       dir = ehca_calc_index(abs_addr, EHCA_DIR_INDEX_SHIFT + EHCA_SECTSHIFT);
-       if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]))
-               return EHCA_INVAL_ADDR;
-
-       idx = ehca_calc_index(abs_addr, EHCA_SECTSHIFT);
-
-       entry = ehca_bmap->top[top]->dir[dir]->ent[idx];
-       if (ehca_bmap_valid(entry)) {
-               offset = (unsigned long)caddr & (EHCA_SECTSIZE - 1);
-               return entry | offset;
-       } else
-               return EHCA_INVAL_ADDR;
-}
-
-static int ehca_dma_mapping_error(struct ib_device *dev, u64 dma_addr)
-{
-       return dma_addr == EHCA_INVAL_ADDR;
-}
-
-static u64 ehca_dma_map_single(struct ib_device *dev, void *cpu_addr,
-                              size_t size, enum dma_data_direction direction)
-{
-       if (cpu_addr)
-               return ehca_map_vaddr(cpu_addr);
-       else
-               return EHCA_INVAL_ADDR;
-}
-
-static void ehca_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size,
-                                 enum dma_data_direction direction)
-{
-       /* This is only a stub; nothing to be done here */
-}
-
-static u64 ehca_dma_map_page(struct ib_device *dev, struct page *page,
-                            unsigned long offset, size_t size,
-                            enum dma_data_direction direction)
-{
-       u64 addr;
-
-       if (offset + size > PAGE_SIZE)
-               return EHCA_INVAL_ADDR;
-
-       addr = ehca_map_vaddr(page_address(page));
-       if (!ehca_dma_mapping_error(dev, addr))
-               addr += offset;
-
-       return addr;
-}
-
-static void ehca_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size,
-                               enum dma_data_direction direction)
-{
-       /* This is only a stub; nothing to be done here */
-}
-
-static int ehca_dma_map_sg(struct ib_device *dev, struct scatterlist *sgl,
-                          int nents, enum dma_data_direction direction)
-{
-       struct scatterlist *sg;
-       int i;
-
-       for_each_sg(sgl, sg, nents, i) {
-               u64 addr;
-               addr = ehca_map_vaddr(sg_virt(sg));
-               if (ehca_dma_mapping_error(dev, addr))
-                       return 0;
-
-               sg->dma_address = addr;
-               sg->dma_length = sg->length;
-       }
-       return nents;
-}
-
-static void ehca_dma_unmap_sg(struct ib_device *dev, struct scatterlist *sg,
-                             int nents, enum dma_data_direction direction)
-{
-       /* This is only a stub; nothing to be done here */
-}
-
-static void ehca_dma_sync_single_for_cpu(struct ib_device *dev, u64 addr,
-                                        size_t size,
-                                        enum dma_data_direction dir)
-{
-       dma_sync_single_for_cpu(dev->dma_device, addr, size, dir);
-}
-
-static void ehca_dma_sync_single_for_device(struct ib_device *dev, u64 addr,
-                                           size_t size,
-                                           enum dma_data_direction dir)
-{
-       dma_sync_single_for_device(dev->dma_device, addr, size, dir);
-}
-
-static void *ehca_dma_alloc_coherent(struct ib_device *dev, size_t size,
-                                    u64 *dma_handle, gfp_t flag)
-{
-       struct page *p;
-       void *addr = NULL;
-       u64 dma_addr;
-
-       p = alloc_pages(flag, get_order(size));
-       if (p) {
-               addr = page_address(p);
-               dma_addr = ehca_map_vaddr(addr);
-               if (ehca_dma_mapping_error(dev, dma_addr)) {
-                       free_pages((unsigned long)addr, get_order(size));
-                       return NULL;
-               }
-               if (dma_handle)
-                       *dma_handle = dma_addr;
-               return addr;
-       }
-       return NULL;
-}
-
-static void ehca_dma_free_coherent(struct ib_device *dev, size_t size,
-                                  void *cpu_addr, u64 dma_handle)
-{
-       if (cpu_addr && size)
-               free_pages((unsigned long)cpu_addr, get_order(size));
-}
-
-
-struct ib_dma_mapping_ops ehca_dma_mapping_ops = {
-       .mapping_error          = ehca_dma_mapping_error,
-       .map_single             = ehca_dma_map_single,
-       .unmap_single           = ehca_dma_unmap_single,
-       .map_page               = ehca_dma_map_page,
-       .unmap_page             = ehca_dma_unmap_page,
-       .map_sg                 = ehca_dma_map_sg,
-       .unmap_sg               = ehca_dma_unmap_sg,
-       .sync_single_for_cpu    = ehca_dma_sync_single_for_cpu,
-       .sync_single_for_device = ehca_dma_sync_single_for_device,
-       .alloc_coherent         = ehca_dma_alloc_coherent,
-       .free_coherent          = ehca_dma_free_coherent,
-};
diff --git a/drivers/staging/rdma/ehca/ehca_mrmw.h b/drivers/staging/rdma/ehca/ehca_mrmw.h
deleted file mode 100644 (file)
index 50d8b51..0000000
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  MR/MW declarations and inline functions
- *
- *  Authors: Dietmar Decker <ddecker@de.ibm.com>
- *           Christoph Raisch <raisch@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _EHCA_MRMW_H_
-#define _EHCA_MRMW_H_
-
-enum ehca_reg_type {
-       EHCA_REG_MR,
-       EHCA_REG_BUSMAP_MR
-};
-
-int ehca_reg_mr(struct ehca_shca *shca,
-               struct ehca_mr *e_mr,
-               u64 *iova_start,
-               u64 size,
-               int acl,
-               struct ehca_pd *e_pd,
-               struct ehca_mr_pginfo *pginfo,
-               u32 *lkey,
-               u32 *rkey,
-               enum ehca_reg_type reg_type);
-
-int ehca_reg_mr_rpages(struct ehca_shca *shca,
-                      struct ehca_mr *e_mr,
-                      struct ehca_mr_pginfo *pginfo);
-
-int ehca_rereg_mr(struct ehca_shca *shca,
-                 struct ehca_mr *e_mr,
-                 u64 *iova_start,
-                 u64 size,
-                 int mr_access_flags,
-                 struct ehca_pd *e_pd,
-                 struct ehca_mr_pginfo *pginfo,
-                 u32 *lkey,
-                 u32 *rkey);
-
-int ehca_unmap_one_fmr(struct ehca_shca *shca,
-                      struct ehca_mr *e_fmr);
-
-int ehca_reg_smr(struct ehca_shca *shca,
-                struct ehca_mr *e_origmr,
-                struct ehca_mr *e_newmr,
-                u64 *iova_start,
-                int acl,
-                struct ehca_pd *e_pd,
-                u32 *lkey,
-                u32 *rkey);
-
-int ehca_reg_internal_maxmr(struct ehca_shca *shca,
-                           struct ehca_pd *e_pd,
-                           struct ehca_mr **maxmr);
-
-int ehca_reg_maxmr(struct ehca_shca *shca,
-                  struct ehca_mr *e_newmr,
-                  u64 *iova_start,
-                  int acl,
-                  struct ehca_pd *e_pd,
-                  u32 *lkey,
-                  u32 *rkey);
-
-int ehca_dereg_internal_maxmr(struct ehca_shca *shca);
-
-int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array,
-                                 int num_phys_buf,
-                                 u64 *iova_start,
-                                 u64 *size);
-
-int ehca_fmr_check_page_list(struct ehca_mr *e_fmr,
-                            u64 *page_list,
-                            int list_len);
-
-int ehca_set_pagebuf(struct ehca_mr_pginfo *pginfo,
-                    u32 number,
-                    u64 *kpage);
-
-int ehca_mr_is_maxmr(u64 size,
-                    u64 *iova_start);
-
-void ehca_mrmw_map_acl(int ib_acl,
-                      u32 *hipz_acl);
-
-void ehca_mrmw_set_pgsize_hipz_acl(u32 pgsize, u32 *hipz_acl);
-
-void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl,
-                              int *ib_acl);
-
-void ehca_mr_deletenew(struct ehca_mr *mr);
-
-int ehca_create_busmap(void);
-
-void ehca_destroy_busmap(void);
-
-extern struct ib_dma_mapping_ops ehca_dma_mapping_ops;
-#endif  /*_EHCA_MRMW_H_*/
diff --git a/drivers/staging/rdma/ehca/ehca_pd.c b/drivers/staging/rdma/ehca/ehca_pd.c
deleted file mode 100644 (file)
index 2a8aae4..0000000
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  PD functions
- *
- *  Authors: Christoph Raisch <raisch@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/slab.h>
-
-#include "ehca_tools.h"
-#include "ehca_iverbs.h"
-
-static struct kmem_cache *pd_cache;
-
-struct ib_pd *ehca_alloc_pd(struct ib_device *device,
-                           struct ib_ucontext *context, struct ib_udata *udata)
-{
-       struct ehca_pd *pd;
-       int i;
-
-       pd = kmem_cache_zalloc(pd_cache, GFP_KERNEL);
-       if (!pd) {
-               ehca_err(device, "device=%p context=%p out of memory",
-                        device, context);
-               return ERR_PTR(-ENOMEM);
-       }
-
-       for (i = 0; i < 2; i++) {
-               INIT_LIST_HEAD(&pd->free[i]);
-               INIT_LIST_HEAD(&pd->full[i]);
-       }
-       mutex_init(&pd->lock);
-
-       /*
-        * Kernel PD: when device = -1, 0
-        * User   PD: when context != -1
-        */
-       if (!context) {
-               /*
-                * Kernel PDs after init reuses always
-                * the one created in ehca_shca_reopen()
-                */
-               struct ehca_shca *shca = container_of(device, struct ehca_shca,
-                                                     ib_device);
-               pd->fw_pd.value = shca->pd->fw_pd.value;
-       } else
-               pd->fw_pd.value = (u64)pd;
-
-       return &pd->ib_pd;
-}
-
-int ehca_dealloc_pd(struct ib_pd *pd)
-{
-       struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd);
-       int i, leftovers = 0;
-       struct ipz_small_queue_page *page, *tmp;
-
-       for (i = 0; i < 2; i++) {
-               list_splice(&my_pd->full[i], &my_pd->free[i]);
-               list_for_each_entry_safe(page, tmp, &my_pd->free[i], list) {
-                       leftovers = 1;
-                       free_page(page->page);
-                       kmem_cache_free(small_qp_cache, page);
-               }
-       }
-
-       if (leftovers)
-               ehca_warn(pd->device,
-                         "Some small queue pages were not freed");
-
-       kmem_cache_free(pd_cache, my_pd);
-
-       return 0;
-}
-
-int ehca_init_pd_cache(void)
-{
-       pd_cache = kmem_cache_create("ehca_cache_pd",
-                                    sizeof(struct ehca_pd), 0,
-                                    SLAB_HWCACHE_ALIGN,
-                                    NULL);
-       if (!pd_cache)
-               return -ENOMEM;
-       return 0;
-}
-
-void ehca_cleanup_pd_cache(void)
-{
-       kmem_cache_destroy(pd_cache);
-}
diff --git a/drivers/staging/rdma/ehca/ehca_qes.h b/drivers/staging/rdma/ehca/ehca_qes.h
deleted file mode 100644 (file)
index 90c4efa..0000000
+++ /dev/null
@@ -1,260 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  Hardware request structures
- *
- *  Authors: Waleri Fomin <fomin@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *           Christoph Raisch <raisch@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-
-#ifndef _EHCA_QES_H_
-#define _EHCA_QES_H_
-
-#include "ehca_tools.h"
-
-/* virtual scatter gather entry to specify remote addresses with length */
-struct ehca_vsgentry {
-       u64 vaddr;
-       u32 lkey;
-       u32 length;
-};
-
-#define GRH_FLAG_MASK        EHCA_BMASK_IBM( 7,  7)
-#define GRH_IPVERSION_MASK   EHCA_BMASK_IBM( 0,  3)
-#define GRH_TCLASS_MASK      EHCA_BMASK_IBM( 4, 12)
-#define GRH_FLOWLABEL_MASK   EHCA_BMASK_IBM(13, 31)
-#define GRH_PAYLEN_MASK      EHCA_BMASK_IBM(32, 47)
-#define GRH_NEXTHEADER_MASK  EHCA_BMASK_IBM(48, 55)
-#define GRH_HOPLIMIT_MASK    EHCA_BMASK_IBM(56, 63)
-
-/*
- * Unreliable Datagram Address Vector Format
- * see IBTA Vol1 chapter 8.3 Global Routing Header
- */
-struct ehca_ud_av {
-       u8 sl;
-       u8 lnh;
-       u16 dlid;
-       u8 reserved1;
-       u8 reserved2;
-       u8 reserved3;
-       u8 slid_path_bits;
-       u8 reserved4;
-       u8 ipd;
-       u8 reserved5;
-       u8 pmtu;
-       u32 reserved6;
-       u64 reserved7;
-       union {
-               struct {
-                       u64 word_0; /* always set to 6  */
-                       /*should be 0x1B for IB transport */
-                       u64 word_1;
-                       u64 word_2;
-                       u64 word_3;
-                       u64 word_4;
-               } grh;
-               struct {
-                       u32 wd_0;
-                       u32 wd_1;
-                       /* DWord_1 --> SGID */
-
-                       u32 sgid_wd3;
-                       u32 sgid_wd2;
-
-                       u32 sgid_wd1;
-                       u32 sgid_wd0;
-                       /* DWord_3 --> DGID */
-
-                       u32 dgid_wd3;
-                       u32 dgid_wd2;
-
-                       u32 dgid_wd1;
-                       u32 dgid_wd0;
-               } grh_l;
-       };
-};
-
-/* maximum number of sg entries allowed in a WQE */
-#define MAX_WQE_SG_ENTRIES 252
-
-#define WQE_OPTYPE_SEND             0x80
-#define WQE_OPTYPE_RDMAREAD         0x40
-#define WQE_OPTYPE_RDMAWRITE        0x20
-#define WQE_OPTYPE_CMPSWAP          0x10
-#define WQE_OPTYPE_FETCHADD         0x08
-#define WQE_OPTYPE_BIND             0x04
-
-#define WQE_WRFLAG_REQ_SIGNAL_COM   0x80
-#define WQE_WRFLAG_FENCE            0x40
-#define WQE_WRFLAG_IMM_DATA_PRESENT 0x20
-#define WQE_WRFLAG_SOLIC_EVENT      0x10
-
-#define WQEF_CACHE_HINT             0x80
-#define WQEF_CACHE_HINT_RD_WR       0x40
-#define WQEF_TIMED_WQE              0x20
-#define WQEF_PURGE                  0x08
-#define WQEF_HIGH_NIBBLE            0xF0
-
-#define MW_BIND_ACCESSCTRL_R_WRITE   0x40
-#define MW_BIND_ACCESSCTRL_R_READ    0x20
-#define MW_BIND_ACCESSCTRL_R_ATOMIC  0x10
-
-struct ehca_wqe {
-       u64 work_request_id;
-       u8 optype;
-       u8 wr_flag;
-       u16 pkeyi;
-       u8 wqef;
-       u8 nr_of_data_seg;
-       u16 wqe_provided_slid;
-       u32 destination_qp_number;
-       u32 resync_psn_sqp;
-       u32 local_ee_context_qkey;
-       u32 immediate_data;
-       union {
-               struct {
-                       u64 remote_virtual_address;
-                       u32 rkey;
-                       u32 reserved;
-                       u64 atomic_1st_op_dma_len;
-                       u64 atomic_2nd_op;
-                       struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES];
-
-               } nud;
-               struct {
-                       u64 ehca_ud_av_ptr;
-                       u64 reserved1;
-                       u64 reserved2;
-                       u64 reserved3;
-                       struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES];
-               } ud_avp;
-               struct {
-                       struct ehca_ud_av ud_av;
-                       struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES -
-                                                    2];
-               } ud_av;
-               struct {
-                       u64 reserved0;
-                       u64 reserved1;
-                       u64 reserved2;
-                       u64 reserved3;
-                       struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES];
-               } all_rcv;
-
-               struct {
-                       u64 reserved;
-                       u32 rkey;
-                       u32 old_rkey;
-                       u64 reserved1;
-                       u64 reserved2;
-                       u64 virtual_address;
-                       u32 reserved3;
-                       u32 length;
-                       u32 reserved4;
-                       u16 reserved5;
-                       u8 reserved6;
-                       u8 lr_ctl;
-                       u32 lkey;
-                       u32 reserved7;
-                       u64 reserved8;
-                       u64 reserved9;
-                       u64 reserved10;
-                       u64 reserved11;
-               } bind;
-               struct {
-                       u64 reserved12;
-                       u64 reserved13;
-                       u32 size;
-                       u32 start;
-               } inline_data;
-       } u;
-
-};
-
-#define WC_SEND_RECEIVE EHCA_BMASK_IBM(0, 0)
-#define WC_IMM_DATA     EHCA_BMASK_IBM(1, 1)
-#define WC_GRH_PRESENT  EHCA_BMASK_IBM(2, 2)
-#define WC_SE_BIT       EHCA_BMASK_IBM(3, 3)
-#define WC_STATUS_ERROR_BIT 0x80000000
-#define WC_STATUS_REMOTE_ERROR_FLAGS 0x0000F800
-#define WC_STATUS_PURGE_BIT 0x10
-#define WC_SEND_RECEIVE_BIT 0x80
-
-struct ehca_cqe {
-       u64 work_request_id;
-       u8 optype;
-       u8 w_completion_flags;
-       u16 reserved1;
-       u32 nr_bytes_transferred;
-       u32 immediate_data;
-       u32 local_qp_number;
-       u8 freed_resource_count;
-       u8 service_level;
-       u16 wqe_count;
-       u32 qp_token;
-       u32 qkey_ee_token;
-       u32 remote_qp_number;
-       u16 dlid;
-       u16 rlid;
-       u16 reserved2;
-       u16 pkey_index;
-       u32 cqe_timestamp;
-       u32 wqe_timestamp;
-       u8 wqe_timestamp_valid;
-       u8 reserved3;
-       u8 reserved4;
-       u8 cqe_flags;
-       u32 status;
-};
-
-struct ehca_eqe {
-       u64 entry;
-};
-
-struct ehca_mrte {
-       u64 starting_va;
-       u64 length; /* length of memory region in bytes*/
-       u32 pd;
-       u8 key_instance;
-       u8 pagesize;
-       u8 mr_control;
-       u8 local_remote_access_ctrl;
-       u8 reserved[0x20 - 0x18];
-       u64 at_pointer[4];
-};
-#endif /*_EHCA_QES_H_*/
diff --git a/drivers/staging/rdma/ehca/ehca_qp.c b/drivers/staging/rdma/ehca/ehca_qp.c
deleted file mode 100644 (file)
index 896c01f..0000000
+++ /dev/null
@@ -1,2256 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  QP functions
- *
- *  Authors: Joachim Fenkes <fenkes@de.ibm.com>
- *           Stefan Roscher <stefan.roscher@de.ibm.com>
- *           Waleri Fomin <fomin@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *           Heiko J Schick <schickhj@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/slab.h>
-
-#include "ehca_classes.h"
-#include "ehca_tools.h"
-#include "ehca_qes.h"
-#include "ehca_iverbs.h"
-#include "hcp_if.h"
-#include "hipz_fns.h"
-
-static struct kmem_cache *qp_cache;
-
-/*
- * attributes not supported by query qp
- */
-#define QP_ATTR_QUERY_NOT_SUPPORTED (IB_QP_ACCESS_FLAGS       | \
-                                    IB_QP_EN_SQD_ASYNC_NOTIFY)
-
-/*
- * ehca (internal) qp state values
- */
-enum ehca_qp_state {
-       EHCA_QPS_RESET = 1,
-       EHCA_QPS_INIT = 2,
-       EHCA_QPS_RTR = 3,
-       EHCA_QPS_RTS = 5,
-       EHCA_QPS_SQD = 6,
-       EHCA_QPS_SQE = 8,
-       EHCA_QPS_ERR = 128
-};
-
-/*
- * qp state transitions as defined by IB Arch Rel 1.1 page 431
- */
-enum ib_qp_statetrans {
-       IB_QPST_ANY2RESET,
-       IB_QPST_ANY2ERR,
-       IB_QPST_RESET2INIT,
-       IB_QPST_INIT2RTR,
-       IB_QPST_INIT2INIT,
-       IB_QPST_RTR2RTS,
-       IB_QPST_RTS2SQD,
-       IB_QPST_RTS2RTS,
-       IB_QPST_SQD2RTS,
-       IB_QPST_SQE2RTS,
-       IB_QPST_SQD2SQD,
-       IB_QPST_MAX     /* nr of transitions, this must be last!!! */
-};
-
-/*
- * ib2ehca_qp_state maps IB to ehca qp_state
- * returns ehca qp state corresponding to given ib qp state
- */
-static inline enum ehca_qp_state ib2ehca_qp_state(enum ib_qp_state ib_qp_state)
-{
-       switch (ib_qp_state) {
-       case IB_QPS_RESET:
-               return EHCA_QPS_RESET;
-       case IB_QPS_INIT:
-               return EHCA_QPS_INIT;
-       case IB_QPS_RTR:
-               return EHCA_QPS_RTR;
-       case IB_QPS_RTS:
-               return EHCA_QPS_RTS;
-       case IB_QPS_SQD:
-               return EHCA_QPS_SQD;
-       case IB_QPS_SQE:
-               return EHCA_QPS_SQE;
-       case IB_QPS_ERR:
-               return EHCA_QPS_ERR;
-       default:
-               ehca_gen_err("invalid ib_qp_state=%x", ib_qp_state);
-               return -EINVAL;
-       }
-}
-
-/*
- * ehca2ib_qp_state maps ehca to IB qp_state
- * returns ib qp state corresponding to given ehca qp state
- */
-static inline enum ib_qp_state ehca2ib_qp_state(enum ehca_qp_state
-                                               ehca_qp_state)
-{
-       switch (ehca_qp_state) {
-       case EHCA_QPS_RESET:
-               return IB_QPS_RESET;
-       case EHCA_QPS_INIT:
-               return IB_QPS_INIT;
-       case EHCA_QPS_RTR:
-               return IB_QPS_RTR;
-       case EHCA_QPS_RTS:
-               return IB_QPS_RTS;
-       case EHCA_QPS_SQD:
-               return IB_QPS_SQD;
-       case EHCA_QPS_SQE:
-               return IB_QPS_SQE;
-       case EHCA_QPS_ERR:
-               return IB_QPS_ERR;
-       default:
-               ehca_gen_err("invalid ehca_qp_state=%x", ehca_qp_state);
-               return -EINVAL;
-       }
-}
-
-/*
- * ehca_qp_type used as index for req_attr and opt_attr of
- * struct ehca_modqp_statetrans
- */
-enum ehca_qp_type {
-       QPT_RC = 0,
-       QPT_UC = 1,
-       QPT_UD = 2,
-       QPT_SQP = 3,
-       QPT_MAX
-};
-
-/*
- * ib2ehcaqptype maps Ib to ehca qp_type
- * returns ehca qp type corresponding to ib qp type
- */
-static inline enum ehca_qp_type ib2ehcaqptype(enum ib_qp_type ibqptype)
-{
-       switch (ibqptype) {
-       case IB_QPT_SMI:
-       case IB_QPT_GSI:
-               return QPT_SQP;
-       case IB_QPT_RC:
-               return QPT_RC;
-       case IB_QPT_UC:
-               return QPT_UC;
-       case IB_QPT_UD:
-               return QPT_UD;
-       default:
-               ehca_gen_err("Invalid ibqptype=%x", ibqptype);
-               return -EINVAL;
-       }
-}
-
-static inline enum ib_qp_statetrans get_modqp_statetrans(int ib_fromstate,
-                                                        int ib_tostate)
-{
-       int index = -EINVAL;
-       switch (ib_tostate) {
-       case IB_QPS_RESET:
-               index = IB_QPST_ANY2RESET;
-               break;
-       case IB_QPS_INIT:
-               switch (ib_fromstate) {
-               case IB_QPS_RESET:
-                       index = IB_QPST_RESET2INIT;
-                       break;
-               case IB_QPS_INIT:
-                       index = IB_QPST_INIT2INIT;
-                       break;
-               }
-               break;
-       case IB_QPS_RTR:
-               if (ib_fromstate == IB_QPS_INIT)
-                       index = IB_QPST_INIT2RTR;
-               break;
-       case IB_QPS_RTS:
-               switch (ib_fromstate) {
-               case IB_QPS_RTR:
-                       index = IB_QPST_RTR2RTS;
-                       break;
-               case IB_QPS_RTS:
-                       index = IB_QPST_RTS2RTS;
-                       break;
-               case IB_QPS_SQD:
-                       index = IB_QPST_SQD2RTS;
-                       break;
-               case IB_QPS_SQE:
-                       index = IB_QPST_SQE2RTS;
-                       break;
-               }
-               break;
-       case IB_QPS_SQD:
-               if (ib_fromstate == IB_QPS_RTS)
-                       index = IB_QPST_RTS2SQD;
-               break;
-       case IB_QPS_SQE:
-               break;
-       case IB_QPS_ERR:
-               index = IB_QPST_ANY2ERR;
-               break;
-       default:
-               break;
-       }
-       return index;
-}
-
-/*
- * ibqptype2servicetype returns hcp service type corresponding to given
- * ib qp type used by create_qp()
- */
-static inline int ibqptype2servicetype(enum ib_qp_type ibqptype)
-{
-       switch (ibqptype) {
-       case IB_QPT_SMI:
-       case IB_QPT_GSI:
-               return ST_UD;
-       case IB_QPT_RC:
-               return ST_RC;
-       case IB_QPT_UC:
-               return ST_UC;
-       case IB_QPT_UD:
-               return ST_UD;
-       case IB_QPT_RAW_IPV6:
-               return -EINVAL;
-       case IB_QPT_RAW_ETHERTYPE:
-               return -EINVAL;
-       default:
-               ehca_gen_err("Invalid ibqptype=%x", ibqptype);
-               return -EINVAL;
-       }
-}
-
-/*
- * init userspace queue info from ipz_queue data
- */
-static inline void queue2resp(struct ipzu_queue_resp *resp,
-                             struct ipz_queue *queue)
-{
-       resp->qe_size = queue->qe_size;
-       resp->act_nr_of_sg = queue->act_nr_of_sg;
-       resp->queue_length = queue->queue_length;
-       resp->pagesize = queue->pagesize;
-       resp->toggle_state = queue->toggle_state;
-       resp->offset = queue->offset;
-}
-
-/*
- * init_qp_queue initializes/constructs r/squeue and registers queue pages.
- */
-static inline int init_qp_queue(struct ehca_shca *shca,
-                               struct ehca_pd *pd,
-                               struct ehca_qp *my_qp,
-                               struct ipz_queue *queue,
-                               int q_type,
-                               u64 expected_hret,
-                               struct ehca_alloc_queue_parms *parms,
-                               int wqe_size)
-{
-       int ret, cnt, ipz_rc, nr_q_pages;
-       void *vpage;
-       u64 rpage, h_ret;
-       struct ib_device *ib_dev = &shca->ib_device;
-       struct ipz_adapter_handle ipz_hca_handle = shca->ipz_hca_handle;
-
-       if (!parms->queue_size)
-               return 0;
-
-       if (parms->is_small) {
-               nr_q_pages = 1;
-               ipz_rc = ipz_queue_ctor(pd, queue, nr_q_pages,
-                                       128 << parms->page_size,
-                                       wqe_size, parms->act_nr_sges, 1);
-       } else {
-               nr_q_pages = parms->queue_size;
-               ipz_rc = ipz_queue_ctor(pd, queue, nr_q_pages,
-                                       EHCA_PAGESIZE, wqe_size,
-                                       parms->act_nr_sges, 0);
-       }
-
-       if (!ipz_rc) {
-               ehca_err(ib_dev, "Cannot allocate page for queue. ipz_rc=%i",
-                        ipz_rc);
-               return -EBUSY;
-       }
-
-       /* register queue pages */
-       for (cnt = 0; cnt < nr_q_pages; cnt++) {
-               vpage = ipz_qpageit_get_inc(queue);
-               if (!vpage) {
-                       ehca_err(ib_dev, "ipz_qpageit_get_inc() "
-                                "failed p_vpage= %p", vpage);
-                       ret = -EINVAL;
-                       goto init_qp_queue1;
-               }
-               rpage = __pa(vpage);
-
-               h_ret = hipz_h_register_rpage_qp(ipz_hca_handle,
-                                                my_qp->ipz_qp_handle,
-                                                NULL, 0, q_type,
-                                                rpage, parms->is_small ? 0 : 1,
-                                                my_qp->galpas.kernel);
-               if (cnt == (nr_q_pages - 1)) {  /* last page! */
-                       if (h_ret != expected_hret) {
-                               ehca_err(ib_dev, "hipz_qp_register_rpage() "
-                                        "h_ret=%lli", h_ret);
-                               ret = ehca2ib_return_code(h_ret);
-                               goto init_qp_queue1;
-                       }
-                       vpage = ipz_qpageit_get_inc(&my_qp->ipz_rqueue);
-                       if (vpage) {
-                               ehca_err(ib_dev, "ipz_qpageit_get_inc() "
-                                        "should not succeed vpage=%p", vpage);
-                               ret = -EINVAL;
-                               goto init_qp_queue1;
-                       }
-               } else {
-                       if (h_ret != H_PAGE_REGISTERED) {
-                               ehca_err(ib_dev, "hipz_qp_register_rpage() "
-                                        "h_ret=%lli", h_ret);
-                               ret = ehca2ib_return_code(h_ret);
-                               goto init_qp_queue1;
-                       }
-               }
-       }
-
-       ipz_qeit_reset(queue);
-
-       return 0;
-
-init_qp_queue1:
-       ipz_queue_dtor(pd, queue);
-       return ret;
-}
-
-static inline int ehca_calc_wqe_size(int act_nr_sge, int is_llqp)
-{
-       if (is_llqp)
-               return 128 << act_nr_sge;
-       else
-               return offsetof(struct ehca_wqe,
-                               u.nud.sg_list[act_nr_sge]);
-}
-
-static void ehca_determine_small_queue(struct ehca_alloc_queue_parms *queue,
-                                      int req_nr_sge, int is_llqp)
-{
-       u32 wqe_size, q_size;
-       int act_nr_sge = req_nr_sge;
-
-       if (!is_llqp)
-               /* round up #SGEs so WQE size is a power of 2 */
-               for (act_nr_sge = 4; act_nr_sge <= 252;
-                    act_nr_sge = 4 + 2 * act_nr_sge)
-                       if (act_nr_sge >= req_nr_sge)
-                               break;
-
-       wqe_size = ehca_calc_wqe_size(act_nr_sge, is_llqp);
-       q_size = wqe_size * (queue->max_wr + 1);
-
-       if (q_size <= 512)
-               queue->page_size = 2;
-       else if (q_size <= 1024)
-               queue->page_size = 3;
-       else
-               queue->page_size = 0;
-
-       queue->is_small = (queue->page_size != 0);
-}
-
-/* needs to be called with cq->spinlock held */
-void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq)
-{
-       struct list_head *list, *node;
-
-       /* TODO: support low latency QPs */
-       if (qp->ext_type == EQPT_LLQP)
-               return;
-
-       if (on_sq) {
-               list = &qp->send_cq->sqp_err_list;
-               node = &qp->sq_err_node;
-       } else {
-               list = &qp->recv_cq->rqp_err_list;
-               node = &qp->rq_err_node;
-       }
-
-       if (list_empty(node))
-               list_add_tail(node, list);
-
-       return;
-}
-
-static void del_from_err_list(struct ehca_cq *cq, struct list_head *node)
-{
-       unsigned long flags;
-
-       spin_lock_irqsave(&cq->spinlock, flags);
-
-       if (!list_empty(node))
-               list_del_init(node);
-
-       spin_unlock_irqrestore(&cq->spinlock, flags);
-}
-
-static void reset_queue_map(struct ehca_queue_map *qmap)
-{
-       int i;
-
-       qmap->tail = qmap->entries - 1;
-       qmap->left_to_poll = 0;
-       qmap->next_wqe_idx = 0;
-       for (i = 0; i < qmap->entries; i++) {
-               qmap->map[i].reported = 1;
-               qmap->map[i].cqe_req = 0;
-       }
-}
-
-/*
- * Create an ib_qp struct that is either a QP or an SRQ, depending on
- * the value of the is_srq parameter. If init_attr and srq_init_attr share
- * fields, the field out of init_attr is used.
- */
-static struct ehca_qp *internal_create_qp(
-       struct ib_pd *pd,
-       struct ib_qp_init_attr *init_attr,
-       struct ib_srq_init_attr *srq_init_attr,
-       struct ib_udata *udata, int is_srq)
-{
-       struct ehca_qp *my_qp, *my_srq = NULL;
-       struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd);
-       struct ehca_shca *shca = container_of(pd->device, struct ehca_shca,
-                                             ib_device);
-       struct ib_ucontext *context = NULL;
-       u64 h_ret;
-       int is_llqp = 0, has_srq = 0, is_user = 0;
-       int qp_type, max_send_sge, max_recv_sge, ret;
-
-       /* h_call's out parameters */
-       struct ehca_alloc_qp_parms parms;
-       u32 swqe_size = 0, rwqe_size = 0, ib_qp_num;
-       unsigned long flags;
-
-       if (!atomic_add_unless(&shca->num_qps, 1, shca->max_num_qps)) {
-               ehca_err(pd->device, "Unable to create QP, max number of %i "
-                        "QPs reached.", shca->max_num_qps);
-               ehca_err(pd->device, "To increase the maximum number of QPs "
-                        "use the number_of_qps module parameter.\n");
-               return ERR_PTR(-ENOSPC);
-       }
-
-       if (init_attr->create_flags) {
-               atomic_dec(&shca->num_qps);
-               return ERR_PTR(-EINVAL);
-       }
-
-       memset(&parms, 0, sizeof(parms));
-       qp_type = init_attr->qp_type;
-
-       if (init_attr->sq_sig_type != IB_SIGNAL_REQ_WR &&
-               init_attr->sq_sig_type != IB_SIGNAL_ALL_WR) {
-               ehca_err(pd->device, "init_attr->sg_sig_type=%x not allowed",
-                        init_attr->sq_sig_type);
-               atomic_dec(&shca->num_qps);
-               return ERR_PTR(-EINVAL);
-       }
-
-       /* save LLQP info */
-       if (qp_type & 0x80) {
-               is_llqp = 1;
-               parms.ext_type = EQPT_LLQP;
-               parms.ll_comp_flags = qp_type & LLQP_COMP_MASK;
-       }
-       qp_type &= 0x1F;
-       init_attr->qp_type &= 0x1F;
-
-       /* handle SRQ base QPs */
-       if (init_attr->srq) {
-               my_srq = container_of(init_attr->srq, struct ehca_qp, ib_srq);
-
-               if (qp_type == IB_QPT_UC) {
-                       ehca_err(pd->device, "UC with SRQ not supported");
-                       atomic_dec(&shca->num_qps);
-                       return ERR_PTR(-EINVAL);
-               }
-
-               has_srq = 1;
-               parms.ext_type = EQPT_SRQBASE;
-               parms.srq_qpn = my_srq->real_qp_num;
-       }
-
-       if (is_llqp && has_srq) {
-               ehca_err(pd->device, "LLQPs can't have an SRQ");
-               atomic_dec(&shca->num_qps);
-               return ERR_PTR(-EINVAL);
-       }
-
-       /* handle SRQs */
-       if (is_srq) {
-               parms.ext_type = EQPT_SRQ;
-               parms.srq_limit = srq_init_attr->attr.srq_limit;
-               if (init_attr->cap.max_recv_sge > 3) {
-                       ehca_err(pd->device, "no more than three SGEs "
-                                "supported for SRQ  pd=%p  max_sge=%x",
-                                pd, init_attr->cap.max_recv_sge);
-                       atomic_dec(&shca->num_qps);
-                       return ERR_PTR(-EINVAL);
-               }
-       }
-
-       /* check QP type */
-       if (qp_type != IB_QPT_UD &&
-           qp_type != IB_QPT_UC &&
-           qp_type != IB_QPT_RC &&
-           qp_type != IB_QPT_SMI &&
-           qp_type != IB_QPT_GSI) {
-               ehca_err(pd->device, "wrong QP Type=%x", qp_type);
-               atomic_dec(&shca->num_qps);
-               return ERR_PTR(-EINVAL);
-       }
-
-       if (is_llqp) {
-               switch (qp_type) {
-               case IB_QPT_RC:
-                       if ((init_attr->cap.max_send_wr > 255) ||
-                           (init_attr->cap.max_recv_wr > 255)) {
-                               ehca_err(pd->device,
-                                        "Invalid Number of max_sq_wr=%x "
-                                        "or max_rq_wr=%x for RC LLQP",
-                                        init_attr->cap.max_send_wr,
-                                        init_attr->cap.max_recv_wr);
-                               atomic_dec(&shca->num_qps);
-                               return ERR_PTR(-EINVAL);
-                       }
-                       break;
-               case IB_QPT_UD:
-                       if (!EHCA_BMASK_GET(HCA_CAP_UD_LL_QP, shca->hca_cap)) {
-                               ehca_err(pd->device, "UD LLQP not supported "
-                                        "by this adapter");
-                               atomic_dec(&shca->num_qps);
-                               return ERR_PTR(-ENOSYS);
-                       }
-                       if (!(init_attr->cap.max_send_sge <= 5
-                           && init_attr->cap.max_send_sge >= 1
-                           && init_attr->cap.max_recv_sge <= 5
-                           && init_attr->cap.max_recv_sge >= 1)) {
-                               ehca_err(pd->device,
-                                        "Invalid Number of max_send_sge=%x "
-                                        "or max_recv_sge=%x for UD LLQP",
-                                        init_attr->cap.max_send_sge,
-                                        init_attr->cap.max_recv_sge);
-                               atomic_dec(&shca->num_qps);
-                               return ERR_PTR(-EINVAL);
-                       } else if (init_attr->cap.max_send_wr > 255) {
-                               ehca_err(pd->device,
-                                        "Invalid Number of "
-                                        "max_send_wr=%x for UD QP_TYPE=%x",
-                                        init_attr->cap.max_send_wr, qp_type);
-                               atomic_dec(&shca->num_qps);
-                               return ERR_PTR(-EINVAL);
-                       }
-                       break;
-               default:
-                       ehca_err(pd->device, "unsupported LL QP Type=%x",
-                                qp_type);
-                       atomic_dec(&shca->num_qps);
-                       return ERR_PTR(-EINVAL);
-               }
-       } else {
-               int max_sge = (qp_type == IB_QPT_UD || qp_type == IB_QPT_SMI
-                              || qp_type == IB_QPT_GSI) ? 250 : 252;
-
-               if (init_attr->cap.max_send_sge > max_sge
-                   || init_attr->cap.max_recv_sge > max_sge) {
-                       ehca_err(pd->device, "Invalid number of SGEs requested "
-                                "send_sge=%x recv_sge=%x max_sge=%x",
-                                init_attr->cap.max_send_sge,
-                                init_attr->cap.max_recv_sge, max_sge);
-                       atomic_dec(&shca->num_qps);
-                       return ERR_PTR(-EINVAL);
-               }
-       }
-
-       my_qp = kmem_cache_zalloc(qp_cache, GFP_KERNEL);
-       if (!my_qp) {
-               ehca_err(pd->device, "pd=%p not enough memory to alloc qp", pd);
-               atomic_dec(&shca->num_qps);
-               return ERR_PTR(-ENOMEM);
-       }
-
-       if (pd->uobject && udata) {
-               is_user = 1;
-               context = pd->uobject->context;
-       }
-
-       atomic_set(&my_qp->nr_events, 0);
-       init_waitqueue_head(&my_qp->wait_completion);
-       spin_lock_init(&my_qp->spinlock_s);
-       spin_lock_init(&my_qp->spinlock_r);
-       my_qp->qp_type = qp_type;
-       my_qp->ext_type = parms.ext_type;
-       my_qp->state = IB_QPS_RESET;
-
-       if (init_attr->recv_cq)
-               my_qp->recv_cq =
-                       container_of(init_attr->recv_cq, struct ehca_cq, ib_cq);
-       if (init_attr->send_cq)
-               my_qp->send_cq =
-                       container_of(init_attr->send_cq, struct ehca_cq, ib_cq);
-
-       idr_preload(GFP_KERNEL);
-       write_lock_irqsave(&ehca_qp_idr_lock, flags);
-
-       ret = idr_alloc(&ehca_qp_idr, my_qp, 0, 0x2000000, GFP_NOWAIT);
-       if (ret >= 0)
-               my_qp->token = ret;
-
-       write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
-       idr_preload_end();
-       if (ret < 0) {
-               if (ret == -ENOSPC) {
-                       ret = -EINVAL;
-                       ehca_err(pd->device, "Invalid number of qp");
-               } else {
-                       ret = -ENOMEM;
-                       ehca_err(pd->device, "Can't allocate new idr entry.");
-               }
-               goto create_qp_exit0;
-       }
-
-       if (has_srq)
-               parms.srq_token = my_qp->token;
-
-       parms.servicetype = ibqptype2servicetype(qp_type);
-       if (parms.servicetype < 0) {
-               ret = -EINVAL;
-               ehca_err(pd->device, "Invalid qp_type=%x", qp_type);
-               goto create_qp_exit1;
-       }
-
-       /* Always signal by WQE so we can hide circ. WQEs */
-       parms.sigtype = HCALL_SIGT_BY_WQE;
-
-       /* UD_AV CIRCUMVENTION */
-       max_send_sge = init_attr->cap.max_send_sge;
-       max_recv_sge = init_attr->cap.max_recv_sge;
-       if (parms.servicetype == ST_UD && !is_llqp) {
-               max_send_sge += 2;
-               max_recv_sge += 2;
-       }
-
-       parms.token = my_qp->token;
-       parms.eq_handle = shca->eq.ipz_eq_handle;
-       parms.pd = my_pd->fw_pd;
-       if (my_qp->send_cq)
-               parms.send_cq_handle = my_qp->send_cq->ipz_cq_handle;
-       if (my_qp->recv_cq)
-               parms.recv_cq_handle = my_qp->recv_cq->ipz_cq_handle;
-
-       parms.squeue.max_wr = init_attr->cap.max_send_wr;
-       parms.rqueue.max_wr = init_attr->cap.max_recv_wr;
-       parms.squeue.max_sge = max_send_sge;
-       parms.rqueue.max_sge = max_recv_sge;
-
-       /* RC QPs need one more SWQE for unsolicited ack circumvention */
-       if (qp_type == IB_QPT_RC)
-               parms.squeue.max_wr++;
-
-       if (EHCA_BMASK_GET(HCA_CAP_MINI_QP, shca->hca_cap)) {
-               if (HAS_SQ(my_qp))
-                       ehca_determine_small_queue(
-                               &parms.squeue, max_send_sge, is_llqp);
-               if (HAS_RQ(my_qp))
-                       ehca_determine_small_queue(
-                               &parms.rqueue, max_recv_sge, is_llqp);
-               parms.qp_storage =
-                       (parms.squeue.is_small || parms.rqueue.is_small);
-       }
-
-       h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, &parms, is_user);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(pd->device, "h_alloc_resource_qp() failed h_ret=%lli",
-                        h_ret);
-               ret = ehca2ib_return_code(h_ret);
-               goto create_qp_exit1;
-       }
-
-       ib_qp_num = my_qp->real_qp_num = parms.real_qp_num;
-       my_qp->ipz_qp_handle = parms.qp_handle;
-       my_qp->galpas = parms.galpas;
-
-       swqe_size = ehca_calc_wqe_size(parms.squeue.act_nr_sges, is_llqp);
-       rwqe_size = ehca_calc_wqe_size(parms.rqueue.act_nr_sges, is_llqp);
-
-       switch (qp_type) {
-       case IB_QPT_RC:
-               if (is_llqp) {
-                       parms.squeue.act_nr_sges = 1;
-                       parms.rqueue.act_nr_sges = 1;
-               }
-               /* hide the extra WQE */
-               parms.squeue.act_nr_wqes--;
-               break;
-       case IB_QPT_UD:
-       case IB_QPT_GSI:
-       case IB_QPT_SMI:
-               /* UD circumvention */
-               if (is_llqp) {
-                       parms.squeue.act_nr_sges = 1;
-                       parms.rqueue.act_nr_sges = 1;
-               } else {
-                       parms.squeue.act_nr_sges -= 2;
-                       parms.rqueue.act_nr_sges -= 2;
-               }
-
-               if (IB_QPT_GSI == qp_type || IB_QPT_SMI == qp_type) {
-                       parms.squeue.act_nr_wqes = init_attr->cap.max_send_wr;
-                       parms.rqueue.act_nr_wqes = init_attr->cap.max_recv_wr;
-                       parms.squeue.act_nr_sges = init_attr->cap.max_send_sge;
-                       parms.rqueue.act_nr_sges = init_attr->cap.max_recv_sge;
-                       ib_qp_num = (qp_type == IB_QPT_SMI) ? 0 : 1;
-               }
-
-               break;
-
-       default:
-               break;
-       }
-
-       /* initialize r/squeue and register queue pages */
-       if (HAS_SQ(my_qp)) {
-               ret = init_qp_queue(
-                       shca, my_pd, my_qp, &my_qp->ipz_squeue, 0,
-                       HAS_RQ(my_qp) ? H_PAGE_REGISTERED : H_SUCCESS,
-                       &parms.squeue, swqe_size);
-               if (ret) {
-                       ehca_err(pd->device, "Couldn't initialize squeue "
-                                "and pages ret=%i", ret);
-                       goto create_qp_exit2;
-               }
-
-               if (!is_user) {
-                       my_qp->sq_map.entries = my_qp->ipz_squeue.queue_length /
-                               my_qp->ipz_squeue.qe_size;
-                       my_qp->sq_map.map = vmalloc(my_qp->sq_map.entries *
-                                                   sizeof(struct ehca_qmap_entry));
-                       if (!my_qp->sq_map.map) {
-                               ehca_err(pd->device, "Couldn't allocate squeue "
-                                        "map ret=%i", ret);
-                               goto create_qp_exit3;
-                       }
-                       INIT_LIST_HEAD(&my_qp->sq_err_node);
-                       /* to avoid the generation of bogus flush CQEs */
-                       reset_queue_map(&my_qp->sq_map);
-               }
-       }
-
-       if (HAS_RQ(my_qp)) {
-               ret = init_qp_queue(
-                       shca, my_pd, my_qp, &my_qp->ipz_rqueue, 1,
-                       H_SUCCESS, &parms.rqueue, rwqe_size);
-               if (ret) {
-                       ehca_err(pd->device, "Couldn't initialize rqueue "
-                                "and pages ret=%i", ret);
-                       goto create_qp_exit4;
-               }
-               if (!is_user) {
-                       my_qp->rq_map.entries = my_qp->ipz_rqueue.queue_length /
-                               my_qp->ipz_rqueue.qe_size;
-                       my_qp->rq_map.map = vmalloc(my_qp->rq_map.entries *
-                                                   sizeof(struct ehca_qmap_entry));
-                       if (!my_qp->rq_map.map) {
-                               ehca_err(pd->device, "Couldn't allocate squeue "
-                                        "map ret=%i", ret);
-                               goto create_qp_exit5;
-                       }
-                       INIT_LIST_HEAD(&my_qp->rq_err_node);
-                       /* to avoid the generation of bogus flush CQEs */
-                       reset_queue_map(&my_qp->rq_map);
-               }
-       } else if (init_attr->srq && !is_user) {
-               /* this is a base QP, use the queue map of the SRQ */
-               my_qp->rq_map = my_srq->rq_map;
-               INIT_LIST_HEAD(&my_qp->rq_err_node);
-
-               my_qp->ipz_rqueue = my_srq->ipz_rqueue;
-       }
-
-       if (is_srq) {
-               my_qp->ib_srq.pd = &my_pd->ib_pd;
-               my_qp->ib_srq.device = my_pd->ib_pd.device;
-
-               my_qp->ib_srq.srq_context = init_attr->qp_context;
-               my_qp->ib_srq.event_handler = init_attr->event_handler;
-       } else {
-               my_qp->ib_qp.qp_num = ib_qp_num;
-               my_qp->ib_qp.pd = &my_pd->ib_pd;
-               my_qp->ib_qp.device = my_pd->ib_pd.device;
-
-               my_qp->ib_qp.recv_cq = init_attr->recv_cq;
-               my_qp->ib_qp.send_cq = init_attr->send_cq;
-
-               my_qp->ib_qp.qp_type = qp_type;
-               my_qp->ib_qp.srq = init_attr->srq;
-
-               my_qp->ib_qp.qp_context = init_attr->qp_context;
-               my_qp->ib_qp.event_handler = init_attr->event_handler;
-       }
-
-       init_attr->cap.max_inline_data = 0; /* not supported yet */
-       init_attr->cap.max_recv_sge = parms.rqueue.act_nr_sges;
-       init_attr->cap.max_recv_wr = parms.rqueue.act_nr_wqes;
-       init_attr->cap.max_send_sge = parms.squeue.act_nr_sges;
-       init_attr->cap.max_send_wr = parms.squeue.act_nr_wqes;
-       my_qp->init_attr = *init_attr;
-
-       if (qp_type == IB_QPT_SMI || qp_type == IB_QPT_GSI) {
-               shca->sport[init_attr->port_num - 1].ibqp_sqp[qp_type] =
-                       &my_qp->ib_qp;
-               if (ehca_nr_ports < 0) {
-                       /* alloc array to cache subsequent modify qp parms
-                        * for autodetect mode
-                        */
-                       my_qp->mod_qp_parm =
-                               kzalloc(EHCA_MOD_QP_PARM_MAX *
-                                       sizeof(*my_qp->mod_qp_parm),
-                                       GFP_KERNEL);
-                       if (!my_qp->mod_qp_parm) {
-                               ehca_err(pd->device,
-                                        "Could not alloc mod_qp_parm");
-                               goto create_qp_exit5;
-                       }
-               }
-       }
-
-       /* NOTE: define_apq0() not supported yet */
-       if (qp_type == IB_QPT_GSI) {
-               h_ret = ehca_define_sqp(shca, my_qp, init_attr);
-               if (h_ret != H_SUCCESS) {
-                       kfree(my_qp->mod_qp_parm);
-                       my_qp->mod_qp_parm = NULL;
-                       /* the QP pointer is no longer valid */
-                       shca->sport[init_attr->port_num - 1].ibqp_sqp[qp_type] =
-                               NULL;
-                       ret = ehca2ib_return_code(h_ret);
-                       goto create_qp_exit6;
-               }
-       }
-
-       if (my_qp->send_cq) {
-               ret = ehca_cq_assign_qp(my_qp->send_cq, my_qp);
-               if (ret) {
-                       ehca_err(pd->device,
-                                "Couldn't assign qp to send_cq ret=%i", ret);
-                       goto create_qp_exit7;
-               }
-       }
-
-       /* copy queues, galpa data to user space */
-       if (context && udata) {
-               struct ehca_create_qp_resp resp;
-               memset(&resp, 0, sizeof(resp));
-
-               resp.qp_num = my_qp->real_qp_num;
-               resp.token = my_qp->token;
-               resp.qp_type = my_qp->qp_type;
-               resp.ext_type = my_qp->ext_type;
-               resp.qkey = my_qp->qkey;
-               resp.real_qp_num = my_qp->real_qp_num;
-
-               if (HAS_SQ(my_qp))
-                       queue2resp(&resp.ipz_squeue, &my_qp->ipz_squeue);
-               if (HAS_RQ(my_qp))
-                       queue2resp(&resp.ipz_rqueue, &my_qp->ipz_rqueue);
-               resp.fw_handle_ofs = (u32)
-                       (my_qp->galpas.user.fw_handle & (PAGE_SIZE - 1));
-
-               if (ib_copy_to_udata(udata, &resp, sizeof resp)) {
-                       ehca_err(pd->device, "Copy to udata failed");
-                       ret = -EINVAL;
-                       goto create_qp_exit8;
-               }
-       }
-
-       return my_qp;
-
-create_qp_exit8:
-       ehca_cq_unassign_qp(my_qp->send_cq, my_qp->real_qp_num);
-
-create_qp_exit7:
-       kfree(my_qp->mod_qp_parm);
-
-create_qp_exit6:
-       if (HAS_RQ(my_qp) && !is_user)
-               vfree(my_qp->rq_map.map);
-
-create_qp_exit5:
-       if (HAS_RQ(my_qp))
-               ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue);
-
-create_qp_exit4:
-       if (HAS_SQ(my_qp) && !is_user)
-               vfree(my_qp->sq_map.map);
-
-create_qp_exit3:
-       if (HAS_SQ(my_qp))
-               ipz_queue_dtor(my_pd, &my_qp->ipz_squeue);
-
-create_qp_exit2:
-       hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp);
-
-create_qp_exit1:
-       write_lock_irqsave(&ehca_qp_idr_lock, flags);
-       idr_remove(&ehca_qp_idr, my_qp->token);
-       write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
-
-create_qp_exit0:
-       kmem_cache_free(qp_cache, my_qp);
-       atomic_dec(&shca->num_qps);
-       return ERR_PTR(ret);
-}
-
-struct ib_qp *ehca_create_qp(struct ib_pd *pd,
-                            struct ib_qp_init_attr *qp_init_attr,
-                            struct ib_udata *udata)
-{
-       struct ehca_qp *ret;
-
-       ret = internal_create_qp(pd, qp_init_attr, NULL, udata, 0);
-       return IS_ERR(ret) ? (struct ib_qp *)ret : &ret->ib_qp;
-}
-
-static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
-                              struct ib_uobject *uobject);
-
-struct ib_srq *ehca_create_srq(struct ib_pd *pd,
-                              struct ib_srq_init_attr *srq_init_attr,
-                              struct ib_udata *udata)
-{
-       struct ib_qp_init_attr qp_init_attr;
-       struct ehca_qp *my_qp;
-       struct ib_srq *ret;
-       struct ehca_shca *shca = container_of(pd->device, struct ehca_shca,
-                                             ib_device);
-       struct hcp_modify_qp_control_block *mqpcb;
-       u64 hret, update_mask;
-
-       if (srq_init_attr->srq_type != IB_SRQT_BASIC)
-               return ERR_PTR(-ENOSYS);
-
-       /* For common attributes, internal_create_qp() takes its info
-        * out of qp_init_attr, so copy all common attrs there.
-        */
-       memset(&qp_init_attr, 0, sizeof(qp_init_attr));
-       qp_init_attr.event_handler = srq_init_attr->event_handler;
-       qp_init_attr.qp_context = srq_init_attr->srq_context;
-       qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
-       qp_init_attr.qp_type = IB_QPT_RC;
-       qp_init_attr.cap.max_recv_wr = srq_init_attr->attr.max_wr;
-       qp_init_attr.cap.max_recv_sge = srq_init_attr->attr.max_sge;
-
-       my_qp = internal_create_qp(pd, &qp_init_attr, srq_init_attr, udata, 1);
-       if (IS_ERR(my_qp))
-               return (struct ib_srq *)my_qp;
-
-       /* copy back return values */
-       srq_init_attr->attr.max_wr = qp_init_attr.cap.max_recv_wr;
-       srq_init_attr->attr.max_sge = 3;
-
-       /* drive SRQ into RTR state */
-       mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-       if (!mqpcb) {
-               ehca_err(pd->device, "Could not get zeroed page for mqpcb "
-                        "ehca_qp=%p qp_num=%x ", my_qp, my_qp->real_qp_num);
-               ret = ERR_PTR(-ENOMEM);
-               goto create_srq1;
-       }
-
-       mqpcb->qp_state = EHCA_QPS_INIT;
-       mqpcb->prim_phys_port = 1;
-       update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1);
-       hret = hipz_h_modify_qp(shca->ipz_hca_handle,
-                               my_qp->ipz_qp_handle,
-                               &my_qp->pf,
-                               update_mask,
-                               mqpcb, my_qp->galpas.kernel);
-       if (hret != H_SUCCESS) {
-               ehca_err(pd->device, "Could not modify SRQ to INIT "
-                        "ehca_qp=%p qp_num=%x h_ret=%lli",
-                        my_qp, my_qp->real_qp_num, hret);
-               goto create_srq2;
-       }
-
-       mqpcb->qp_enable = 1;
-       update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_ENABLE, 1);
-       hret = hipz_h_modify_qp(shca->ipz_hca_handle,
-                               my_qp->ipz_qp_handle,
-                               &my_qp->pf,
-                               update_mask,
-                               mqpcb, my_qp->galpas.kernel);
-       if (hret != H_SUCCESS) {
-               ehca_err(pd->device, "Could not enable SRQ "
-                        "ehca_qp=%p qp_num=%x h_ret=%lli",
-                        my_qp, my_qp->real_qp_num, hret);
-               goto create_srq2;
-       }
-
-       mqpcb->qp_state  = EHCA_QPS_RTR;
-       update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1);
-       hret = hipz_h_modify_qp(shca->ipz_hca_handle,
-                               my_qp->ipz_qp_handle,
-                               &my_qp->pf,
-                               update_mask,
-                               mqpcb, my_qp->galpas.kernel);
-       if (hret != H_SUCCESS) {
-               ehca_err(pd->device, "Could not modify SRQ to RTR "
-                        "ehca_qp=%p qp_num=%x h_ret=%lli",
-                        my_qp, my_qp->real_qp_num, hret);
-               goto create_srq2;
-       }
-
-       ehca_free_fw_ctrlblock(mqpcb);
-
-       return &my_qp->ib_srq;
-
-create_srq2:
-       ret = ERR_PTR(ehca2ib_return_code(hret));
-       ehca_free_fw_ctrlblock(mqpcb);
-
-create_srq1:
-       internal_destroy_qp(pd->device, my_qp, my_qp->ib_srq.uobject);
-
-       return ret;
-}
-
-/*
- * prepare_sqe_rts called by internal_modify_qp() at trans sqe -> rts
- * set purge bit of bad wqe and subsequent wqes to avoid reentering sqe
- * returns total number of bad wqes in bad_wqe_cnt
- */
-static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca,
-                          int *bad_wqe_cnt)
-{
-       u64 h_ret;
-       struct ipz_queue *squeue;
-       void *bad_send_wqe_p, *bad_send_wqe_v;
-       u64 q_ofs;
-       struct ehca_wqe *wqe;
-       int qp_num = my_qp->ib_qp.qp_num;
-
-       /* get send wqe pointer */
-       h_ret = hipz_h_disable_and_get_wqe(shca->ipz_hca_handle,
-                                          my_qp->ipz_qp_handle, &my_qp->pf,
-                                          &bad_send_wqe_p, NULL, 2);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(&shca->ib_device, "hipz_h_disable_and_get_wqe() failed"
-                        " ehca_qp=%p qp_num=%x h_ret=%lli",
-                        my_qp, qp_num, h_ret);
-               return ehca2ib_return_code(h_ret);
-       }
-       bad_send_wqe_p = (void *)((u64)bad_send_wqe_p & (~(1L << 63)));
-       ehca_dbg(&shca->ib_device, "qp_num=%x bad_send_wqe_p=%p",
-                qp_num, bad_send_wqe_p);
-       /* convert wqe pointer to vadr */
-       bad_send_wqe_v = __va((u64)bad_send_wqe_p);
-       if (ehca_debug_level >= 2)
-               ehca_dmp(bad_send_wqe_v, 32, "qp_num=%x bad_wqe", qp_num);
-       squeue = &my_qp->ipz_squeue;
-       if (ipz_queue_abs_to_offset(squeue, (u64)bad_send_wqe_p, &q_ofs)) {
-               ehca_err(&shca->ib_device, "failed to get wqe offset qp_num=%x"
-                        " bad_send_wqe_p=%p", qp_num, bad_send_wqe_p);
-               return -EFAULT;
-       }
-
-       /* loop sets wqe's purge bit */
-       wqe = (struct ehca_wqe *)ipz_qeit_calc(squeue, q_ofs);
-       *bad_wqe_cnt = 0;
-       while (wqe->optype != 0xff && wqe->wqef != 0xff) {
-               if (ehca_debug_level >= 2)
-                       ehca_dmp(wqe, 32, "qp_num=%x wqe", qp_num);
-               wqe->nr_of_data_seg = 0; /* suppress data access */
-               wqe->wqef = WQEF_PURGE; /* WQE to be purged */
-               q_ofs = ipz_queue_advance_offset(squeue, q_ofs);
-               wqe = (struct ehca_wqe *)ipz_qeit_calc(squeue, q_ofs);
-               *bad_wqe_cnt = (*bad_wqe_cnt)+1;
-       }
-       /*
-        * bad wqe will be reprocessed and ignored when pol_cq() is called,
-        *  i.e. nr of wqes with flush error status is one less
-        */
-       ehca_dbg(&shca->ib_device, "qp_num=%x flusherr_wqe_cnt=%x",
-                qp_num, (*bad_wqe_cnt)-1);
-       wqe->wqef = 0;
-
-       return 0;
-}
-
-static int calc_left_cqes(u64 wqe_p, struct ipz_queue *ipz_queue,
-                         struct ehca_queue_map *qmap)
-{
-       void *wqe_v;
-       u64 q_ofs;
-       u32 wqe_idx;
-       unsigned int tail_idx;
-
-       /* convert real to abs address */
-       wqe_p = wqe_p & (~(1UL << 63));
-
-       wqe_v = __va(wqe_p);
-
-       if (ipz_queue_abs_to_offset(ipz_queue, wqe_p, &q_ofs)) {
-               ehca_gen_err("Invalid offset for calculating left cqes "
-                               "wqe_p=%#llx wqe_v=%p\n", wqe_p, wqe_v);
-               return -EFAULT;
-       }
-
-       tail_idx = next_index(qmap->tail, qmap->entries);
-       wqe_idx = q_ofs / ipz_queue->qe_size;
-
-       /* check all processed wqes, whether a cqe is requested or not */
-       while (tail_idx != wqe_idx) {
-               if (qmap->map[tail_idx].cqe_req)
-                       qmap->left_to_poll++;
-               tail_idx = next_index(tail_idx, qmap->entries);
-       }
-       /* save index in queue, where we have to start flushing */
-       qmap->next_wqe_idx = wqe_idx;
-       return 0;
-}
-
-static int check_for_left_cqes(struct ehca_qp *my_qp, struct ehca_shca *shca)
-{
-       u64 h_ret;
-       void *send_wqe_p, *recv_wqe_p;
-       int ret;
-       unsigned long flags;
-       int qp_num = my_qp->ib_qp.qp_num;
-
-       /* this hcall is not supported on base QPs */
-       if (my_qp->ext_type != EQPT_SRQBASE) {
-               /* get send and receive wqe pointer */
-               h_ret = hipz_h_disable_and_get_wqe(shca->ipz_hca_handle,
-                               my_qp->ipz_qp_handle, &my_qp->pf,
-                               &send_wqe_p, &recv_wqe_p, 4);
-               if (h_ret != H_SUCCESS) {
-                       ehca_err(&shca->ib_device, "disable_and_get_wqe() "
-                                "failed ehca_qp=%p qp_num=%x h_ret=%lli",
-                                my_qp, qp_num, h_ret);
-                       return ehca2ib_return_code(h_ret);
-               }
-
-               /*
-                * acquire lock to ensure that nobody is polling the cq which
-                * could mean that the qmap->tail pointer is in an
-                * inconsistent state.
-                */
-               spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
-               ret = calc_left_cqes((u64)send_wqe_p, &my_qp->ipz_squeue,
-                               &my_qp->sq_map);
-               spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
-               if (ret)
-                       return ret;
-
-
-               spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
-               ret = calc_left_cqes((u64)recv_wqe_p, &my_qp->ipz_rqueue,
-                               &my_qp->rq_map);
-               spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags);
-               if (ret)
-                       return ret;
-       } else {
-               spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
-               my_qp->sq_map.left_to_poll = 0;
-               my_qp->sq_map.next_wqe_idx = next_index(my_qp->sq_map.tail,
-                                                       my_qp->sq_map.entries);
-               spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
-
-               spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
-               my_qp->rq_map.left_to_poll = 0;
-               my_qp->rq_map.next_wqe_idx = next_index(my_qp->rq_map.tail,
-                                                       my_qp->rq_map.entries);
-               spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags);
-       }
-
-       /* this assures flush cqes being generated only for pending wqes */
-       if ((my_qp->sq_map.left_to_poll == 0) &&
-                               (my_qp->rq_map.left_to_poll == 0)) {
-               spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
-               ehca_add_to_err_list(my_qp, 1);
-               spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
-
-               if (HAS_RQ(my_qp)) {
-                       spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
-                       ehca_add_to_err_list(my_qp, 0);
-                       spin_unlock_irqrestore(&my_qp->recv_cq->spinlock,
-                                       flags);
-               }
-       }
-
-       return 0;
-}
-
-/*
- * internal_modify_qp with circumvention to handle aqp0 properly
- * smi_reset2init indicates if this is an internal reset-to-init-call for
- * smi. This flag must always be zero if called from ehca_modify_qp()!
- * This internal func was intorduced to avoid recursion of ehca_modify_qp()!
- */
-static int internal_modify_qp(struct ib_qp *ibqp,
-                             struct ib_qp_attr *attr,
-                             int attr_mask, int smi_reset2init)
-{
-       enum ib_qp_state qp_cur_state, qp_new_state;
-       int cnt, qp_attr_idx, ret = 0;
-       enum ib_qp_statetrans statetrans;
-       struct hcp_modify_qp_control_block *mqpcb;
-       struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
-       struct ehca_shca *shca =
-               container_of(ibqp->pd->device, struct ehca_shca, ib_device);
-       u64 update_mask;
-       u64 h_ret;
-       int bad_wqe_cnt = 0;
-       int is_user = 0;
-       int squeue_locked = 0;
-       unsigned long flags = 0;
-
-       /* do query_qp to obtain current attr values */
-       mqpcb = ehca_alloc_fw_ctrlblock(GFP_ATOMIC);
-       if (!mqpcb) {
-               ehca_err(ibqp->device, "Could not get zeroed page for mqpcb "
-                        "ehca_qp=%p qp_num=%x ", my_qp, ibqp->qp_num);
-               return -ENOMEM;
-       }
-
-       h_ret = hipz_h_query_qp(shca->ipz_hca_handle,
-                               my_qp->ipz_qp_handle,
-                               &my_qp->pf,
-                               mqpcb, my_qp->galpas.kernel);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(ibqp->device, "hipz_h_query_qp() failed "
-                        "ehca_qp=%p qp_num=%x h_ret=%lli",
-                        my_qp, ibqp->qp_num, h_ret);
-               ret = ehca2ib_return_code(h_ret);
-               goto modify_qp_exit1;
-       }
-       if (ibqp->uobject)
-               is_user = 1;
-
-       qp_cur_state = ehca2ib_qp_state(mqpcb->qp_state);
-
-       if (qp_cur_state == -EINVAL) {  /* invalid qp state */
-               ret = -EINVAL;
-               ehca_err(ibqp->device, "Invalid current ehca_qp_state=%x "
-                        "ehca_qp=%p qp_num=%x",
-                        mqpcb->qp_state, my_qp, ibqp->qp_num);
-               goto modify_qp_exit1;
-       }
-       /*
-        * circumvention to set aqp0 initial state to init
-        * as expected by IB spec
-        */
-       if (smi_reset2init == 0 &&
-           ibqp->qp_type == IB_QPT_SMI &&
-           qp_cur_state == IB_QPS_RESET &&
-           (attr_mask & IB_QP_STATE) &&
-           attr->qp_state == IB_QPS_INIT) { /* RESET -> INIT */
-               struct ib_qp_attr smiqp_attr = {
-                       .qp_state = IB_QPS_INIT,
-                       .port_num = my_qp->init_attr.port_num,
-                       .pkey_index = 0,
-                       .qkey = 0
-               };
-               int smiqp_attr_mask = IB_QP_STATE | IB_QP_PORT |
-                       IB_QP_PKEY_INDEX | IB_QP_QKEY;
-               int smirc = internal_modify_qp(
-                       ibqp, &smiqp_attr, smiqp_attr_mask, 1);
-               if (smirc) {
-                       ehca_err(ibqp->device, "SMI RESET -> INIT failed. "
-                                "ehca_modify_qp() rc=%i", smirc);
-                       ret = H_PARAMETER;
-                       goto modify_qp_exit1;
-               }
-               qp_cur_state = IB_QPS_INIT;
-               ehca_dbg(ibqp->device, "SMI RESET -> INIT succeeded");
-       }
-       /* is transmitted current state  equal to "real" current state */
-       if ((attr_mask & IB_QP_CUR_STATE) &&
-           qp_cur_state != attr->cur_qp_state) {
-               ret = -EINVAL;
-               ehca_err(ibqp->device,
-                        "Invalid IB_QP_CUR_STATE attr->curr_qp_state=%x <>"
-                        " actual cur_qp_state=%x. ehca_qp=%p qp_num=%x",
-                        attr->cur_qp_state, qp_cur_state, my_qp, ibqp->qp_num);
-               goto modify_qp_exit1;
-       }
-
-       ehca_dbg(ibqp->device, "ehca_qp=%p qp_num=%x current qp_state=%x "
-                "new qp_state=%x attribute_mask=%x",
-                my_qp, ibqp->qp_num, qp_cur_state, attr->qp_state, attr_mask);
-
-       qp_new_state = attr_mask & IB_QP_STATE ? attr->qp_state : qp_cur_state;
-       if (!smi_reset2init &&
-           !ib_modify_qp_is_ok(qp_cur_state, qp_new_state, ibqp->qp_type,
-                               attr_mask, IB_LINK_LAYER_UNSPECIFIED)) {
-               ret = -EINVAL;
-               ehca_err(ibqp->device,
-                        "Invalid qp transition new_state=%x cur_state=%x "
-                        "ehca_qp=%p qp_num=%x attr_mask=%x", qp_new_state,
-                        qp_cur_state, my_qp, ibqp->qp_num, attr_mask);
-               goto modify_qp_exit1;
-       }
-
-       mqpcb->qp_state = ib2ehca_qp_state(qp_new_state);
-       if (mqpcb->qp_state)
-               update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1);
-       else {
-               ret = -EINVAL;
-               ehca_err(ibqp->device, "Invalid new qp state=%x "
-                        "ehca_qp=%p qp_num=%x",
-                        qp_new_state, my_qp, ibqp->qp_num);
-               goto modify_qp_exit1;
-       }
-
-       /* retrieve state transition struct to get req and opt attrs */
-       statetrans = get_modqp_statetrans(qp_cur_state, qp_new_state);
-       if (statetrans < 0) {
-               ret = -EINVAL;
-               ehca_err(ibqp->device, "<INVALID STATE CHANGE> qp_cur_state=%x "
-                        "new_qp_state=%x State_xsition=%x ehca_qp=%p "
-                        "qp_num=%x", qp_cur_state, qp_new_state,
-                        statetrans, my_qp, ibqp->qp_num);
-               goto modify_qp_exit1;
-       }
-
-       qp_attr_idx = ib2ehcaqptype(ibqp->qp_type);
-
-       if (qp_attr_idx < 0) {
-               ret = qp_attr_idx;
-               ehca_err(ibqp->device,
-                        "Invalid QP type=%x ehca_qp=%p qp_num=%x",
-                        ibqp->qp_type, my_qp, ibqp->qp_num);
-               goto modify_qp_exit1;
-       }
-
-       ehca_dbg(ibqp->device,
-                "ehca_qp=%p qp_num=%x <VALID STATE CHANGE> qp_state_xsit=%x",
-                my_qp, ibqp->qp_num, statetrans);
-
-       /* eHCA2 rev2 and higher require the SEND_GRH_FLAG to be set
-        * in non-LL UD QPs.
-        */
-       if ((my_qp->qp_type == IB_QPT_UD) &&
-           (my_qp->ext_type != EQPT_LLQP) &&
-           (statetrans == IB_QPST_INIT2RTR) &&
-           (shca->hw_level >= 0x22)) {
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG, 1);
-               mqpcb->send_grh_flag = 1;
-       }
-
-       /* sqe -> rts: set purge bit of bad wqe before actual trans */
-       if ((my_qp->qp_type == IB_QPT_UD ||
-            my_qp->qp_type == IB_QPT_GSI ||
-            my_qp->qp_type == IB_QPT_SMI) &&
-           statetrans == IB_QPST_SQE2RTS) {
-               /* mark next free wqe if kernel */
-               if (!ibqp->uobject) {
-                       struct ehca_wqe *wqe;
-                       /* lock send queue */
-                       spin_lock_irqsave(&my_qp->spinlock_s, flags);
-                       squeue_locked = 1;
-                       /* mark next free wqe */
-                       wqe = (struct ehca_wqe *)
-                               ipz_qeit_get(&my_qp->ipz_squeue);
-                       wqe->optype = wqe->wqef = 0xff;
-                       ehca_dbg(ibqp->device, "qp_num=%x next_free_wqe=%p",
-                                ibqp->qp_num, wqe);
-               }
-               ret = prepare_sqe_rts(my_qp, shca, &bad_wqe_cnt);
-               if (ret) {
-                       ehca_err(ibqp->device, "prepare_sqe_rts() failed "
-                                "ehca_qp=%p qp_num=%x ret=%i",
-                                my_qp, ibqp->qp_num, ret);
-                       goto modify_qp_exit2;
-               }
-       }
-
-       /*
-        * enable RDMA_Atomic_Control if reset->init und reliable con
-        * this is necessary since gen2 does not provide that flag,
-        * but pHyp requires it
-        */
-       if (statetrans == IB_QPST_RESET2INIT &&
-           (ibqp->qp_type == IB_QPT_RC || ibqp->qp_type == IB_QPT_UC)) {
-               mqpcb->rdma_atomic_ctrl = 3;
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RDMA_ATOMIC_CTRL, 1);
-       }
-       /* circ. pHyp requires #RDMA/Atomic Resp Res for UC INIT -> RTR */
-       if (statetrans == IB_QPST_INIT2RTR &&
-           (ibqp->qp_type == IB_QPT_UC) &&
-           !(attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)) {
-               mqpcb->rdma_nr_atomic_resp_res = 1; /* default to 1 */
-               update_mask |=
-                       EHCA_BMASK_SET(MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES, 1);
-       }
-
-       if (attr_mask & IB_QP_PKEY_INDEX) {
-               if (attr->pkey_index >= 16) {
-                       ret = -EINVAL;
-                       ehca_err(ibqp->device, "Invalid pkey_index=%x. "
-                                "ehca_qp=%p qp_num=%x max_pkey_index=f",
-                                attr->pkey_index, my_qp, ibqp->qp_num);
-                       goto modify_qp_exit2;
-               }
-               mqpcb->prim_p_key_idx = attr->pkey_index;
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_P_KEY_IDX, 1);
-       }
-       if (attr_mask & IB_QP_PORT) {
-               struct ehca_sport *sport;
-               struct ehca_qp *aqp1;
-               if (attr->port_num < 1 || attr->port_num > shca->num_ports) {
-                       ret = -EINVAL;
-                       ehca_err(ibqp->device, "Invalid port=%x. "
-                                "ehca_qp=%p qp_num=%x num_ports=%x",
-                                attr->port_num, my_qp, ibqp->qp_num,
-                                shca->num_ports);
-                       goto modify_qp_exit2;
-               }
-               sport = &shca->sport[attr->port_num - 1];
-               if (!sport->ibqp_sqp[IB_QPT_GSI]) {
-                       /* should not occur */
-                       ret = -EFAULT;
-                       ehca_err(ibqp->device, "AQP1 was not created for "
-                                "port=%x", attr->port_num);
-                       goto modify_qp_exit2;
-               }
-               aqp1 = container_of(sport->ibqp_sqp[IB_QPT_GSI],
-                                   struct ehca_qp, ib_qp);
-               if (ibqp->qp_type != IB_QPT_GSI &&
-                   ibqp->qp_type != IB_QPT_SMI &&
-                   aqp1->mod_qp_parm) {
-                       /*
-                        * firmware will reject this modify_qp() because
-                        * port is not activated/initialized fully
-                        */
-                       ret = -EFAULT;
-                       ehca_warn(ibqp->device, "Couldn't modify qp port=%x: "
-                                 "either port is being activated (try again) "
-                                 "or cabling issue", attr->port_num);
-                       goto modify_qp_exit2;
-               }
-               mqpcb->prim_phys_port = attr->port_num;
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_PHYS_PORT, 1);
-       }
-       if (attr_mask & IB_QP_QKEY) {
-               mqpcb->qkey = attr->qkey;
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_QKEY, 1);
-       }
-       if (attr_mask & IB_QP_AV) {
-               mqpcb->dlid = attr->ah_attr.dlid;
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DLID, 1);
-               mqpcb->source_path_bits = attr->ah_attr.src_path_bits;
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SOURCE_PATH_BITS, 1);
-               mqpcb->service_level = attr->ah_attr.sl;
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL, 1);
-
-               if (ehca_calc_ipd(shca, mqpcb->prim_phys_port,
-                                 attr->ah_attr.static_rate,
-                                 &mqpcb->max_static_rate)) {
-                       ret = -EINVAL;
-                       goto modify_qp_exit2;
-               }
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE, 1);
-
-               /*
-                * Always supply the GRH flag, even if it's zero, to give the
-                * hypervisor a clear "yes" or "no" instead of a "perhaps"
-                */
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG, 1);
-
-               /*
-                * only if GRH is TRUE we might consider SOURCE_GID_IDX
-                * and DEST_GID otherwise phype will return H_ATTR_PARM!!!
-                */
-               if (attr->ah_attr.ah_flags == IB_AH_GRH) {
-                       mqpcb->send_grh_flag = 1;
-
-                       mqpcb->source_gid_idx = attr->ah_attr.grh.sgid_index;
-                       update_mask |=
-                               EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX, 1);
-
-                       for (cnt = 0; cnt < 16; cnt++)
-                               mqpcb->dest_gid.byte[cnt] =
-                                       attr->ah_attr.grh.dgid.raw[cnt];
-
-                       update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DEST_GID, 1);
-                       mqpcb->flow_label = attr->ah_attr.grh.flow_label;
-                       update_mask |= EHCA_BMASK_SET(MQPCB_MASK_FLOW_LABEL, 1);
-                       mqpcb->hop_limit = attr->ah_attr.grh.hop_limit;
-                       update_mask |= EHCA_BMASK_SET(MQPCB_MASK_HOP_LIMIT, 1);
-                       mqpcb->traffic_class = attr->ah_attr.grh.traffic_class;
-                       update_mask |=
-                               EHCA_BMASK_SET(MQPCB_MASK_TRAFFIC_CLASS, 1);
-               }
-       }
-
-       if (attr_mask & IB_QP_PATH_MTU) {
-               /* store ld(MTU) */
-               my_qp->mtu_shift = attr->path_mtu + 7;
-               mqpcb->path_mtu = attr->path_mtu;
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PATH_MTU, 1);
-       }
-       if (attr_mask & IB_QP_TIMEOUT) {
-               mqpcb->timeout = attr->timeout;
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_TIMEOUT, 1);
-       }
-       if (attr_mask & IB_QP_RETRY_CNT) {
-               mqpcb->retry_count = attr->retry_cnt;
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RETRY_COUNT, 1);
-       }
-       if (attr_mask & IB_QP_RNR_RETRY) {
-               mqpcb->rnr_retry_count = attr->rnr_retry;
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RNR_RETRY_COUNT, 1);
-       }
-       if (attr_mask & IB_QP_RQ_PSN) {
-               mqpcb->receive_psn = attr->rq_psn;
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RECEIVE_PSN, 1);
-       }
-       if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
-               mqpcb->rdma_nr_atomic_resp_res = attr->max_dest_rd_atomic < 3 ?
-                       attr->max_dest_rd_atomic : 2;
-               update_mask |=
-                       EHCA_BMASK_SET(MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES, 1);
-       }
-       if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
-               mqpcb->rdma_atomic_outst_dest_qp = attr->max_rd_atomic < 3 ?
-                       attr->max_rd_atomic : 2;
-               update_mask |=
-                       EHCA_BMASK_SET
-                       (MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP, 1);
-       }
-       if (attr_mask & IB_QP_ALT_PATH) {
-               if (attr->alt_port_num < 1
-                   || attr->alt_port_num > shca->num_ports) {
-                       ret = -EINVAL;
-                       ehca_err(ibqp->device, "Invalid alt_port=%x. "
-                                "ehca_qp=%p qp_num=%x num_ports=%x",
-                                attr->alt_port_num, my_qp, ibqp->qp_num,
-                                shca->num_ports);
-                       goto modify_qp_exit2;
-               }
-               mqpcb->alt_phys_port = attr->alt_port_num;
-
-               if (attr->alt_pkey_index >= 16) {
-                       ret = -EINVAL;
-                       ehca_err(ibqp->device, "Invalid alt_pkey_index=%x. "
-                                "ehca_qp=%p qp_num=%x max_pkey_index=f",
-                                attr->pkey_index, my_qp, ibqp->qp_num);
-                       goto modify_qp_exit2;
-               }
-               mqpcb->alt_p_key_idx = attr->alt_pkey_index;
-
-               mqpcb->timeout_al = attr->alt_timeout;
-               mqpcb->dlid_al = attr->alt_ah_attr.dlid;
-               mqpcb->source_path_bits_al = attr->alt_ah_attr.src_path_bits;
-               mqpcb->service_level_al = attr->alt_ah_attr.sl;
-
-               if (ehca_calc_ipd(shca, mqpcb->alt_phys_port,
-                                 attr->alt_ah_attr.static_rate,
-                                 &mqpcb->max_static_rate_al)) {
-                       ret = -EINVAL;
-                       goto modify_qp_exit2;
-               }
-
-               /* OpenIB doesn't support alternate retry counts - copy them */
-               mqpcb->retry_count_al = mqpcb->retry_count;
-               mqpcb->rnr_retry_count_al = mqpcb->rnr_retry_count;
-
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_ALT_PHYS_PORT, 1)
-                       | EHCA_BMASK_SET(MQPCB_MASK_ALT_P_KEY_IDX, 1)
-                       | EHCA_BMASK_SET(MQPCB_MASK_TIMEOUT_AL, 1)
-                       | EHCA_BMASK_SET(MQPCB_MASK_DLID_AL, 1)
-                       | EHCA_BMASK_SET(MQPCB_MASK_SOURCE_PATH_BITS_AL, 1)
-                       | EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL_AL, 1)
-                       | EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE_AL, 1)
-                       | EHCA_BMASK_SET(MQPCB_MASK_RETRY_COUNT_AL, 1)
-                       | EHCA_BMASK_SET(MQPCB_MASK_RNR_RETRY_COUNT_AL, 1);
-
-               /*
-                * Always supply the GRH flag, even if it's zero, to give the
-                * hypervisor a clear "yes" or "no" instead of a "perhaps"
-                */
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG_AL, 1);
-
-               /*
-                * only if GRH is TRUE we might consider SOURCE_GID_IDX
-                * and DEST_GID otherwise phype will return H_ATTR_PARM!!!
-                */
-               if (attr->alt_ah_attr.ah_flags == IB_AH_GRH) {
-                       mqpcb->send_grh_flag_al = 1;
-
-                       for (cnt = 0; cnt < 16; cnt++)
-                               mqpcb->dest_gid_al.byte[cnt] =
-                                       attr->alt_ah_attr.grh.dgid.raw[cnt];
-                       mqpcb->source_gid_idx_al =
-                               attr->alt_ah_attr.grh.sgid_index;
-                       mqpcb->flow_label_al = attr->alt_ah_attr.grh.flow_label;
-                       mqpcb->hop_limit_al = attr->alt_ah_attr.grh.hop_limit;
-                       mqpcb->traffic_class_al =
-                               attr->alt_ah_attr.grh.traffic_class;
-
-                       update_mask |=
-                               EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX_AL, 1)
-                               | EHCA_BMASK_SET(MQPCB_MASK_DEST_GID_AL, 1)
-                               | EHCA_BMASK_SET(MQPCB_MASK_FLOW_LABEL_AL, 1)
-                               | EHCA_BMASK_SET(MQPCB_MASK_HOP_LIMIT_AL, 1) |
-                               EHCA_BMASK_SET(MQPCB_MASK_TRAFFIC_CLASS_AL, 1);
-               }
-       }
-
-       if (attr_mask & IB_QP_MIN_RNR_TIMER) {
-               mqpcb->min_rnr_nak_timer_field = attr->min_rnr_timer;
-               update_mask |=
-                       EHCA_BMASK_SET(MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD, 1);
-       }
-
-       if (attr_mask & IB_QP_SQ_PSN) {
-               mqpcb->send_psn = attr->sq_psn;
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_PSN, 1);
-       }
-
-       if (attr_mask & IB_QP_DEST_QPN) {
-               mqpcb->dest_qp_nr = attr->dest_qp_num;
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DEST_QP_NR, 1);
-       }
-
-       if (attr_mask & IB_QP_PATH_MIG_STATE) {
-               if (attr->path_mig_state != IB_MIG_REARM
-                   && attr->path_mig_state != IB_MIG_MIGRATED) {
-                       ret = -EINVAL;
-                       ehca_err(ibqp->device, "Invalid mig_state=%x",
-                                attr->path_mig_state);
-                       goto modify_qp_exit2;
-               }
-               mqpcb->path_migration_state = attr->path_mig_state + 1;
-               if (attr->path_mig_state == IB_MIG_REARM)
-                       my_qp->mig_armed = 1;
-               update_mask |=
-                       EHCA_BMASK_SET(MQPCB_MASK_PATH_MIGRATION_STATE, 1);
-       }
-
-       if (attr_mask & IB_QP_CAP) {
-               mqpcb->max_nr_outst_send_wr = attr->cap.max_send_wr+1;
-               update_mask |=
-                       EHCA_BMASK_SET(MQPCB_MASK_MAX_NR_OUTST_SEND_WR, 1);
-               mqpcb->max_nr_outst_recv_wr = attr->cap.max_recv_wr+1;
-               update_mask |=
-                       EHCA_BMASK_SET(MQPCB_MASK_MAX_NR_OUTST_RECV_WR, 1);
-               /* no support for max_send/recv_sge yet */
-       }
-
-       if (ehca_debug_level >= 2)
-               ehca_dmp(mqpcb, 4*70, "qp_num=%x", ibqp->qp_num);
-
-       h_ret = hipz_h_modify_qp(shca->ipz_hca_handle,
-                                my_qp->ipz_qp_handle,
-                                &my_qp->pf,
-                                update_mask,
-                                mqpcb, my_qp->galpas.kernel);
-
-       if (h_ret != H_SUCCESS) {
-               ret = ehca2ib_return_code(h_ret);
-               ehca_err(ibqp->device, "hipz_h_modify_qp() failed h_ret=%lli "
-                        "ehca_qp=%p qp_num=%x", h_ret, my_qp, ibqp->qp_num);
-               goto modify_qp_exit2;
-       }
-
-       if ((my_qp->qp_type == IB_QPT_UD ||
-            my_qp->qp_type == IB_QPT_GSI ||
-            my_qp->qp_type == IB_QPT_SMI) &&
-           statetrans == IB_QPST_SQE2RTS) {
-               /* doorbell to reprocessing wqes */
-               iosync(); /* serialize GAL register access */
-               hipz_update_sqa(my_qp, bad_wqe_cnt-1);
-               ehca_gen_dbg("doorbell for %x wqes", bad_wqe_cnt);
-       }
-
-       if (statetrans == IB_QPST_RESET2INIT ||
-           statetrans == IB_QPST_INIT2INIT) {
-               mqpcb->qp_enable = 1;
-               mqpcb->qp_state = EHCA_QPS_INIT;
-               update_mask = 0;
-               update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_ENABLE, 1);
-
-               h_ret = hipz_h_modify_qp(shca->ipz_hca_handle,
-                                        my_qp->ipz_qp_handle,
-                                        &my_qp->pf,
-                                        update_mask,
-                                        mqpcb,
-                                        my_qp->galpas.kernel);
-
-               if (h_ret != H_SUCCESS) {
-                       ret = ehca2ib_return_code(h_ret);
-                       ehca_err(ibqp->device, "ENABLE in context of "
-                                "RESET_2_INIT failed! Maybe you didn't get "
-                                "a LID h_ret=%lli ehca_qp=%p qp_num=%x",
-                                h_ret, my_qp, ibqp->qp_num);
-                       goto modify_qp_exit2;
-               }
-       }
-       if ((qp_new_state == IB_QPS_ERR) && (qp_cur_state != IB_QPS_ERR)
-           && !is_user) {
-               ret = check_for_left_cqes(my_qp, shca);
-               if (ret)
-                       goto modify_qp_exit2;
-       }
-
-       if (statetrans == IB_QPST_ANY2RESET) {
-               ipz_qeit_reset(&my_qp->ipz_rqueue);
-               ipz_qeit_reset(&my_qp->ipz_squeue);
-
-               if (qp_cur_state == IB_QPS_ERR && !is_user) {
-                       del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node);
-
-                       if (HAS_RQ(my_qp))
-                               del_from_err_list(my_qp->recv_cq,
-                                                 &my_qp->rq_err_node);
-               }
-               if (!is_user)
-                       reset_queue_map(&my_qp->sq_map);
-
-               if (HAS_RQ(my_qp) && !is_user)
-                       reset_queue_map(&my_qp->rq_map);
-       }
-
-       if (attr_mask & IB_QP_QKEY)
-               my_qp->qkey = attr->qkey;
-
-modify_qp_exit2:
-       if (squeue_locked) { /* this means: sqe -> rts */
-               spin_unlock_irqrestore(&my_qp->spinlock_s, flags);
-               my_qp->sqerr_purgeflag = 1;
-       }
-
-modify_qp_exit1:
-       ehca_free_fw_ctrlblock(mqpcb);
-
-       return ret;
-}
-
-int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
-                  struct ib_udata *udata)
-{
-       int ret = 0;
-
-       struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca,
-                                             ib_device);
-       struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
-
-       /* The if-block below caches qp_attr to be modified for GSI and SMI
-        * qps during the initialization by ib_mad. When the respective port
-        * is activated, ie we got an event PORT_ACTIVE, we'll replay the
-        * cached modify calls sequence, see ehca_recover_sqs() below.
-        * Why that is required:
-        * 1) If one port is connected, older code requires that port one
-        *    to be connected and module option nr_ports=1 to be given by
-        *    user, which is very inconvenient for end user.
-        * 2) Firmware accepts modify_qp() only if respective port has become
-        *    active. Older code had a wait loop of 30sec create_qp()/
-        *    define_aqp1(), which is not appropriate in practice. This
-        *    code now removes that wait loop, see define_aqp1(), and always
-        *    reports all ports to ib_mad resp. users. Only activated ports
-        *    will then usable for the users.
-        */
-       if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) {
-               int port = my_qp->init_attr.port_num;
-               struct ehca_sport *sport = &shca->sport[port - 1];
-               unsigned long flags;
-               spin_lock_irqsave(&sport->mod_sqp_lock, flags);
-               /* cache qp_attr only during init */
-               if (my_qp->mod_qp_parm) {
-                       struct ehca_mod_qp_parm *p;
-                       if (my_qp->mod_qp_parm_idx >= EHCA_MOD_QP_PARM_MAX) {
-                               ehca_err(&shca->ib_device,
-                                        "mod_qp_parm overflow state=%x port=%x"
-                                        " type=%x", attr->qp_state,
-                                        my_qp->init_attr.port_num,
-                                        ibqp->qp_type);
-                               spin_unlock_irqrestore(&sport->mod_sqp_lock,
-                                                      flags);
-                               return -EINVAL;
-                       }
-                       p = &my_qp->mod_qp_parm[my_qp->mod_qp_parm_idx];
-                       p->mask = attr_mask;
-                       p->attr = *attr;
-                       my_qp->mod_qp_parm_idx++;
-                       ehca_dbg(&shca->ib_device,
-                                "Saved qp_attr for state=%x port=%x type=%x",
-                                attr->qp_state, my_qp->init_attr.port_num,
-                                ibqp->qp_type);
-                       spin_unlock_irqrestore(&sport->mod_sqp_lock, flags);
-                       goto out;
-               }
-               spin_unlock_irqrestore(&sport->mod_sqp_lock, flags);
-       }
-
-       ret = internal_modify_qp(ibqp, attr, attr_mask, 0);
-
-out:
-       if ((ret == 0) && (attr_mask & IB_QP_STATE))
-               my_qp->state = attr->qp_state;
-
-       return ret;
-}
-
-void ehca_recover_sqp(struct ib_qp *sqp)
-{
-       struct ehca_qp *my_sqp = container_of(sqp, struct ehca_qp, ib_qp);
-       int port = my_sqp->init_attr.port_num;
-       struct ib_qp_attr attr;
-       struct ehca_mod_qp_parm *qp_parm;
-       int i, qp_parm_idx, ret;
-       unsigned long flags, wr_cnt;
-
-       if (!my_sqp->mod_qp_parm)
-               return;
-       ehca_dbg(sqp->device, "SQP port=%x qp_num=%x", port, sqp->qp_num);
-
-       qp_parm = my_sqp->mod_qp_parm;
-       qp_parm_idx = my_sqp->mod_qp_parm_idx;
-       for (i = 0; i < qp_parm_idx; i++) {
-               attr = qp_parm[i].attr;
-               ret = internal_modify_qp(sqp, &attr, qp_parm[i].mask, 0);
-               if (ret) {
-                       ehca_err(sqp->device, "Could not modify SQP port=%x "
-                                "qp_num=%x ret=%x", port, sqp->qp_num, ret);
-                       goto free_qp_parm;
-               }
-               ehca_dbg(sqp->device, "SQP port=%x qp_num=%x in state=%x",
-                        port, sqp->qp_num, attr.qp_state);
-       }
-
-       /* re-trigger posted recv wrs */
-       wr_cnt =  my_sqp->ipz_rqueue.current_q_offset /
-               my_sqp->ipz_rqueue.qe_size;
-       if (wr_cnt) {
-               spin_lock_irqsave(&my_sqp->spinlock_r, flags);
-               hipz_update_rqa(my_sqp, wr_cnt);
-               spin_unlock_irqrestore(&my_sqp->spinlock_r, flags);
-               ehca_dbg(sqp->device, "doorbell port=%x qp_num=%x wr_cnt=%lx",
-                        port, sqp->qp_num, wr_cnt);
-       }
-
-free_qp_parm:
-       kfree(qp_parm);
-       /* this prevents subsequent calls to modify_qp() to cache qp_attr */
-       my_sqp->mod_qp_parm = NULL;
-}
-
-int ehca_query_qp(struct ib_qp *qp,
-                 struct ib_qp_attr *qp_attr,
-                 int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
-{
-       struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
-       struct ehca_shca *shca = container_of(qp->device, struct ehca_shca,
-                                             ib_device);
-       struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle;
-       struct hcp_modify_qp_control_block *qpcb;
-       int cnt, ret = 0;
-       u64 h_ret;
-
-       if (qp_attr_mask & QP_ATTR_QUERY_NOT_SUPPORTED) {
-               ehca_err(qp->device, "Invalid attribute mask "
-                        "ehca_qp=%p qp_num=%x qp_attr_mask=%x ",
-                        my_qp, qp->qp_num, qp_attr_mask);
-               return -EINVAL;
-       }
-
-       qpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-       if (!qpcb) {
-               ehca_err(qp->device, "Out of memory for qpcb "
-                        "ehca_qp=%p qp_num=%x", my_qp, qp->qp_num);
-               return -ENOMEM;
-       }
-
-       h_ret = hipz_h_query_qp(adapter_handle,
-                               my_qp->ipz_qp_handle,
-                               &my_qp->pf,
-                               qpcb, my_qp->galpas.kernel);
-
-       if (h_ret != H_SUCCESS) {
-               ret = ehca2ib_return_code(h_ret);
-               ehca_err(qp->device, "hipz_h_query_qp() failed "
-                        "ehca_qp=%p qp_num=%x h_ret=%lli",
-                        my_qp, qp->qp_num, h_ret);
-               goto query_qp_exit1;
-       }
-
-       qp_attr->cur_qp_state = ehca2ib_qp_state(qpcb->qp_state);
-       qp_attr->qp_state = qp_attr->cur_qp_state;
-
-       if (qp_attr->cur_qp_state == -EINVAL) {
-               ret = -EINVAL;
-               ehca_err(qp->device, "Got invalid ehca_qp_state=%x "
-                        "ehca_qp=%p qp_num=%x",
-                        qpcb->qp_state, my_qp, qp->qp_num);
-               goto query_qp_exit1;
-       }
-
-       if (qp_attr->qp_state == IB_QPS_SQD)
-               qp_attr->sq_draining = 1;
-
-       qp_attr->qkey = qpcb->qkey;
-       qp_attr->path_mtu = qpcb->path_mtu;
-       qp_attr->path_mig_state = qpcb->path_migration_state - 1;
-       qp_attr->rq_psn = qpcb->receive_psn;
-       qp_attr->sq_psn = qpcb->send_psn;
-       qp_attr->min_rnr_timer = qpcb->min_rnr_nak_timer_field;
-       qp_attr->cap.max_send_wr = qpcb->max_nr_outst_send_wr-1;
-       qp_attr->cap.max_recv_wr = qpcb->max_nr_outst_recv_wr-1;
-       /* UD_AV CIRCUMVENTION */
-       if (my_qp->qp_type == IB_QPT_UD) {
-               qp_attr->cap.max_send_sge =
-                       qpcb->actual_nr_sges_in_sq_wqe - 2;
-               qp_attr->cap.max_recv_sge =
-                       qpcb->actual_nr_sges_in_rq_wqe - 2;
-       } else {
-               qp_attr->cap.max_send_sge =
-                       qpcb->actual_nr_sges_in_sq_wqe;
-               qp_attr->cap.max_recv_sge =
-                       qpcb->actual_nr_sges_in_rq_wqe;
-       }
-
-       qp_attr->cap.max_inline_data = my_qp->sq_max_inline_data_size;
-       qp_attr->dest_qp_num = qpcb->dest_qp_nr;
-
-       qp_attr->pkey_index = qpcb->prim_p_key_idx;
-       qp_attr->port_num = qpcb->prim_phys_port;
-       qp_attr->timeout = qpcb->timeout;
-       qp_attr->retry_cnt = qpcb->retry_count;
-       qp_attr->rnr_retry = qpcb->rnr_retry_count;
-
-       qp_attr->alt_pkey_index = qpcb->alt_p_key_idx;
-       qp_attr->alt_port_num = qpcb->alt_phys_port;
-       qp_attr->alt_timeout = qpcb->timeout_al;
-
-       qp_attr->max_dest_rd_atomic = qpcb->rdma_nr_atomic_resp_res;
-       qp_attr->max_rd_atomic = qpcb->rdma_atomic_outst_dest_qp;
-
-       /* primary av */
-       qp_attr->ah_attr.sl = qpcb->service_level;
-
-       if (qpcb->send_grh_flag) {
-               qp_attr->ah_attr.ah_flags = IB_AH_GRH;
-       }
-
-       qp_attr->ah_attr.static_rate = qpcb->max_static_rate;
-       qp_attr->ah_attr.dlid = qpcb->dlid;
-       qp_attr->ah_attr.src_path_bits = qpcb->source_path_bits;
-       qp_attr->ah_attr.port_num = qp_attr->port_num;
-
-       /* primary GRH */
-       qp_attr->ah_attr.grh.traffic_class = qpcb->traffic_class;
-       qp_attr->ah_attr.grh.hop_limit = qpcb->hop_limit;
-       qp_attr->ah_attr.grh.sgid_index = qpcb->source_gid_idx;
-       qp_attr->ah_attr.grh.flow_label = qpcb->flow_label;
-
-       for (cnt = 0; cnt < 16; cnt++)
-               qp_attr->ah_attr.grh.dgid.raw[cnt] =
-                       qpcb->dest_gid.byte[cnt];
-
-       /* alternate AV */
-       qp_attr->alt_ah_attr.sl = qpcb->service_level_al;
-       if (qpcb->send_grh_flag_al) {
-               qp_attr->alt_ah_attr.ah_flags = IB_AH_GRH;
-       }
-
-       qp_attr->alt_ah_attr.static_rate = qpcb->max_static_rate_al;
-       qp_attr->alt_ah_attr.dlid = qpcb->dlid_al;
-       qp_attr->alt_ah_attr.src_path_bits = qpcb->source_path_bits_al;
-
-       /* alternate GRH */
-       qp_attr->alt_ah_attr.grh.traffic_class = qpcb->traffic_class_al;
-       qp_attr->alt_ah_attr.grh.hop_limit = qpcb->hop_limit_al;
-       qp_attr->alt_ah_attr.grh.sgid_index = qpcb->source_gid_idx_al;
-       qp_attr->alt_ah_attr.grh.flow_label = qpcb->flow_label_al;
-
-       for (cnt = 0; cnt < 16; cnt++)
-               qp_attr->alt_ah_attr.grh.dgid.raw[cnt] =
-                       qpcb->dest_gid_al.byte[cnt];
-
-       /* return init attributes given in ehca_create_qp */
-       if (qp_init_attr)
-               *qp_init_attr = my_qp->init_attr;
-
-       if (ehca_debug_level >= 2)
-               ehca_dmp(qpcb, 4*70, "qp_num=%x", qp->qp_num);
-
-query_qp_exit1:
-       ehca_free_fw_ctrlblock(qpcb);
-
-       return ret;
-}
-
-int ehca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
-                   enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
-{
-       struct ehca_qp *my_qp =
-               container_of(ibsrq, struct ehca_qp, ib_srq);
-       struct ehca_shca *shca =
-               container_of(ibsrq->pd->device, struct ehca_shca, ib_device);
-       struct hcp_modify_qp_control_block *mqpcb;
-       u64 update_mask;
-       u64 h_ret;
-       int ret = 0;
-
-       mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-       if (!mqpcb) {
-               ehca_err(ibsrq->device, "Could not get zeroed page for mqpcb "
-                        "ehca_qp=%p qp_num=%x ", my_qp, my_qp->real_qp_num);
-               return -ENOMEM;
-       }
-
-       update_mask = 0;
-       if (attr_mask & IB_SRQ_LIMIT) {
-               attr_mask &= ~IB_SRQ_LIMIT;
-               update_mask |=
-                       EHCA_BMASK_SET(MQPCB_MASK_CURR_SRQ_LIMIT, 1)
-                       | EHCA_BMASK_SET(MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG, 1);
-               mqpcb->curr_srq_limit = attr->srq_limit;
-               mqpcb->qp_aff_asyn_ev_log_reg =
-                       EHCA_BMASK_SET(QPX_AAELOG_RESET_SRQ_LIMIT, 1);
-       }
-
-       /* by now, all bits in attr_mask should have been cleared */
-       if (attr_mask) {
-               ehca_err(ibsrq->device, "invalid attribute mask bits set  "
-                        "attr_mask=%x", attr_mask);
-               ret = -EINVAL;
-               goto modify_srq_exit0;
-       }
-
-       if (ehca_debug_level >= 2)
-               ehca_dmp(mqpcb, 4*70, "qp_num=%x", my_qp->real_qp_num);
-
-       h_ret = hipz_h_modify_qp(shca->ipz_hca_handle, my_qp->ipz_qp_handle,
-                                NULL, update_mask, mqpcb,
-                                my_qp->galpas.kernel);
-
-       if (h_ret != H_SUCCESS) {
-               ret = ehca2ib_return_code(h_ret);
-               ehca_err(ibsrq->device, "hipz_h_modify_qp() failed h_ret=%lli "
-                        "ehca_qp=%p qp_num=%x",
-                        h_ret, my_qp, my_qp->real_qp_num);
-       }
-
-modify_srq_exit0:
-       ehca_free_fw_ctrlblock(mqpcb);
-
-       return ret;
-}
-
-int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr)
-{
-       struct ehca_qp *my_qp = container_of(srq, struct ehca_qp, ib_srq);
-       struct ehca_shca *shca = container_of(srq->device, struct ehca_shca,
-                                             ib_device);
-       struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle;
-       struct hcp_modify_qp_control_block *qpcb;
-       int ret = 0;
-       u64 h_ret;
-
-       qpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-       if (!qpcb) {
-               ehca_err(srq->device, "Out of memory for qpcb "
-                        "ehca_qp=%p qp_num=%x", my_qp, my_qp->real_qp_num);
-               return -ENOMEM;
-       }
-
-       h_ret = hipz_h_query_qp(adapter_handle, my_qp->ipz_qp_handle,
-                               NULL, qpcb, my_qp->galpas.kernel);
-
-       if (h_ret != H_SUCCESS) {
-               ret = ehca2ib_return_code(h_ret);
-               ehca_err(srq->device, "hipz_h_query_qp() failed "
-                        "ehca_qp=%p qp_num=%x h_ret=%lli",
-                        my_qp, my_qp->real_qp_num, h_ret);
-               goto query_srq_exit1;
-       }
-
-       srq_attr->max_wr = qpcb->max_nr_outst_recv_wr - 1;
-       srq_attr->max_sge = 3;
-       srq_attr->srq_limit = qpcb->curr_srq_limit;
-
-       if (ehca_debug_level >= 2)
-               ehca_dmp(qpcb, 4*70, "qp_num=%x", my_qp->real_qp_num);
-
-query_srq_exit1:
-       ehca_free_fw_ctrlblock(qpcb);
-
-       return ret;
-}
-
-static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
-                              struct ib_uobject *uobject)
-{
-       struct ehca_shca *shca = container_of(dev, struct ehca_shca, ib_device);
-       struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd,
-                                            ib_pd);
-       struct ehca_sport *sport = &shca->sport[my_qp->init_attr.port_num - 1];
-       u32 qp_num = my_qp->real_qp_num;
-       int ret;
-       u64 h_ret;
-       u8 port_num;
-       int is_user = 0;
-       enum ib_qp_type qp_type;
-       unsigned long flags;
-
-       if (uobject) {
-               is_user = 1;
-               if (my_qp->mm_count_galpa ||
-                   my_qp->mm_count_rqueue || my_qp->mm_count_squeue) {
-                       ehca_err(dev, "Resources still referenced in "
-                                "user space qp_num=%x", qp_num);
-                       return -EINVAL;
-               }
-       }
-
-       if (my_qp->send_cq) {
-               ret = ehca_cq_unassign_qp(my_qp->send_cq, qp_num);
-               if (ret) {
-                       ehca_err(dev, "Couldn't unassign qp from "
-                                "send_cq ret=%i qp_num=%x cq_num=%x", ret,
-                                qp_num, my_qp->send_cq->cq_number);
-                       return ret;
-               }
-       }
-
-       write_lock_irqsave(&ehca_qp_idr_lock, flags);
-       idr_remove(&ehca_qp_idr, my_qp->token);
-       write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
-
-       /*
-        * SRQs will never get into an error list and do not have a recv_cq,
-        * so we need to skip them here.
-        */
-       if (HAS_RQ(my_qp) && !IS_SRQ(my_qp) && !is_user)
-               del_from_err_list(my_qp->recv_cq, &my_qp->rq_err_node);
-
-       if (HAS_SQ(my_qp) && !is_user)
-               del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node);
-
-       /* now wait until all pending events have completed */
-       wait_event(my_qp->wait_completion, !atomic_read(&my_qp->nr_events));
-
-       h_ret = hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp);
-       if (h_ret != H_SUCCESS) {
-               ehca_err(dev, "hipz_h_destroy_qp() failed h_ret=%lli "
-                        "ehca_qp=%p qp_num=%x", h_ret, my_qp, qp_num);
-               return ehca2ib_return_code(h_ret);
-       }
-
-       port_num = my_qp->init_attr.port_num;
-       qp_type  = my_qp->init_attr.qp_type;
-
-       if (qp_type == IB_QPT_SMI || qp_type == IB_QPT_GSI) {
-               spin_lock_irqsave(&sport->mod_sqp_lock, flags);
-               kfree(my_qp->mod_qp_parm);
-               my_qp->mod_qp_parm = NULL;
-               shca->sport[port_num - 1].ibqp_sqp[qp_type] = NULL;
-               spin_unlock_irqrestore(&sport->mod_sqp_lock, flags);
-       }
-
-       /* no support for IB_QPT_SMI yet */
-       if (qp_type == IB_QPT_GSI) {
-               struct ib_event event;
-               ehca_info(dev, "device %s: port %x is inactive.",
-                               shca->ib_device.name, port_num);
-               event.device = &shca->ib_device;
-               event.event = IB_EVENT_PORT_ERR;
-               event.element.port_num = port_num;
-               shca->sport[port_num - 1].port_state = IB_PORT_DOWN;
-               ib_dispatch_event(&event);
-       }
-
-       if (HAS_RQ(my_qp)) {
-               ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue);
-               if (!is_user)
-                       vfree(my_qp->rq_map.map);
-       }
-       if (HAS_SQ(my_qp)) {
-               ipz_queue_dtor(my_pd, &my_qp->ipz_squeue);
-               if (!is_user)
-                       vfree(my_qp->sq_map.map);
-       }
-       kmem_cache_free(qp_cache, my_qp);
-       atomic_dec(&shca->num_qps);
-       return 0;
-}
-
-int ehca_destroy_qp(struct ib_qp *qp)
-{
-       return internal_destroy_qp(qp->device,
-                                  container_of(qp, struct ehca_qp, ib_qp),
-                                  qp->uobject);
-}
-
-int ehca_destroy_srq(struct ib_srq *srq)
-{
-       return internal_destroy_qp(srq->device,
-                                  container_of(srq, struct ehca_qp, ib_srq),
-                                  srq->uobject);
-}
-
-int ehca_init_qp_cache(void)
-{
-       qp_cache = kmem_cache_create("ehca_cache_qp",
-                                    sizeof(struct ehca_qp), 0,
-                                    SLAB_HWCACHE_ALIGN,
-                                    NULL);
-       if (!qp_cache)
-               return -ENOMEM;
-       return 0;
-}
-
-void ehca_cleanup_qp_cache(void)
-{
-       kmem_cache_destroy(qp_cache);
-}
diff --git a/drivers/staging/rdma/ehca/ehca_reqs.c b/drivers/staging/rdma/ehca/ehca_reqs.c
deleted file mode 100644 (file)
index 10e2074..0000000
+++ /dev/null
@@ -1,954 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  post_send/recv, poll_cq, req_notify
- *
- *  Authors: Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Waleri Fomin <fomin@de.ibm.com>
- *           Joachim Fenkes <fenkes@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-
-#include "ehca_classes.h"
-#include "ehca_tools.h"
-#include "ehca_qes.h"
-#include "ehca_iverbs.h"
-#include "hcp_if.h"
-#include "hipz_fns.h"
-
-/* in RC traffic, insert an empty RDMA READ every this many packets */
-#define ACK_CIRC_THRESHOLD 2000000
-
-static u64 replace_wr_id(u64 wr_id, u16 idx)
-{
-       u64 ret;
-
-       ret = wr_id & ~QMAP_IDX_MASK;
-       ret |= idx & QMAP_IDX_MASK;
-
-       return ret;
-}
-
-static u16 get_app_wr_id(u64 wr_id)
-{
-       return wr_id & QMAP_IDX_MASK;
-}
-
-static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue,
-                                 struct ehca_wqe *wqe_p,
-                                 struct ib_recv_wr *recv_wr,
-                                 u32 rq_map_idx)
-{
-       u8 cnt_ds;
-       if (unlikely((recv_wr->num_sge < 0) ||
-                    (recv_wr->num_sge > ipz_rqueue->act_nr_of_sg))) {
-               ehca_gen_err("Invalid number of WQE SGE. "
-                        "num_sqe=%x max_nr_of_sg=%x",
-                        recv_wr->num_sge, ipz_rqueue->act_nr_of_sg);
-               return -EINVAL; /* invalid SG list length */
-       }
-
-       /* clear wqe header until sglist */
-       memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list));
-
-       wqe_p->work_request_id = replace_wr_id(recv_wr->wr_id, rq_map_idx);
-       wqe_p->nr_of_data_seg = recv_wr->num_sge;
-
-       for (cnt_ds = 0; cnt_ds < recv_wr->num_sge; cnt_ds++) {
-               wqe_p->u.all_rcv.sg_list[cnt_ds].vaddr =
-                       recv_wr->sg_list[cnt_ds].addr;
-               wqe_p->u.all_rcv.sg_list[cnt_ds].lkey =
-                       recv_wr->sg_list[cnt_ds].lkey;
-               wqe_p->u.all_rcv.sg_list[cnt_ds].length =
-                       recv_wr->sg_list[cnt_ds].length;
-       }
-
-       if (ehca_debug_level >= 3) {
-               ehca_gen_dbg("RECEIVE WQE written into ipz_rqueue=%p",
-                            ipz_rqueue);
-               ehca_dmp(wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe");
-       }
-
-       return 0;
-}
-
-#if defined(DEBUG_GSI_SEND_WR)
-
-/* need ib_mad struct */
-#include <rdma/ib_mad.h>
-
-static void trace_ud_wr(const struct ib_ud_wr *ud_wr)
-{
-       int idx;
-       int j;
-       while (ud_wr) {
-               struct ib_mad_hdr *mad_hdr = ud_wrmad_hdr;
-               struct ib_sge *sge = ud_wr->wr.sg_list;
-               ehca_gen_dbg("ud_wr#%x wr_id=%lx num_sge=%x "
-                            "send_flags=%x opcode=%x", idx, ud_wr->wr.wr_id,
-                            ud_wr->wr.num_sge, ud_wr->wr.send_flags,
-                            ud_wr->.wr.opcode);
-               if (mad_hdr) {
-                       ehca_gen_dbg("ud_wr#%x mad_hdr base_version=%x "
-                                    "mgmt_class=%x class_version=%x method=%x "
-                                    "status=%x class_specific=%x tid=%lx "
-                                    "attr_id=%x resv=%x attr_mod=%x",
-                                    idx, mad_hdr->base_version,
-                                    mad_hdr->mgmt_class,
-                                    mad_hdr->class_version, mad_hdr->method,
-                                    mad_hdr->status, mad_hdr->class_specific,
-                                    mad_hdr->tid, mad_hdr->attr_id,
-                                    mad_hdr->resv,
-                                    mad_hdr->attr_mod);
-               }
-               for (j = 0; j < ud_wr->wr.num_sge; j++) {
-                       u8 *data = __va(sge->addr);
-                       ehca_gen_dbg("ud_wr#%x sge#%x addr=%p length=%x "
-                                    "lkey=%x",
-                                    idx, j, data, sge->length, sge->lkey);
-                       /* assume length is n*16 */
-                       ehca_dmp(data, sge->length, "ud_wr#%x sge#%x",
-                                idx, j);
-                       sge++;
-               } /* eof for j */
-               idx++;
-               ud_wr = ud_wr(ud_wr->wr.next);
-       } /* eof while ud_wr */
-}
-
-#endif /* DEBUG_GSI_SEND_WR */
-
-static inline int ehca_write_swqe(struct ehca_qp *qp,
-                                 struct ehca_wqe *wqe_p,
-                                 struct ib_send_wr *send_wr,
-                                 u32 sq_map_idx,
-                                 int hidden)
-{
-       u32 idx;
-       u64 dma_length;
-       struct ehca_av *my_av;
-       u32 remote_qkey;
-       struct ehca_qmap_entry *qmap_entry = &qp->sq_map.map[sq_map_idx];
-
-       if (unlikely((send_wr->num_sge < 0) ||
-                    (send_wr->num_sge > qp->ipz_squeue.act_nr_of_sg))) {
-               ehca_gen_err("Invalid number of WQE SGE. "
-                        "num_sqe=%x max_nr_of_sg=%x",
-                        send_wr->num_sge, qp->ipz_squeue.act_nr_of_sg);
-               return -EINVAL; /* invalid SG list length */
-       }
-
-       /* clear wqe header until sglist */
-       memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list));
-
-       wqe_p->work_request_id = replace_wr_id(send_wr->wr_id, sq_map_idx);
-
-       qmap_entry->app_wr_id = get_app_wr_id(send_wr->wr_id);
-       qmap_entry->reported = 0;
-       qmap_entry->cqe_req = 0;
-
-       switch (send_wr->opcode) {
-       case IB_WR_SEND:
-       case IB_WR_SEND_WITH_IMM:
-               wqe_p->optype = WQE_OPTYPE_SEND;
-               break;
-       case IB_WR_RDMA_WRITE:
-       case IB_WR_RDMA_WRITE_WITH_IMM:
-               wqe_p->optype = WQE_OPTYPE_RDMAWRITE;
-               break;
-       case IB_WR_RDMA_READ:
-               wqe_p->optype = WQE_OPTYPE_RDMAREAD;
-               break;
-       default:
-               ehca_gen_err("Invalid opcode=%x", send_wr->opcode);
-               return -EINVAL; /* invalid opcode */
-       }
-
-       wqe_p->wqef = (send_wr->opcode) & WQEF_HIGH_NIBBLE;
-
-       wqe_p->wr_flag = 0;
-
-       if ((send_wr->send_flags & IB_SEND_SIGNALED ||
-           qp->init_attr.sq_sig_type == IB_SIGNAL_ALL_WR)
-           && !hidden) {
-               wqe_p->wr_flag |= WQE_WRFLAG_REQ_SIGNAL_COM;
-               qmap_entry->cqe_req = 1;
-       }
-
-       if (send_wr->opcode == IB_WR_SEND_WITH_IMM ||
-           send_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
-               /* this might not work as long as HW does not support it */
-               wqe_p->immediate_data = be32_to_cpu(send_wr->ex.imm_data);
-               wqe_p->wr_flag |= WQE_WRFLAG_IMM_DATA_PRESENT;
-       }
-
-       wqe_p->nr_of_data_seg = send_wr->num_sge;
-
-       switch (qp->qp_type) {
-       case IB_QPT_SMI:
-       case IB_QPT_GSI:
-               /* no break is intential here */
-       case IB_QPT_UD:
-               /* IB 1.2 spec C10-15 compliance */
-               remote_qkey = ud_wr(send_wr)->remote_qkey;
-               if (remote_qkey & 0x80000000)
-                       remote_qkey = qp->qkey;
-
-               wqe_p->destination_qp_number = ud_wr(send_wr)->remote_qpn << 8;
-               wqe_p->local_ee_context_qkey = remote_qkey;
-               if (unlikely(!ud_wr(send_wr)->ah)) {
-                       ehca_gen_err("ud_wr(send_wr) is NULL. qp=%p", qp);
-                       return -EINVAL;
-               }
-               if (unlikely(ud_wr(send_wr)->remote_qpn == 0)) {
-                       ehca_gen_err("dest QP# is 0. qp=%x", qp->real_qp_num);
-                       return -EINVAL;
-               }
-               my_av = container_of(ud_wr(send_wr)->ah, struct ehca_av, ib_ah);
-               wqe_p->u.ud_av.ud_av = my_av->av;
-
-               /*
-                * omitted check of IB_SEND_INLINE
-                * since HW does not support it
-                */
-               for (idx = 0; idx < send_wr->num_sge; idx++) {
-                       wqe_p->u.ud_av.sg_list[idx].vaddr =
-                               send_wr->sg_list[idx].addr;
-                       wqe_p->u.ud_av.sg_list[idx].lkey =
-                               send_wr->sg_list[idx].lkey;
-                       wqe_p->u.ud_av.sg_list[idx].length =
-                               send_wr->sg_list[idx].length;
-               } /* eof for idx */
-               if (qp->qp_type == IB_QPT_SMI ||
-                   qp->qp_type == IB_QPT_GSI)
-                       wqe_p->u.ud_av.ud_av.pmtu = 1;
-               if (qp->qp_type == IB_QPT_GSI) {
-                       wqe_p->pkeyi = ud_wr(send_wr)->pkey_index;
-#ifdef DEBUG_GSI_SEND_WR
-                       trace_ud_wr(ud_wr(send_wr));
-#endif /* DEBUG_GSI_SEND_WR */
-               }
-               break;
-
-       case IB_QPT_UC:
-               if (send_wr->send_flags & IB_SEND_FENCE)
-                       wqe_p->wr_flag |= WQE_WRFLAG_FENCE;
-               /* no break is intentional here */
-       case IB_QPT_RC:
-               /* TODO: atomic not implemented */
-               wqe_p->u.nud.remote_virtual_address =
-                       rdma_wr(send_wr)->remote_addr;
-               wqe_p->u.nud.rkey = rdma_wr(send_wr)->rkey;
-
-               /*
-                * omitted checking of IB_SEND_INLINE
-                * since HW does not support it
-                */
-               dma_length = 0;
-               for (idx = 0; idx < send_wr->num_sge; idx++) {
-                       wqe_p->u.nud.sg_list[idx].vaddr =
-                               send_wr->sg_list[idx].addr;
-                       wqe_p->u.nud.sg_list[idx].lkey =
-                               send_wr->sg_list[idx].lkey;
-                       wqe_p->u.nud.sg_list[idx].length =
-                               send_wr->sg_list[idx].length;
-                       dma_length += send_wr->sg_list[idx].length;
-               } /* eof idx */
-               wqe_p->u.nud.atomic_1st_op_dma_len = dma_length;
-
-               /* unsolicited ack circumvention */
-               if (send_wr->opcode == IB_WR_RDMA_READ) {
-                       /* on RDMA read, switch on and reset counters */
-                       qp->message_count = qp->packet_count = 0;
-                       qp->unsol_ack_circ = 1;
-               } else
-                       /* else estimate #packets */
-                       qp->packet_count += (dma_length >> qp->mtu_shift) + 1;
-
-               break;
-
-       default:
-               ehca_gen_err("Invalid qptype=%x", qp->qp_type);
-               return -EINVAL;
-       }
-
-       if (ehca_debug_level >= 3) {
-               ehca_gen_dbg("SEND WQE written into queue qp=%p ", qp);
-               ehca_dmp( wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "send wqe");
-       }
-       return 0;
-}
-
-/* map_ib_wc_status converts raw cqe_status to ib_wc_status */
-static inline void map_ib_wc_status(u32 cqe_status,
-                                   enum ib_wc_status *wc_status)
-{
-       if (unlikely(cqe_status & WC_STATUS_ERROR_BIT)) {
-               switch (cqe_status & 0x3F) {
-               case 0x01:
-               case 0x21:
-                       *wc_status = IB_WC_LOC_LEN_ERR;
-                       break;
-               case 0x02:
-               case 0x22:
-                       *wc_status = IB_WC_LOC_QP_OP_ERR;
-                       break;
-               case 0x03:
-               case 0x23:
-                       *wc_status = IB_WC_LOC_EEC_OP_ERR;
-                       break;
-               case 0x04:
-               case 0x24:
-                       *wc_status = IB_WC_LOC_PROT_ERR;
-                       break;
-               case 0x05:
-               case 0x25:
-                       *wc_status = IB_WC_WR_FLUSH_ERR;
-                       break;
-               case 0x06:
-                       *wc_status = IB_WC_MW_BIND_ERR;
-                       break;
-               case 0x07: /* remote error - look into bits 20:24 */
-                       switch ((cqe_status
-                                & WC_STATUS_REMOTE_ERROR_FLAGS) >> 11) {
-                       case 0x0:
-                               /*
-                                * PSN Sequence Error!
-                                * couldn't find a matching status!
-                                */
-                               *wc_status = IB_WC_GENERAL_ERR;
-                               break;
-                       case 0x1:
-                               *wc_status = IB_WC_REM_INV_REQ_ERR;
-                               break;
-                       case 0x2:
-                               *wc_status = IB_WC_REM_ACCESS_ERR;
-                               break;
-                       case 0x3:
-                               *wc_status = IB_WC_REM_OP_ERR;
-                               break;
-                       case 0x4:
-                               *wc_status = IB_WC_REM_INV_RD_REQ_ERR;
-                               break;
-                       }
-                       break;
-               case 0x08:
-                       *wc_status = IB_WC_RETRY_EXC_ERR;
-                       break;
-               case 0x09:
-                       *wc_status = IB_WC_RNR_RETRY_EXC_ERR;
-                       break;
-               case 0x0A:
-               case 0x2D:
-                       *wc_status = IB_WC_REM_ABORT_ERR;
-                       break;
-               case 0x0B:
-               case 0x2E:
-                       *wc_status = IB_WC_INV_EECN_ERR;
-                       break;
-               case 0x0C:
-               case 0x2F:
-                       *wc_status = IB_WC_INV_EEC_STATE_ERR;
-                       break;
-               case 0x0D:
-                       *wc_status = IB_WC_BAD_RESP_ERR;
-                       break;
-               case 0x10:
-                       /* WQE purged */
-                       *wc_status = IB_WC_WR_FLUSH_ERR;
-                       break;
-               default:
-                       *wc_status = IB_WC_FATAL_ERR;
-
-               }
-       } else
-               *wc_status = IB_WC_SUCCESS;
-}
-
-static inline int post_one_send(struct ehca_qp *my_qp,
-                        struct ib_send_wr *cur_send_wr,
-                        int hidden)
-{
-       struct ehca_wqe *wqe_p;
-       int ret;
-       u32 sq_map_idx;
-       u64 start_offset = my_qp->ipz_squeue.current_q_offset;
-
-       /* get pointer next to free WQE */
-       wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue);
-       if (unlikely(!wqe_p)) {
-               /* too many posted work requests: queue overflow */
-               ehca_err(my_qp->ib_qp.device, "Too many posted WQEs "
-                        "qp_num=%x", my_qp->ib_qp.qp_num);
-               return -ENOMEM;
-       }
-
-       /*
-        * Get the index of the WQE in the send queue. The same index is used
-        * for writing into the sq_map.
-        */
-       sq_map_idx = start_offset / my_qp->ipz_squeue.qe_size;
-
-       /* write a SEND WQE into the QUEUE */
-       ret = ehca_write_swqe(my_qp, wqe_p, cur_send_wr, sq_map_idx, hidden);
-       /*
-        * if something failed,
-        * reset the free entry pointer to the start value
-        */
-       if (unlikely(ret)) {
-               my_qp->ipz_squeue.current_q_offset = start_offset;
-               ehca_err(my_qp->ib_qp.device, "Could not write WQE "
-                        "qp_num=%x", my_qp->ib_qp.qp_num);
-               return -EINVAL;
-       }
-
-       return 0;
-}
-
-int ehca_post_send(struct ib_qp *qp,
-                  struct ib_send_wr *send_wr,
-                  struct ib_send_wr **bad_send_wr)
-{
-       struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
-       int wqe_cnt = 0;
-       int ret = 0;
-       unsigned long flags;
-
-       /* Reject WR if QP is in RESET, INIT or RTR state */
-       if (unlikely(my_qp->state < IB_QPS_RTS)) {
-               ehca_err(qp->device, "Invalid QP state  qp_state=%d qpn=%x",
-                        my_qp->state, qp->qp_num);
-               ret = -EINVAL;
-               goto out;
-       }
-
-       /* LOCK the QUEUE */
-       spin_lock_irqsave(&my_qp->spinlock_s, flags);
-
-       /* Send an empty extra RDMA read if:
-        *  1) there has been an RDMA read on this connection before
-        *  2) no RDMA read occurred for ACK_CIRC_THRESHOLD link packets
-        *  3) we can be sure that any previous extra RDMA read has been
-        *     processed so we don't overflow the SQ
-        */
-       if (unlikely(my_qp->unsol_ack_circ &&
-                    my_qp->packet_count > ACK_CIRC_THRESHOLD &&
-                    my_qp->message_count > my_qp->init_attr.cap.max_send_wr)) {
-               /* insert an empty RDMA READ to fix up the remote QP state */
-               struct ib_send_wr circ_wr;
-               memset(&circ_wr, 0, sizeof(circ_wr));
-               circ_wr.opcode = IB_WR_RDMA_READ;
-               post_one_send(my_qp, &circ_wr, 1); /* ignore retcode */
-               wqe_cnt++;
-               ehca_dbg(qp->device, "posted circ wr  qp_num=%x", qp->qp_num);
-               my_qp->message_count = my_qp->packet_count = 0;
-       }
-
-       /* loop processes list of send reqs */
-       while (send_wr) {
-               ret = post_one_send(my_qp, send_wr, 0);
-               if (unlikely(ret)) {
-                       goto post_send_exit0;
-               }
-               wqe_cnt++;
-               send_wr = send_wr->next;
-       }
-
-post_send_exit0:
-       iosync(); /* serialize GAL register access */
-       hipz_update_sqa(my_qp, wqe_cnt);
-       if (unlikely(ret || ehca_debug_level >= 2))
-               ehca_dbg(qp->device, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i",
-                        my_qp, qp->qp_num, wqe_cnt, ret);
-       my_qp->message_count += wqe_cnt;
-       spin_unlock_irqrestore(&my_qp->spinlock_s, flags);
-
-out:
-       if (ret)
-               *bad_send_wr = send_wr;
-       return ret;
-}
-
-static int internal_post_recv(struct ehca_qp *my_qp,
-                             struct ib_device *dev,
-                             struct ib_recv_wr *recv_wr,
-                             struct ib_recv_wr **bad_recv_wr)
-{
-       struct ehca_wqe *wqe_p;
-       int wqe_cnt = 0;
-       int ret = 0;
-       u32 rq_map_idx;
-       unsigned long flags;
-       struct ehca_qmap_entry *qmap_entry;
-
-       if (unlikely(!HAS_RQ(my_qp))) {
-               ehca_err(dev, "QP has no RQ  ehca_qp=%p qp_num=%x ext_type=%d",
-                        my_qp, my_qp->real_qp_num, my_qp->ext_type);
-               ret = -ENODEV;
-               goto out;
-       }
-
-       /* LOCK the QUEUE */
-       spin_lock_irqsave(&my_qp->spinlock_r, flags);
-
-       /* loop processes list of recv reqs */
-       while (recv_wr) {
-               u64 start_offset = my_qp->ipz_rqueue.current_q_offset;
-               /* get pointer next to free WQE */
-               wqe_p = ipz_qeit_get_inc(&my_qp->ipz_rqueue);
-               if (unlikely(!wqe_p)) {
-                       /* too many posted work requests: queue overflow */
-                       ret = -ENOMEM;
-                       ehca_err(dev, "Too many posted WQEs "
-                               "qp_num=%x", my_qp->real_qp_num);
-                       goto post_recv_exit0;
-               }
-               /*
-                * Get the index of the WQE in the recv queue. The same index
-                * is used for writing into the rq_map.
-                */
-               rq_map_idx = start_offset / my_qp->ipz_rqueue.qe_size;
-
-               /* write a RECV WQE into the QUEUE */
-               ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, recv_wr,
-                               rq_map_idx);
-               /*
-                * if something failed,
-                * reset the free entry pointer to the start value
-                */
-               if (unlikely(ret)) {
-                       my_qp->ipz_rqueue.current_q_offset = start_offset;
-                       ret = -EINVAL;
-                       ehca_err(dev, "Could not write WQE "
-                               "qp_num=%x", my_qp->real_qp_num);
-                       goto post_recv_exit0;
-               }
-
-               qmap_entry = &my_qp->rq_map.map[rq_map_idx];
-               qmap_entry->app_wr_id = get_app_wr_id(recv_wr->wr_id);
-               qmap_entry->reported = 0;
-               qmap_entry->cqe_req = 1;
-
-               wqe_cnt++;
-               recv_wr = recv_wr->next;
-       } /* eof for recv_wr */
-
-post_recv_exit0:
-       iosync(); /* serialize GAL register access */
-       hipz_update_rqa(my_qp, wqe_cnt);
-       if (unlikely(ret || ehca_debug_level >= 2))
-           ehca_dbg(dev, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i",
-                    my_qp, my_qp->real_qp_num, wqe_cnt, ret);
-       spin_unlock_irqrestore(&my_qp->spinlock_r, flags);
-
-out:
-       if (ret)
-               *bad_recv_wr = recv_wr;
-
-       return ret;
-}
-
-int ehca_post_recv(struct ib_qp *qp,
-                  struct ib_recv_wr *recv_wr,
-                  struct ib_recv_wr **bad_recv_wr)
-{
-       struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
-
-       /* Reject WR if QP is in RESET state */
-       if (unlikely(my_qp->state == IB_QPS_RESET)) {
-               ehca_err(qp->device, "Invalid QP state  qp_state=%d qpn=%x",
-                        my_qp->state, qp->qp_num);
-               *bad_recv_wr = recv_wr;
-               return -EINVAL;
-       }
-
-       return internal_post_recv(my_qp, qp->device, recv_wr, bad_recv_wr);
-}
-
-int ehca_post_srq_recv(struct ib_srq *srq,
-                      struct ib_recv_wr *recv_wr,
-                      struct ib_recv_wr **bad_recv_wr)
-{
-       return internal_post_recv(container_of(srq, struct ehca_qp, ib_srq),
-                                 srq->device, recv_wr, bad_recv_wr);
-}
-
-/*
- * ib_wc_opcode table converts ehca wc opcode to ib
- * Since we use zero to indicate invalid opcode, the actual ib opcode must
- * be decremented!!!
- */
-static const u8 ib_wc_opcode[255] = {
-       [0x01] = IB_WC_RECV+1,
-       [0x02] = IB_WC_RECV_RDMA_WITH_IMM+1,
-       [0x04] = IB_WC_BIND_MW+1,
-       [0x08] = IB_WC_FETCH_ADD+1,
-       [0x10] = IB_WC_COMP_SWAP+1,
-       [0x20] = IB_WC_RDMA_WRITE+1,
-       [0x40] = IB_WC_RDMA_READ+1,
-       [0x80] = IB_WC_SEND+1
-};
-
-/* internal function to poll one entry of cq */
-static inline int ehca_poll_cq_one(struct ib_cq *cq, struct ib_wc *wc)
-{
-       int ret = 0, qmap_tail_idx;
-       struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
-       struct ehca_cqe *cqe;
-       struct ehca_qp *my_qp;
-       struct ehca_qmap_entry *qmap_entry;
-       struct ehca_queue_map *qmap;
-       int cqe_count = 0, is_error;
-
-repoll:
-       cqe = (struct ehca_cqe *)
-               ipz_qeit_get_inc_valid(&my_cq->ipz_queue);
-       if (!cqe) {
-               ret = -EAGAIN;
-               if (ehca_debug_level >= 3)
-                       ehca_dbg(cq->device, "Completion queue is empty  "
-                                "my_cq=%p cq_num=%x", my_cq, my_cq->cq_number);
-               goto poll_cq_one_exit0;
-       }
-
-       /* prevents loads being reordered across this point */
-       rmb();
-
-       cqe_count++;
-       if (unlikely(cqe->status & WC_STATUS_PURGE_BIT)) {
-               struct ehca_qp *qp;
-               int purgeflag;
-               unsigned long flags;
-
-               qp = ehca_cq_get_qp(my_cq, cqe->local_qp_number);
-               if (!qp) {
-                       ehca_err(cq->device, "cq_num=%x qp_num=%x "
-                                "could not find qp -> ignore cqe",
-                                my_cq->cq_number, cqe->local_qp_number);
-                       ehca_dmp(cqe, 64, "cq_num=%x qp_num=%x",
-                                my_cq->cq_number, cqe->local_qp_number);
-                       /* ignore this purged cqe */
-                       goto repoll;
-               }
-               spin_lock_irqsave(&qp->spinlock_s, flags);
-               purgeflag = qp->sqerr_purgeflag;
-               spin_unlock_irqrestore(&qp->spinlock_s, flags);
-
-               if (purgeflag) {
-                       ehca_dbg(cq->device,
-                                "Got CQE with purged bit qp_num=%x src_qp=%x",
-                                cqe->local_qp_number, cqe->remote_qp_number);
-                       if (ehca_debug_level >= 2)
-                               ehca_dmp(cqe, 64, "qp_num=%x src_qp=%x",
-                                        cqe->local_qp_number,
-                                        cqe->remote_qp_number);
-                       /*
-                        * ignore this to avoid double cqes of bad wqe
-                        * that caused sqe and turn off purge flag
-                        */
-                       qp->sqerr_purgeflag = 0;
-                       goto repoll;
-               }
-       }
-
-       is_error = cqe->status & WC_STATUS_ERROR_BIT;
-
-       /* trace error CQEs if debug_level >= 1, trace all CQEs if >= 3 */
-       if (unlikely(ehca_debug_level >= 3 || (ehca_debug_level && is_error))) {
-               ehca_dbg(cq->device,
-                        "Received %sCOMPLETION ehca_cq=%p cq_num=%x -----",
-                        is_error ? "ERROR " : "", my_cq, my_cq->cq_number);
-               ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x",
-                        my_cq, my_cq->cq_number);
-               ehca_dbg(cq->device,
-                        "ehca_cq=%p cq_num=%x -------------------------",
-                        my_cq, my_cq->cq_number);
-       }
-
-       read_lock(&ehca_qp_idr_lock);
-       my_qp = idr_find(&ehca_qp_idr, cqe->qp_token);
-       read_unlock(&ehca_qp_idr_lock);
-       if (!my_qp)
-               goto repoll;
-       wc->qp = &my_qp->ib_qp;
-
-       qmap_tail_idx = get_app_wr_id(cqe->work_request_id);
-       if (!(cqe->w_completion_flags & WC_SEND_RECEIVE_BIT))
-               /* We got a send completion. */
-               qmap = &my_qp->sq_map;
-       else
-               /* We got a receive completion. */
-               qmap = &my_qp->rq_map;
-
-       /* advance the tail pointer */
-       qmap->tail = qmap_tail_idx;
-
-       if (is_error) {
-               /*
-                * set left_to_poll to 0 because in error state, we will not
-                * get any additional CQEs
-                */
-               my_qp->sq_map.next_wqe_idx = next_index(my_qp->sq_map.tail,
-                                                       my_qp->sq_map.entries);
-               my_qp->sq_map.left_to_poll = 0;
-               ehca_add_to_err_list(my_qp, 1);
-
-               my_qp->rq_map.next_wqe_idx = next_index(my_qp->rq_map.tail,
-                                                       my_qp->rq_map.entries);
-               my_qp->rq_map.left_to_poll = 0;
-               if (HAS_RQ(my_qp))
-                       ehca_add_to_err_list(my_qp, 0);
-       }
-
-       qmap_entry = &qmap->map[qmap_tail_idx];
-       if (qmap_entry->reported) {
-               ehca_warn(cq->device, "Double cqe on qp_num=%#x",
-                               my_qp->real_qp_num);
-               /* found a double cqe, discard it and read next one */
-               goto repoll;
-       }
-
-       wc->wr_id = replace_wr_id(cqe->work_request_id, qmap_entry->app_wr_id);
-       qmap_entry->reported = 1;
-
-       /* if left_to_poll is decremented to 0, add the QP to the error list */
-       if (qmap->left_to_poll > 0) {
-               qmap->left_to_poll--;
-               if ((my_qp->sq_map.left_to_poll == 0) &&
-                               (my_qp->rq_map.left_to_poll == 0)) {
-                       ehca_add_to_err_list(my_qp, 1);
-                       if (HAS_RQ(my_qp))
-                               ehca_add_to_err_list(my_qp, 0);
-               }
-       }
-
-       /* eval ib_wc_opcode */
-       wc->opcode = ib_wc_opcode[cqe->optype]-1;
-       if (unlikely(wc->opcode == -1)) {
-               ehca_err(cq->device, "Invalid cqe->OPType=%x cqe->status=%x "
-                        "ehca_cq=%p cq_num=%x",
-                        cqe->optype, cqe->status, my_cq, my_cq->cq_number);
-               /* dump cqe for other infos */
-               ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x",
-                        my_cq, my_cq->cq_number);
-               /* update also queue adder to throw away this entry!!! */
-               goto repoll;
-       }
-
-       /* eval ib_wc_status */
-       if (unlikely(is_error)) {
-               /* complete with errors */
-               map_ib_wc_status(cqe->status, &wc->status);
-               wc->vendor_err = wc->status;
-       } else
-               wc->status = IB_WC_SUCCESS;
-
-       wc->byte_len = cqe->nr_bytes_transferred;
-       wc->pkey_index = cqe->pkey_index;
-       wc->slid = cqe->rlid;
-       wc->dlid_path_bits = cqe->dlid;
-       wc->src_qp = cqe->remote_qp_number;
-       /*
-        * HW has "Immed data present" and "GRH present" in bits 6 and 5.
-        * SW defines those in bits 1 and 0, so we can just shift and mask.
-        */
-       wc->wc_flags = (cqe->w_completion_flags >> 5) & 3;
-       wc->ex.imm_data = cpu_to_be32(cqe->immediate_data);
-       wc->sl = cqe->service_level;
-
-poll_cq_one_exit0:
-       if (cqe_count > 0)
-               hipz_update_feca(my_cq, cqe_count);
-
-       return ret;
-}
-
-static int generate_flush_cqes(struct ehca_qp *my_qp, struct ib_cq *cq,
-                              struct ib_wc *wc, int num_entries,
-                              struct ipz_queue *ipz_queue, int on_sq)
-{
-       int nr = 0;
-       struct ehca_wqe *wqe;
-       u64 offset;
-       struct ehca_queue_map *qmap;
-       struct ehca_qmap_entry *qmap_entry;
-
-       if (on_sq)
-               qmap = &my_qp->sq_map;
-       else
-               qmap = &my_qp->rq_map;
-
-       qmap_entry = &qmap->map[qmap->next_wqe_idx];
-
-       while ((nr < num_entries) && (qmap_entry->reported == 0)) {
-               /* generate flush CQE */
-
-               memset(wc, 0, sizeof(*wc));
-
-               offset = qmap->next_wqe_idx * ipz_queue->qe_size;
-               wqe = (struct ehca_wqe *)ipz_qeit_calc(ipz_queue, offset);
-               if (!wqe) {
-                       ehca_err(cq->device, "Invalid wqe offset=%#llx on "
-                                "qp_num=%#x", offset, my_qp->real_qp_num);
-                       return nr;
-               }
-
-               wc->wr_id = replace_wr_id(wqe->work_request_id,
-                                         qmap_entry->app_wr_id);
-
-               if (on_sq) {
-                       switch (wqe->optype) {
-                       case WQE_OPTYPE_SEND:
-                               wc->opcode = IB_WC_SEND;
-                               break;
-                       case WQE_OPTYPE_RDMAWRITE:
-                               wc->opcode = IB_WC_RDMA_WRITE;
-                               break;
-                       case WQE_OPTYPE_RDMAREAD:
-                               wc->opcode = IB_WC_RDMA_READ;
-                               break;
-                       default:
-                               ehca_err(cq->device, "Invalid optype=%x",
-                                               wqe->optype);
-                               return nr;
-                       }
-               } else
-                       wc->opcode = IB_WC_RECV;
-
-               if (wqe->wr_flag & WQE_WRFLAG_IMM_DATA_PRESENT) {
-                       wc->ex.imm_data = wqe->immediate_data;
-                       wc->wc_flags |= IB_WC_WITH_IMM;
-               }
-
-               wc->status = IB_WC_WR_FLUSH_ERR;
-
-               wc->qp = &my_qp->ib_qp;
-
-               /* mark as reported and advance next_wqe pointer */
-               qmap_entry->reported = 1;
-               qmap->next_wqe_idx = next_index(qmap->next_wqe_idx,
-                                               qmap->entries);
-               qmap_entry = &qmap->map[qmap->next_wqe_idx];
-
-               wc++; nr++;
-       }
-
-       return nr;
-
-}
-
-int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc)
-{
-       struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
-       int nr;
-       struct ehca_qp *err_qp;
-       struct ib_wc *current_wc = wc;
-       int ret = 0;
-       unsigned long flags;
-       int entries_left = num_entries;
-
-       if (num_entries < 1) {
-               ehca_err(cq->device, "Invalid num_entries=%d ehca_cq=%p "
-                        "cq_num=%x", num_entries, my_cq, my_cq->cq_number);
-               ret = -EINVAL;
-               goto poll_cq_exit0;
-       }
-
-       spin_lock_irqsave(&my_cq->spinlock, flags);
-
-       /* generate flush cqes for send queues */
-       list_for_each_entry(err_qp, &my_cq->sqp_err_list, sq_err_node) {
-               nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left,
-                               &err_qp->ipz_squeue, 1);
-               entries_left -= nr;
-               current_wc += nr;
-
-               if (entries_left == 0)
-                       break;
-       }
-
-       /* generate flush cqes for receive queues */
-       list_for_each_entry(err_qp, &my_cq->rqp_err_list, rq_err_node) {
-               nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left,
-                               &err_qp->ipz_rqueue, 0);
-               entries_left -= nr;
-               current_wc += nr;
-
-               if (entries_left == 0)
-                       break;
-       }
-
-       for (nr = 0; nr < entries_left; nr++) {
-               ret = ehca_poll_cq_one(cq, current_wc);
-               if (ret)
-                       break;
-               current_wc++;
-       } /* eof for nr */
-       entries_left -= nr;
-
-       spin_unlock_irqrestore(&my_cq->spinlock, flags);
-       if (ret == -EAGAIN  || !ret)
-               ret = num_entries - entries_left;
-
-poll_cq_exit0:
-       return ret;
-}
-
-int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags)
-{
-       struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
-       int ret = 0;
-
-       switch (notify_flags & IB_CQ_SOLICITED_MASK) {
-       case IB_CQ_SOLICITED:
-               hipz_set_cqx_n0(my_cq, 1);
-               break;
-       case IB_CQ_NEXT_COMP:
-               hipz_set_cqx_n1(my_cq, 1);
-               break;
-       default:
-               return -EINVAL;
-       }
-
-       if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) {
-               unsigned long spl_flags;
-               spin_lock_irqsave(&my_cq->spinlock, spl_flags);
-               ret = ipz_qeit_is_valid(&my_cq->ipz_queue);
-               spin_unlock_irqrestore(&my_cq->spinlock, spl_flags);
-       }
-
-       return ret;
-}
diff --git a/drivers/staging/rdma/ehca/ehca_sqp.c b/drivers/staging/rdma/ehca/ehca_sqp.c
deleted file mode 100644 (file)
index 376b031..0000000
+++ /dev/null
@@ -1,245 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  SQP functions
- *
- *  Authors: Khadija Souissi <souissi@de.ibm.com>
- *           Heiko J Schick <schickhj@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <rdma/ib_mad.h>
-
-#include "ehca_classes.h"
-#include "ehca_tools.h"
-#include "ehca_iverbs.h"
-#include "hcp_if.h"
-
-#define IB_MAD_STATUS_REDIRECT         cpu_to_be16(0x0002)
-#define IB_MAD_STATUS_UNSUP_VERSION    cpu_to_be16(0x0004)
-#define IB_MAD_STATUS_UNSUP_METHOD     cpu_to_be16(0x0008)
-
-#define IB_PMA_CLASS_PORT_INFO         cpu_to_be16(0x0001)
-
-/**
- * ehca_define_sqp - Defines special queue pair 1 (GSI QP). When special queue
- * pair is created successfully, the corresponding port gets active.
- *
- * Define Special Queue pair 0 (SMI QP) is still not supported.
- *
- * @qp_init_attr: Queue pair init attributes with port and queue pair type
- */
-
-u64 ehca_define_sqp(struct ehca_shca *shca,
-                   struct ehca_qp *ehca_qp,
-                   struct ib_qp_init_attr *qp_init_attr)
-{
-       u32 pma_qp_nr, bma_qp_nr;
-       u64 ret;
-       u8 port = qp_init_attr->port_num;
-       int counter;
-
-       shca->sport[port - 1].port_state = IB_PORT_DOWN;
-
-       switch (qp_init_attr->qp_type) {
-       case IB_QPT_SMI:
-               /* function not supported yet */
-               break;
-       case IB_QPT_GSI:
-               ret = hipz_h_define_aqp1(shca->ipz_hca_handle,
-                                        ehca_qp->ipz_qp_handle,
-                                        ehca_qp->galpas.kernel,
-                                        (u32) qp_init_attr->port_num,
-                                        &pma_qp_nr, &bma_qp_nr);
-
-               if (ret != H_SUCCESS) {
-                       ehca_err(&shca->ib_device,
-                                "Can't define AQP1 for port %x. h_ret=%lli",
-                                port, ret);
-                       return ret;
-               }
-               shca->sport[port - 1].pma_qp_nr = pma_qp_nr;
-               ehca_dbg(&shca->ib_device, "port=%x pma_qp_nr=%x",
-                        port, pma_qp_nr);
-               break;
-       default:
-               ehca_err(&shca->ib_device, "invalid qp_type=%x",
-                        qp_init_attr->qp_type);
-               return H_PARAMETER;
-       }
-
-       if (ehca_nr_ports < 0) /* autodetect mode */
-               return H_SUCCESS;
-
-       for (counter = 0;
-            shca->sport[port - 1].port_state != IB_PORT_ACTIVE &&
-                    counter < ehca_port_act_time;
-            counter++) {
-               ehca_dbg(&shca->ib_device, "... wait until port %x is active",
-                        port);
-               msleep_interruptible(1000);
-       }
-
-       if (counter == ehca_port_act_time) {
-               ehca_err(&shca->ib_device, "Port %x is not active.", port);
-               return H_HARDWARE;
-       }
-
-       return H_SUCCESS;
-}
-
-struct ib_perf {
-       struct ib_mad_hdr mad_hdr;
-       u8 reserved[40];
-       u8 data[192];
-} __attribute__ ((packed));
-
-/* TC/SL/FL packed into 32 bits, as in ClassPortInfo */
-struct tcslfl {
-       u32 tc:8;
-       u32 sl:4;
-       u32 fl:20;
-} __attribute__ ((packed));
-
-/* IP Version/TC/FL packed into 32 bits, as in GRH */
-struct vertcfl {
-       u32 ver:4;
-       u32 tc:8;
-       u32 fl:20;
-} __attribute__ ((packed));
-
-static int ehca_process_perf(struct ib_device *ibdev, u8 port_num,
-                            const struct ib_wc *in_wc, const struct ib_grh *in_grh,
-                            const struct ib_mad *in_mad, struct ib_mad *out_mad)
-{
-       const struct ib_perf *in_perf = (const struct ib_perf *)in_mad;
-       struct ib_perf *out_perf = (struct ib_perf *)out_mad;
-       struct ib_class_port_info *poi =
-               (struct ib_class_port_info *)out_perf->data;
-       struct tcslfl *tcslfl =
-               (struct tcslfl *)&poi->redirect_tcslfl;
-       struct ehca_shca *shca =
-               container_of(ibdev, struct ehca_shca, ib_device);
-       struct ehca_sport *sport = &shca->sport[port_num - 1];
-
-       ehca_dbg(ibdev, "method=%x", in_perf->mad_hdr.method);
-
-       *out_mad = *in_mad;
-
-       if (in_perf->mad_hdr.class_version != 1) {
-               ehca_warn(ibdev, "Unsupported class_version=%x",
-                         in_perf->mad_hdr.class_version);
-               out_perf->mad_hdr.status = IB_MAD_STATUS_UNSUP_VERSION;
-               goto perf_reply;
-       }
-
-       switch (in_perf->mad_hdr.method) {
-       case IB_MGMT_METHOD_GET:
-       case IB_MGMT_METHOD_SET:
-               /* set class port info for redirection */
-               out_perf->mad_hdr.attr_id = IB_PMA_CLASS_PORT_INFO;
-               out_perf->mad_hdr.status = IB_MAD_STATUS_REDIRECT;
-               memset(poi, 0, sizeof(*poi));
-               poi->base_version = 1;
-               poi->class_version = 1;
-               poi->resp_time_value = 18;
-
-               /* copy local routing information from WC where applicable */
-               tcslfl->sl         = in_wc->sl;
-               poi->redirect_lid  =
-                       sport->saved_attr.lid | in_wc->dlid_path_bits;
-               poi->redirect_qp   = sport->pma_qp_nr;
-               poi->redirect_qkey = IB_QP1_QKEY;
-
-               ehca_query_pkey(ibdev, port_num, in_wc->pkey_index,
-                               &poi->redirect_pkey);
-
-               /* if request was globally routed, copy route info */
-               if (in_grh) {
-                       const struct vertcfl *vertcfl =
-                               (const struct vertcfl *)&in_grh->version_tclass_flow;
-                       memcpy(poi->redirect_gid, in_grh->dgid.raw,
-                              sizeof(poi->redirect_gid));
-                       tcslfl->tc        = vertcfl->tc;
-                       tcslfl->fl        = vertcfl->fl;
-               } else
-                       /* else only fill in default GID */
-                       ehca_query_gid(ibdev, port_num, 0,
-                                      (union ib_gid *)&poi->redirect_gid);
-
-               ehca_dbg(ibdev, "ehca_pma_lid=%x ehca_pma_qp=%x",
-                        sport->saved_attr.lid, sport->pma_qp_nr);
-               break;
-
-       case IB_MGMT_METHOD_GET_RESP:
-               return IB_MAD_RESULT_FAILURE;
-
-       default:
-               out_perf->mad_hdr.status = IB_MAD_STATUS_UNSUP_METHOD;
-               break;
-       }
-
-perf_reply:
-       out_perf->mad_hdr.method = IB_MGMT_METHOD_GET_RESP;
-
-       return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
-}
-
-int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
-                    const struct ib_wc *in_wc, const struct ib_grh *in_grh,
-                    const struct ib_mad_hdr *in, size_t in_mad_size,
-                    struct ib_mad_hdr *out, size_t *out_mad_size,
-                    u16 *out_mad_pkey_index)
-{
-       int ret;
-       const struct ib_mad *in_mad = (const struct ib_mad *)in;
-       struct ib_mad *out_mad = (struct ib_mad *)out;
-
-       if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) ||
-                        *out_mad_size != sizeof(*out_mad)))
-               return IB_MAD_RESULT_FAILURE;
-
-       if (!port_num || port_num > ibdev->phys_port_cnt || !in_wc)
-               return IB_MAD_RESULT_FAILURE;
-
-       /* accept only pma request */
-       if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT)
-               return IB_MAD_RESULT_SUCCESS;
-
-       ehca_dbg(ibdev, "port_num=%x src_qp=%x", port_num, in_wc->src_qp);
-       ret = ehca_process_perf(ibdev, port_num, in_wc, in_grh,
-                               in_mad, out_mad);
-
-       return ret;
-}
diff --git a/drivers/staging/rdma/ehca/ehca_tools.h b/drivers/staging/rdma/ehca/ehca_tools.h
deleted file mode 100644 (file)
index d280b12..0000000
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  auxiliary functions
- *
- *  Authors: Christoph Raisch <raisch@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Khadija Souissi <souissik@de.ibm.com>
- *           Waleri Fomin <fomin@de.ibm.com>
- *           Heiko J Schick <schickhj@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-
-#ifndef EHCA_TOOLS_H
-#define EHCA_TOOLS_H
-
-#include <linux/kernel.h>
-#include <linux/spinlock.h>
-#include <linux/delay.h>
-#include <linux/idr.h>
-#include <linux/kthread.h>
-#include <linux/mm.h>
-#include <linux/mman.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/vmalloc.h>
-#include <linux/notifier.h>
-#include <linux/cpu.h>
-#include <linux/device.h>
-
-#include <linux/atomic.h>
-#include <asm/ibmebus.h>
-#include <asm/io.h>
-#include <asm/pgtable.h>
-#include <asm/hvcall.h>
-
-extern int ehca_debug_level;
-
-#define ehca_dbg(ib_dev, format, arg...) \
-       do { \
-               if (unlikely(ehca_debug_level)) \
-                       dev_printk(KERN_DEBUG, (ib_dev)->dma_device, \
-                                  "PU%04x EHCA_DBG:%s " format "\n", \
-                                  raw_smp_processor_id(), __func__, \
-                                  ## arg); \
-       } while (0)
-
-#define ehca_info(ib_dev, format, arg...) \
-       dev_info((ib_dev)->dma_device, "PU%04x EHCA_INFO:%s " format "\n", \
-                raw_smp_processor_id(), __func__, ## arg)
-
-#define ehca_warn(ib_dev, format, arg...) \
-       dev_warn((ib_dev)->dma_device, "PU%04x EHCA_WARN:%s " format "\n", \
-                raw_smp_processor_id(), __func__, ## arg)
-
-#define ehca_err(ib_dev, format, arg...) \
-       dev_err((ib_dev)->dma_device, "PU%04x EHCA_ERR:%s " format "\n", \
-               raw_smp_processor_id(), __func__, ## arg)
-
-/* use this one only if no ib_dev available */
-#define ehca_gen_dbg(format, arg...) \
-       do { \
-               if (unlikely(ehca_debug_level)) \
-                       printk(KERN_DEBUG "PU%04x EHCA_DBG:%s " format "\n", \
-                              raw_smp_processor_id(), __func__, ## arg); \
-       } while (0)
-
-#define ehca_gen_warn(format, arg...) \
-       printk(KERN_INFO "PU%04x EHCA_WARN:%s " format "\n", \
-              raw_smp_processor_id(), __func__, ## arg)
-
-#define ehca_gen_err(format, arg...) \
-       printk(KERN_ERR "PU%04x EHCA_ERR:%s " format "\n", \
-              raw_smp_processor_id(), __func__, ## arg)
-
-/**
- * ehca_dmp - printk a memory block, whose length is n*8 bytes.
- * Each line has the following layout:
- * <format string> adr=X ofs=Y <8 bytes hex> <8 bytes hex>
- */
-#define ehca_dmp(adr, len, format, args...) \
-       do { \
-               unsigned int x; \
-               unsigned int l = (unsigned int)(len); \
-               unsigned char *deb = (unsigned char *)(adr); \
-               for (x = 0; x < l; x += 16) { \
-                       printk(KERN_INFO "EHCA_DMP:%s " format \
-                              " adr=%p ofs=%04x %016llx %016llx\n", \
-                              __func__, ##args, deb, x, \
-                              *((u64 *)&deb[0]), *((u64 *)&deb[8])); \
-                       deb += 16; \
-               } \
-       } while (0)
-
-/* define a bitmask, little endian version */
-#define EHCA_BMASK(pos, length) (((pos) << 16) + (length))
-
-/* define a bitmask, the ibm way... */
-#define EHCA_BMASK_IBM(from, to) (((63 - to) << 16) + ((to) - (from) + 1))
-
-/* internal function, don't use */
-#define EHCA_BMASK_SHIFTPOS(mask) (((mask) >> 16) & 0xffff)
-
-/* internal function, don't use */
-#define EHCA_BMASK_MASK(mask) (~0ULL >> ((64 - (mask)) & 0xffff))
-
-/**
- * EHCA_BMASK_SET - return value shifted and masked by mask
- * variable|=EHCA_BMASK_SET(MY_MASK,0x4711) ORs the bits in variable
- * variable&=~EHCA_BMASK_SET(MY_MASK,-1) clears the bits from the mask
- * in variable
- */
-#define EHCA_BMASK_SET(mask, value) \
-       ((EHCA_BMASK_MASK(mask) & ((u64)(value))) << EHCA_BMASK_SHIFTPOS(mask))
-
-/**
- * EHCA_BMASK_GET - extract a parameter from value by mask
- */
-#define EHCA_BMASK_GET(mask, value) \
-       (EHCA_BMASK_MASK(mask) & (((u64)(value)) >> EHCA_BMASK_SHIFTPOS(mask)))
-
-/* Converts ehca to ib return code */
-int ehca2ib_return_code(u64 ehca_rc);
-
-#endif /* EHCA_TOOLS_H */
diff --git a/drivers/staging/rdma/ehca/ehca_uverbs.c b/drivers/staging/rdma/ehca/ehca_uverbs.c
deleted file mode 100644 (file)
index 1a1d5d9..0000000
+++ /dev/null
@@ -1,309 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  userspace support verbs
- *
- *  Authors: Christoph Raisch <raisch@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Heiko J Schick <schickhj@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/slab.h>
-
-#include "ehca_classes.h"
-#include "ehca_iverbs.h"
-#include "ehca_mrmw.h"
-#include "ehca_tools.h"
-#include "hcp_if.h"
-
-struct ib_ucontext *ehca_alloc_ucontext(struct ib_device *device,
-                                       struct ib_udata *udata)
-{
-       struct ehca_ucontext *my_context;
-
-       my_context = kzalloc(sizeof *my_context, GFP_KERNEL);
-       if (!my_context) {
-               ehca_err(device, "Out of memory device=%p", device);
-               return ERR_PTR(-ENOMEM);
-       }
-
-       return &my_context->ib_ucontext;
-}
-
-int ehca_dealloc_ucontext(struct ib_ucontext *context)
-{
-       kfree(container_of(context, struct ehca_ucontext, ib_ucontext));
-       return 0;
-}
-
-static void ehca_mm_open(struct vm_area_struct *vma)
-{
-       u32 *count = (u32 *)vma->vm_private_data;
-       if (!count) {
-               ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx",
-                            vma->vm_start, vma->vm_end);
-               return;
-       }
-       (*count)++;
-       if (!(*count))
-               ehca_gen_err("Use count overflow vm_start=%lx vm_end=%lx",
-                            vma->vm_start, vma->vm_end);
-       ehca_gen_dbg("vm_start=%lx vm_end=%lx count=%x",
-                    vma->vm_start, vma->vm_end, *count);
-}
-
-static void ehca_mm_close(struct vm_area_struct *vma)
-{
-       u32 *count = (u32 *)vma->vm_private_data;
-       if (!count) {
-               ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx",
-                            vma->vm_start, vma->vm_end);
-               return;
-       }
-       (*count)--;
-       ehca_gen_dbg("vm_start=%lx vm_end=%lx count=%x",
-                    vma->vm_start, vma->vm_end, *count);
-}
-
-static const struct vm_operations_struct vm_ops = {
-       .open = ehca_mm_open,
-       .close = ehca_mm_close,
-};
-
-static int ehca_mmap_fw(struct vm_area_struct *vma, struct h_galpas *galpas,
-                       u32 *mm_count)
-{
-       int ret;
-       u64 vsize, physical;
-
-       vsize = vma->vm_end - vma->vm_start;
-       if (vsize < EHCA_PAGESIZE) {
-               ehca_gen_err("invalid vsize=%lx", vma->vm_end - vma->vm_start);
-               return -EINVAL;
-       }
-
-       physical = galpas->user.fw_handle;
-       vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-       ehca_gen_dbg("vsize=%llx physical=%llx", vsize, physical);
-       /* VM_IO | VM_DONTEXPAND | VM_DONTDUMP are set by remap_pfn_range() */
-       ret = remap_4k_pfn(vma, vma->vm_start, physical >> EHCA_PAGESHIFT,
-                          vma->vm_page_prot);
-       if (unlikely(ret)) {
-               ehca_gen_err("remap_pfn_range() failed ret=%i", ret);
-               return -ENOMEM;
-       }
-
-       vma->vm_private_data = mm_count;
-       (*mm_count)++;
-       vma->vm_ops = &vm_ops;
-
-       return 0;
-}
-
-static int ehca_mmap_queue(struct vm_area_struct *vma, struct ipz_queue *queue,
-                          u32 *mm_count)
-{
-       int ret;
-       u64 start, ofs;
-       struct page *page;
-
-       vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
-       start = vma->vm_start;
-       for (ofs = 0; ofs < queue->queue_length; ofs += PAGE_SIZE) {
-               u64 virt_addr = (u64)ipz_qeit_calc(queue, ofs);
-               page = virt_to_page(virt_addr);
-               ret = vm_insert_page(vma, start, page);
-               if (unlikely(ret)) {
-                       ehca_gen_err("vm_insert_page() failed rc=%i", ret);
-                       return ret;
-               }
-               start += PAGE_SIZE;
-       }
-       vma->vm_private_data = mm_count;
-       (*mm_count)++;
-       vma->vm_ops = &vm_ops;
-
-       return 0;
-}
-
-static int ehca_mmap_cq(struct vm_area_struct *vma, struct ehca_cq *cq,
-                       u32 rsrc_type)
-{
-       int ret;
-
-       switch (rsrc_type) {
-       case 0: /* galpa fw handle */
-               ehca_dbg(cq->ib_cq.device, "cq_num=%x fw", cq->cq_number);
-               ret = ehca_mmap_fw(vma, &cq->galpas, &cq->mm_count_galpa);
-               if (unlikely(ret)) {
-                       ehca_err(cq->ib_cq.device,
-                                "ehca_mmap_fw() failed rc=%i cq_num=%x",
-                                ret, cq->cq_number);
-                       return ret;
-               }
-               break;
-
-       case 1: /* cq queue_addr */
-               ehca_dbg(cq->ib_cq.device, "cq_num=%x queue", cq->cq_number);
-               ret = ehca_mmap_queue(vma, &cq->ipz_queue, &cq->mm_count_queue);
-               if (unlikely(ret)) {
-                       ehca_err(cq->ib_cq.device,
-                                "ehca_mmap_queue() failed rc=%i cq_num=%x",
-                                ret, cq->cq_number);
-                       return ret;
-               }
-               break;
-
-       default:
-               ehca_err(cq->ib_cq.device, "bad resource type=%x cq_num=%x",
-                        rsrc_type, cq->cq_number);
-               return -EINVAL;
-       }
-
-       return 0;
-}
-
-static int ehca_mmap_qp(struct vm_area_struct *vma, struct ehca_qp *qp,
-                       u32 rsrc_type)
-{
-       int ret;
-
-       switch (rsrc_type) {
-       case 0: /* galpa fw handle */
-               ehca_dbg(qp->ib_qp.device, "qp_num=%x fw", qp->ib_qp.qp_num);
-               ret = ehca_mmap_fw(vma, &qp->galpas, &qp->mm_count_galpa);
-               if (unlikely(ret)) {
-                       ehca_err(qp->ib_qp.device,
-                                "remap_pfn_range() failed ret=%i qp_num=%x",
-                                ret, qp->ib_qp.qp_num);
-                       return -ENOMEM;
-               }
-               break;
-
-       case 1: /* qp rqueue_addr */
-               ehca_dbg(qp->ib_qp.device, "qp_num=%x rq", qp->ib_qp.qp_num);
-               ret = ehca_mmap_queue(vma, &qp->ipz_rqueue,
-                                     &qp->mm_count_rqueue);
-               if (unlikely(ret)) {
-                       ehca_err(qp->ib_qp.device,
-                                "ehca_mmap_queue(rq) failed rc=%i qp_num=%x",
-                                ret, qp->ib_qp.qp_num);
-                       return ret;
-               }
-               break;
-
-       case 2: /* qp squeue_addr */
-               ehca_dbg(qp->ib_qp.device, "qp_num=%x sq", qp->ib_qp.qp_num);
-               ret = ehca_mmap_queue(vma, &qp->ipz_squeue,
-                                     &qp->mm_count_squeue);
-               if (unlikely(ret)) {
-                       ehca_err(qp->ib_qp.device,
-                                "ehca_mmap_queue(sq) failed rc=%i qp_num=%x",
-                                ret, qp->ib_qp.qp_num);
-                       return ret;
-               }
-               break;
-
-       default:
-               ehca_err(qp->ib_qp.device, "bad resource type=%x qp=num=%x",
-                        rsrc_type, qp->ib_qp.qp_num);
-               return -EINVAL;
-       }
-
-       return 0;
-}
-
-int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
-{
-       u64 fileoffset = vma->vm_pgoff;
-       u32 idr_handle = fileoffset & 0x1FFFFFF;
-       u32 q_type = (fileoffset >> 27) & 0x1;    /* CQ, QP,...        */
-       u32 rsrc_type = (fileoffset >> 25) & 0x3; /* sq,rq,cmnd_window */
-       u32 ret;
-       struct ehca_cq *cq;
-       struct ehca_qp *qp;
-       struct ib_uobject *uobject;
-
-       switch (q_type) {
-       case  0: /* CQ */
-               read_lock(&ehca_cq_idr_lock);
-               cq = idr_find(&ehca_cq_idr, idr_handle);
-               read_unlock(&ehca_cq_idr_lock);
-
-               /* make sure this mmap really belongs to the authorized user */
-               if (!cq)
-                       return -EINVAL;
-
-               if (!cq->ib_cq.uobject || cq->ib_cq.uobject->context != context)
-                       return -EINVAL;
-
-               ret = ehca_mmap_cq(vma, cq, rsrc_type);
-               if (unlikely(ret)) {
-                       ehca_err(cq->ib_cq.device,
-                                "ehca_mmap_cq() failed rc=%i cq_num=%x",
-                                ret, cq->cq_number);
-                       return ret;
-               }
-               break;
-
-       case 1: /* QP */
-               read_lock(&ehca_qp_idr_lock);
-               qp = idr_find(&ehca_qp_idr, idr_handle);
-               read_unlock(&ehca_qp_idr_lock);
-
-               /* make sure this mmap really belongs to the authorized user */
-               if (!qp)
-                       return -EINVAL;
-
-               uobject = IS_SRQ(qp) ? qp->ib_srq.uobject : qp->ib_qp.uobject;
-               if (!uobject || uobject->context != context)
-                       return -EINVAL;
-
-               ret = ehca_mmap_qp(vma, qp, rsrc_type);
-               if (unlikely(ret)) {
-                       ehca_err(qp->ib_qp.device,
-                                "ehca_mmap_qp() failed rc=%i qp_num=%x",
-                                ret, qp->ib_qp.qp_num);
-                       return ret;
-               }
-               break;
-
-       default:
-               ehca_gen_err("bad queue type %x", q_type);
-               return -EINVAL;
-       }
-
-       return 0;
-}
diff --git a/drivers/staging/rdma/ehca/hcp_if.c b/drivers/staging/rdma/ehca/hcp_if.c
deleted file mode 100644 (file)
index 89517ff..0000000
+++ /dev/null
@@ -1,949 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  Firmware Infiniband Interface code for POWER
- *
- *  Authors: Christoph Raisch <raisch@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Joachim Fenkes <fenkes@de.ibm.com>
- *           Gerd Bayer <gerd.bayer@de.ibm.com>
- *           Waleri Fomin <fomin@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <asm/hvcall.h>
-#include "ehca_tools.h"
-#include "hcp_if.h"
-#include "hcp_phyp.h"
-#include "hipz_fns.h"
-#include "ipz_pt_fn.h"
-
-#define H_ALL_RES_QP_ENHANCED_OPS       EHCA_BMASK_IBM(9, 11)
-#define H_ALL_RES_QP_PTE_PIN            EHCA_BMASK_IBM(12, 12)
-#define H_ALL_RES_QP_SERVICE_TYPE       EHCA_BMASK_IBM(13, 15)
-#define H_ALL_RES_QP_STORAGE            EHCA_BMASK_IBM(16, 17)
-#define H_ALL_RES_QP_LL_RQ_CQE_POSTING  EHCA_BMASK_IBM(18, 18)
-#define H_ALL_RES_QP_LL_SQ_CQE_POSTING  EHCA_BMASK_IBM(19, 21)
-#define H_ALL_RES_QP_SIGNALING_TYPE     EHCA_BMASK_IBM(22, 23)
-#define H_ALL_RES_QP_UD_AV_LKEY_CTRL    EHCA_BMASK_IBM(31, 31)
-#define H_ALL_RES_QP_SMALL_SQ_PAGE_SIZE EHCA_BMASK_IBM(32, 35)
-#define H_ALL_RES_QP_SMALL_RQ_PAGE_SIZE EHCA_BMASK_IBM(36, 39)
-#define H_ALL_RES_QP_RESOURCE_TYPE      EHCA_BMASK_IBM(56, 63)
-
-#define H_ALL_RES_QP_MAX_OUTST_SEND_WR  EHCA_BMASK_IBM(0, 15)
-#define H_ALL_RES_QP_MAX_OUTST_RECV_WR  EHCA_BMASK_IBM(16, 31)
-#define H_ALL_RES_QP_MAX_SEND_SGE       EHCA_BMASK_IBM(32, 39)
-#define H_ALL_RES_QP_MAX_RECV_SGE       EHCA_BMASK_IBM(40, 47)
-
-#define H_ALL_RES_QP_UD_AV_LKEY         EHCA_BMASK_IBM(32, 63)
-#define H_ALL_RES_QP_SRQ_QP_TOKEN       EHCA_BMASK_IBM(0, 31)
-#define H_ALL_RES_QP_SRQ_QP_HANDLE      EHCA_BMASK_IBM(0, 64)
-#define H_ALL_RES_QP_SRQ_LIMIT          EHCA_BMASK_IBM(48, 63)
-#define H_ALL_RES_QP_SRQ_QPN            EHCA_BMASK_IBM(40, 63)
-
-#define H_ALL_RES_QP_ACT_OUTST_SEND_WR  EHCA_BMASK_IBM(16, 31)
-#define H_ALL_RES_QP_ACT_OUTST_RECV_WR  EHCA_BMASK_IBM(48, 63)
-#define H_ALL_RES_QP_ACT_SEND_SGE       EHCA_BMASK_IBM(8, 15)
-#define H_ALL_RES_QP_ACT_RECV_SGE       EHCA_BMASK_IBM(24, 31)
-
-#define H_ALL_RES_QP_SQUEUE_SIZE_PAGES  EHCA_BMASK_IBM(0, 31)
-#define H_ALL_RES_QP_RQUEUE_SIZE_PAGES  EHCA_BMASK_IBM(32, 63)
-
-#define H_MP_INIT_TYPE                  EHCA_BMASK_IBM(44, 47)
-#define H_MP_SHUTDOWN                   EHCA_BMASK_IBM(48, 48)
-#define H_MP_RESET_QKEY_CTR             EHCA_BMASK_IBM(49, 49)
-
-#define HCALL4_REGS_FORMAT "r4=%lx r5=%lx r6=%lx r7=%lx"
-#define HCALL7_REGS_FORMAT HCALL4_REGS_FORMAT " r8=%lx r9=%lx r10=%lx"
-#define HCALL9_REGS_FORMAT HCALL7_REGS_FORMAT " r11=%lx r12=%lx"
-
-static DEFINE_SPINLOCK(hcall_lock);
-
-static long ehca_plpar_hcall_norets(unsigned long opcode,
-                                   unsigned long arg1,
-                                   unsigned long arg2,
-                                   unsigned long arg3,
-                                   unsigned long arg4,
-                                   unsigned long arg5,
-                                   unsigned long arg6,
-                                   unsigned long arg7)
-{
-       long ret;
-       int i, sleep_msecs;
-       unsigned long flags = 0;
-
-       if (unlikely(ehca_debug_level >= 2))
-               ehca_gen_dbg("opcode=%lx " HCALL7_REGS_FORMAT,
-                            opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7);
-
-       for (i = 0; i < 5; i++) {
-               /* serialize hCalls to work around firmware issue */
-               if (ehca_lock_hcalls)
-                       spin_lock_irqsave(&hcall_lock, flags);
-
-               ret = plpar_hcall_norets(opcode, arg1, arg2, arg3, arg4,
-                                        arg5, arg6, arg7);
-
-               if (ehca_lock_hcalls)
-                       spin_unlock_irqrestore(&hcall_lock, flags);
-
-               if (H_IS_LONG_BUSY(ret)) {
-                       sleep_msecs = get_longbusy_msecs(ret);
-                       msleep_interruptible(sleep_msecs);
-                       continue;
-               }
-
-               if (ret < H_SUCCESS)
-                       ehca_gen_err("opcode=%lx ret=%li " HCALL7_REGS_FORMAT,
-                                    opcode, ret, arg1, arg2, arg3,
-                                    arg4, arg5, arg6, arg7);
-               else
-                       if (unlikely(ehca_debug_level >= 2))
-                               ehca_gen_dbg("opcode=%lx ret=%li", opcode, ret);
-
-               return ret;
-       }
-
-       return H_BUSY;
-}
-
-static long ehca_plpar_hcall9(unsigned long opcode,
-                             unsigned long *outs, /* array of 9 outputs */
-                             unsigned long arg1,
-                             unsigned long arg2,
-                             unsigned long arg3,
-                             unsigned long arg4,
-                             unsigned long arg5,
-                             unsigned long arg6,
-                             unsigned long arg7,
-                             unsigned long arg8,
-                             unsigned long arg9)
-{
-       long ret;
-       int i, sleep_msecs;
-       unsigned long flags = 0;
-
-       if (unlikely(ehca_debug_level >= 2))
-               ehca_gen_dbg("INPUT -- opcode=%lx " HCALL9_REGS_FORMAT, opcode,
-                            arg1, arg2, arg3, arg4, arg5,
-                            arg6, arg7, arg8, arg9);
-
-       for (i = 0; i < 5; i++) {
-               /* serialize hCalls to work around firmware issue */
-               if (ehca_lock_hcalls)
-                       spin_lock_irqsave(&hcall_lock, flags);
-
-               ret = plpar_hcall9(opcode, outs,
-                                  arg1, arg2, arg3, arg4, arg5,
-                                  arg6, arg7, arg8, arg9);
-
-               if (ehca_lock_hcalls)
-                       spin_unlock_irqrestore(&hcall_lock, flags);
-
-               if (H_IS_LONG_BUSY(ret)) {
-                       sleep_msecs = get_longbusy_msecs(ret);
-                       msleep_interruptible(sleep_msecs);
-                       continue;
-               }
-
-               if (ret < H_SUCCESS) {
-                       ehca_gen_err("INPUT -- opcode=%lx " HCALL9_REGS_FORMAT,
-                                    opcode, arg1, arg2, arg3, arg4, arg5,
-                                    arg6, arg7, arg8, arg9);
-                       ehca_gen_err("OUTPUT -- ret=%li " HCALL9_REGS_FORMAT,
-                                    ret, outs[0], outs[1], outs[2], outs[3],
-                                    outs[4], outs[5], outs[6], outs[7],
-                                    outs[8]);
-               } else if (unlikely(ehca_debug_level >= 2))
-                       ehca_gen_dbg("OUTPUT -- ret=%li " HCALL9_REGS_FORMAT,
-                                    ret, outs[0], outs[1], outs[2], outs[3],
-                                    outs[4], outs[5], outs[6], outs[7],
-                                    outs[8]);
-               return ret;
-       }
-
-       return H_BUSY;
-}
-
-u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle,
-                            struct ehca_pfeq *pfeq,
-                            const u32 neq_control,
-                            const u32 number_of_entries,
-                            struct ipz_eq_handle *eq_handle,
-                            u32 *act_nr_of_entries,
-                            u32 *act_pages,
-                            u32 *eq_ist)
-{
-       u64 ret;
-       unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-       u64 allocate_controls;
-
-       /* resource type */
-       allocate_controls = 3ULL;
-
-       /* ISN is associated */
-       if (neq_control != 1)
-               allocate_controls = (1ULL << (63 - 7)) | allocate_controls;
-       else /* notification event queue */
-               allocate_controls = (1ULL << 63) | allocate_controls;
-
-       ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
-                               adapter_handle.handle,  /* r4 */
-                               allocate_controls,      /* r5 */
-                               number_of_entries,      /* r6 */
-                               0, 0, 0, 0, 0, 0);
-       eq_handle->handle = outs[0];
-       *act_nr_of_entries = (u32)outs[3];
-       *act_pages = (u32)outs[4];
-       *eq_ist = (u32)outs[5];
-
-       if (ret == H_NOT_ENOUGH_RESOURCES)
-               ehca_gen_err("Not enough resource - ret=%lli ", ret);
-
-       return ret;
-}
-
-u64 hipz_h_reset_event(const struct ipz_adapter_handle adapter_handle,
-                      struct ipz_eq_handle eq_handle,
-                      const u64 event_mask)
-{
-       return ehca_plpar_hcall_norets(H_RESET_EVENTS,
-                                      adapter_handle.handle, /* r4 */
-                                      eq_handle.handle,      /* r5 */
-                                      event_mask,            /* r6 */
-                                      0, 0, 0, 0);
-}
-
-u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle,
-                            struct ehca_cq *cq,
-                            struct ehca_alloc_cq_parms *param)
-{
-       int rc;
-       u64 ret;
-       unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-
-       ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
-                               adapter_handle.handle,   /* r4  */
-                               2,                       /* r5  */
-                               param->eq_handle.handle, /* r6  */
-                               cq->token,               /* r7  */
-                               param->nr_cqe,           /* r8  */
-                               0, 0, 0, 0);
-       cq->ipz_cq_handle.handle = outs[0];
-       param->act_nr_of_entries = (u32)outs[3];
-       param->act_pages = (u32)outs[4];
-
-       if (ret == H_SUCCESS) {
-               rc = hcp_galpas_ctor(&cq->galpas, 0, outs[5], outs[6]);
-               if (rc) {
-                       ehca_gen_err("Could not establish HW access. rc=%d paddr=%#lx",
-                                    rc, outs[5]);
-
-                       ehca_plpar_hcall_norets(H_FREE_RESOURCE,
-                                               adapter_handle.handle,     /* r4 */
-                                               cq->ipz_cq_handle.handle,  /* r5 */
-                                               0, 0, 0, 0, 0);
-                       ret = H_NO_MEM;
-               }
-       }
-
-       if (ret == H_NOT_ENOUGH_RESOURCES)
-               ehca_gen_err("Not enough resources. ret=%lli", ret);
-
-       return ret;
-}
-
-u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
-                            struct ehca_alloc_qp_parms *parms, int is_user)
-{
-       int rc;
-       u64 ret;
-       u64 allocate_controls, max_r10_reg, r11, r12;
-       unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-
-       allocate_controls =
-               EHCA_BMASK_SET(H_ALL_RES_QP_ENHANCED_OPS, parms->ext_type)
-               | EHCA_BMASK_SET(H_ALL_RES_QP_PTE_PIN, 0)
-               | EHCA_BMASK_SET(H_ALL_RES_QP_SERVICE_TYPE, parms->servicetype)
-               | EHCA_BMASK_SET(H_ALL_RES_QP_SIGNALING_TYPE, parms->sigtype)
-               | EHCA_BMASK_SET(H_ALL_RES_QP_STORAGE, parms->qp_storage)
-               | EHCA_BMASK_SET(H_ALL_RES_QP_SMALL_SQ_PAGE_SIZE,
-                                parms->squeue.page_size)
-               | EHCA_BMASK_SET(H_ALL_RES_QP_SMALL_RQ_PAGE_SIZE,
-                                parms->rqueue.page_size)
-               | EHCA_BMASK_SET(H_ALL_RES_QP_LL_RQ_CQE_POSTING,
-                                !!(parms->ll_comp_flags & LLQP_RECV_COMP))
-               | EHCA_BMASK_SET(H_ALL_RES_QP_LL_SQ_CQE_POSTING,
-                                !!(parms->ll_comp_flags & LLQP_SEND_COMP))
-               | EHCA_BMASK_SET(H_ALL_RES_QP_UD_AV_LKEY_CTRL,
-                                parms->ud_av_l_key_ctl)
-               | EHCA_BMASK_SET(H_ALL_RES_QP_RESOURCE_TYPE, 1);
-
-       max_r10_reg =
-               EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_SEND_WR,
-                              parms->squeue.max_wr + 1)
-               | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_RECV_WR,
-                                parms->rqueue.max_wr + 1)
-               | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_SEND_SGE,
-                                parms->squeue.max_sge)
-               | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_RECV_SGE,
-                                parms->rqueue.max_sge);
-
-       r11 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_QP_TOKEN, parms->srq_token);
-
-       if (parms->ext_type == EQPT_SRQ)
-               r12 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_LIMIT, parms->srq_limit);
-       else
-               r12 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_QPN, parms->srq_qpn);
-
-       ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
-                               adapter_handle.handle,             /* r4  */
-                               allocate_controls,                 /* r5  */
-                               parms->send_cq_handle.handle,
-                               parms->recv_cq_handle.handle,
-                               parms->eq_handle.handle,
-                               ((u64)parms->token << 32) | parms->pd.value,
-                               max_r10_reg, r11, r12);
-
-       parms->qp_handle.handle = outs[0];
-       parms->real_qp_num = (u32)outs[1];
-       parms->squeue.act_nr_wqes =
-               (u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_SEND_WR, outs[2]);
-       parms->rqueue.act_nr_wqes =
-               (u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_RECV_WR, outs[2]);
-       parms->squeue.act_nr_sges =
-               (u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_SEND_SGE, outs[3]);
-       parms->rqueue.act_nr_sges =
-               (u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_RECV_SGE, outs[3]);
-       parms->squeue.queue_size =
-               (u32)EHCA_BMASK_GET(H_ALL_RES_QP_SQUEUE_SIZE_PAGES, outs[4]);
-       parms->rqueue.queue_size =
-               (u32)EHCA_BMASK_GET(H_ALL_RES_QP_RQUEUE_SIZE_PAGES, outs[4]);
-
-       if (ret == H_SUCCESS) {
-               rc = hcp_galpas_ctor(&parms->galpas, is_user, outs[6], outs[6]);
-               if (rc) {
-                       ehca_gen_err("Could not establish HW access. rc=%d paddr=%#lx",
-                                    rc, outs[6]);
-
-                       ehca_plpar_hcall_norets(H_FREE_RESOURCE,
-                                               adapter_handle.handle,     /* r4 */
-                                               parms->qp_handle.handle,  /* r5 */
-                                               0, 0, 0, 0, 0);
-                       ret = H_NO_MEM;
-               }
-       }
-
-       if (ret == H_NOT_ENOUGH_RESOURCES)
-               ehca_gen_err("Not enough resources. ret=%lli", ret);
-
-       return ret;
-}
-
-u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle,
-                     const u8 port_id,
-                     struct hipz_query_port *query_port_response_block)
-{
-       u64 ret;
-       u64 r_cb = __pa(query_port_response_block);
-
-       if (r_cb & (EHCA_PAGESIZE-1)) {
-               ehca_gen_err("response block not page aligned");
-               return H_PARAMETER;
-       }
-
-       ret = ehca_plpar_hcall_norets(H_QUERY_PORT,
-                                     adapter_handle.handle, /* r4 */
-                                     port_id,               /* r5 */
-                                     r_cb,                  /* r6 */
-                                     0, 0, 0, 0);
-
-       if (ehca_debug_level >= 2)
-               ehca_dmp(query_port_response_block, 64, "response_block");
-
-       return ret;
-}
-
-u64 hipz_h_modify_port(const struct ipz_adapter_handle adapter_handle,
-                      const u8 port_id, const u32 port_cap,
-                      const u8 init_type, const int modify_mask)
-{
-       u64 port_attributes = port_cap;
-
-       if (modify_mask & IB_PORT_SHUTDOWN)
-               port_attributes |= EHCA_BMASK_SET(H_MP_SHUTDOWN, 1);
-       if (modify_mask & IB_PORT_INIT_TYPE)
-               port_attributes |= EHCA_BMASK_SET(H_MP_INIT_TYPE, init_type);
-       if (modify_mask & IB_PORT_RESET_QKEY_CNTR)
-               port_attributes |= EHCA_BMASK_SET(H_MP_RESET_QKEY_CTR, 1);
-
-       return ehca_plpar_hcall_norets(H_MODIFY_PORT,
-                                      adapter_handle.handle, /* r4 */
-                                      port_id,               /* r5 */
-                                      port_attributes,       /* r6 */
-                                      0, 0, 0, 0);
-}
-
-u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle,
-                    struct hipz_query_hca *query_hca_rblock)
-{
-       u64 r_cb = __pa(query_hca_rblock);
-
-       if (r_cb & (EHCA_PAGESIZE-1)) {
-               ehca_gen_err("response_block=%p not page aligned",
-                            query_hca_rblock);
-               return H_PARAMETER;
-       }
-
-       return ehca_plpar_hcall_norets(H_QUERY_HCA,
-                                      adapter_handle.handle, /* r4 */
-                                      r_cb,                  /* r5 */
-                                      0, 0, 0, 0, 0);
-}
-
-u64 hipz_h_register_rpage(const struct ipz_adapter_handle adapter_handle,
-                         const u8 pagesize,
-                         const u8 queue_type,
-                         const u64 resource_handle,
-                         const u64 logical_address_of_page,
-                         u64 count)
-{
-       return ehca_plpar_hcall_norets(H_REGISTER_RPAGES,
-                                      adapter_handle.handle,      /* r4  */
-                                      (u64)queue_type | ((u64)pagesize) << 8,
-                                      /* r5  */
-                                      resource_handle,            /* r6  */
-                                      logical_address_of_page,    /* r7  */
-                                      count,                      /* r8  */
-                                      0, 0);
-}
-
-u64 hipz_h_register_rpage_eq(const struct ipz_adapter_handle adapter_handle,
-                            const struct ipz_eq_handle eq_handle,
-                            struct ehca_pfeq *pfeq,
-                            const u8 pagesize,
-                            const u8 queue_type,
-                            const u64 logical_address_of_page,
-                            const u64 count)
-{
-       if (count != 1) {
-               ehca_gen_err("Ppage counter=%llx", count);
-               return H_PARAMETER;
-       }
-       return hipz_h_register_rpage(adapter_handle,
-                                    pagesize,
-                                    queue_type,
-                                    eq_handle.handle,
-                                    logical_address_of_page, count);
-}
-
-u64 hipz_h_query_int_state(const struct ipz_adapter_handle adapter_handle,
-                          u32 ist)
-{
-       u64 ret;
-       ret = ehca_plpar_hcall_norets(H_QUERY_INT_STATE,
-                                     adapter_handle.handle, /* r4 */
-                                     ist,                   /* r5 */
-                                     0, 0, 0, 0, 0);
-
-       if (ret != H_SUCCESS && ret != H_BUSY)
-               ehca_gen_err("Could not query interrupt state.");
-
-       return ret;
-}
-
-u64 hipz_h_register_rpage_cq(const struct ipz_adapter_handle adapter_handle,
-                            const struct ipz_cq_handle cq_handle,
-                            struct ehca_pfcq *pfcq,
-                            const u8 pagesize,
-                            const u8 queue_type,
-                            const u64 logical_address_of_page,
-                            const u64 count,
-                            const struct h_galpa gal)
-{
-       if (count != 1) {
-               ehca_gen_err("Page counter=%llx", count);
-               return H_PARAMETER;
-       }
-
-       return hipz_h_register_rpage(adapter_handle, pagesize, queue_type,
-                                    cq_handle.handle, logical_address_of_page,
-                                    count);
-}
-
-u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle,
-                            const struct ipz_qp_handle qp_handle,
-                            struct ehca_pfqp *pfqp,
-                            const u8 pagesize,
-                            const u8 queue_type,
-                            const u64 logical_address_of_page,
-                            const u64 count,
-                            const struct h_galpa galpa)
-{
-       if (count > 1) {
-               ehca_gen_err("Page counter=%llx", count);
-               return H_PARAMETER;
-       }
-
-       return hipz_h_register_rpage(adapter_handle, pagesize, queue_type,
-                                    qp_handle.handle, logical_address_of_page,
-                                    count);
-}
-
-u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle,
-                              const struct ipz_qp_handle qp_handle,
-                              struct ehca_pfqp *pfqp,
-                              void **log_addr_next_sq_wqe2processed,
-                              void **log_addr_next_rq_wqe2processed,
-                              int dis_and_get_function_code)
-{
-       u64 ret;
-       unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-
-       ret = ehca_plpar_hcall9(H_DISABLE_AND_GETC, outs,
-                               adapter_handle.handle,     /* r4 */
-                               dis_and_get_function_code, /* r5 */
-                               qp_handle.handle,          /* r6 */
-                               0, 0, 0, 0, 0, 0);
-       if (log_addr_next_sq_wqe2processed)
-               *log_addr_next_sq_wqe2processed = (void *)outs[0];
-       if (log_addr_next_rq_wqe2processed)
-               *log_addr_next_rq_wqe2processed = (void *)outs[1];
-
-       return ret;
-}
-
-u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle,
-                    const struct ipz_qp_handle qp_handle,
-                    struct ehca_pfqp *pfqp,
-                    const u64 update_mask,
-                    struct hcp_modify_qp_control_block *mqpcb,
-                    struct h_galpa gal)
-{
-       u64 ret;
-       unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-       ret = ehca_plpar_hcall9(H_MODIFY_QP, outs,
-                               adapter_handle.handle, /* r4 */
-                               qp_handle.handle,      /* r5 */
-                               update_mask,           /* r6 */
-                               __pa(mqpcb),           /* r7 */
-                               0, 0, 0, 0, 0);
-
-       if (ret == H_NOT_ENOUGH_RESOURCES)
-               ehca_gen_err("Insufficient resources ret=%lli", ret);
-
-       return ret;
-}
-
-u64 hipz_h_query_qp(const struct ipz_adapter_handle adapter_handle,
-                   const struct ipz_qp_handle qp_handle,
-                   struct ehca_pfqp *pfqp,
-                   struct hcp_modify_qp_control_block *qqpcb,
-                   struct h_galpa gal)
-{
-       return ehca_plpar_hcall_norets(H_QUERY_QP,
-                                      adapter_handle.handle, /* r4 */
-                                      qp_handle.handle,      /* r5 */
-                                      __pa(qqpcb),           /* r6 */
-                                      0, 0, 0, 0);
-}
-
-u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle,
-                     struct ehca_qp *qp)
-{
-       u64 ret;
-       unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-
-       ret = hcp_galpas_dtor(&qp->galpas);
-       if (ret) {
-               ehca_gen_err("Could not destruct qp->galpas");
-               return H_RESOURCE;
-       }
-       ret = ehca_plpar_hcall9(H_DISABLE_AND_GETC, outs,
-                               adapter_handle.handle,     /* r4 */
-                               /* function code */
-                               1,                         /* r5 */
-                               qp->ipz_qp_handle.handle,  /* r6 */
-                               0, 0, 0, 0, 0, 0);
-       if (ret == H_HARDWARE)
-               ehca_gen_err("HCA not operational. ret=%lli", ret);
-
-       ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE,
-                                     adapter_handle.handle,     /* r4 */
-                                     qp->ipz_qp_handle.handle,  /* r5 */
-                                     0, 0, 0, 0, 0);
-
-       if (ret == H_RESOURCE)
-               ehca_gen_err("Resource still in use. ret=%lli", ret);
-
-       return ret;
-}
-
-u64 hipz_h_define_aqp0(const struct ipz_adapter_handle adapter_handle,
-                      const struct ipz_qp_handle qp_handle,
-                      struct h_galpa gal,
-                      u32 port)
-{
-       return ehca_plpar_hcall_norets(H_DEFINE_AQP0,
-                                      adapter_handle.handle, /* r4 */
-                                      qp_handle.handle,      /* r5 */
-                                      port,                  /* r6 */
-                                      0, 0, 0, 0);
-}
-
-u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle,
-                      const struct ipz_qp_handle qp_handle,
-                      struct h_galpa gal,
-                      u32 port, u32 * pma_qp_nr,
-                      u32 * bma_qp_nr)
-{
-       u64 ret;
-       unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-
-       ret = ehca_plpar_hcall9(H_DEFINE_AQP1, outs,
-                               adapter_handle.handle, /* r4 */
-                               qp_handle.handle,      /* r5 */
-                               port,                  /* r6 */
-                               0, 0, 0, 0, 0, 0);
-       *pma_qp_nr = (u32)outs[0];
-       *bma_qp_nr = (u32)outs[1];
-
-       if (ret == H_ALIAS_EXIST)
-               ehca_gen_err("AQP1 already exists. ret=%lli", ret);
-
-       return ret;
-}
-
-u64 hipz_h_attach_mcqp(const struct ipz_adapter_handle adapter_handle,
-                      const struct ipz_qp_handle qp_handle,
-                      struct h_galpa gal,
-                      u16 mcg_dlid,
-                      u64 subnet_prefix, u64 interface_id)
-{
-       u64 ret;
-
-       ret = ehca_plpar_hcall_norets(H_ATTACH_MCQP,
-                                     adapter_handle.handle,  /* r4 */
-                                     qp_handle.handle,       /* r5 */
-                                     mcg_dlid,               /* r6 */
-                                     interface_id,           /* r7 */
-                                     subnet_prefix,          /* r8 */
-                                     0, 0);
-
-       if (ret == H_NOT_ENOUGH_RESOURCES)
-               ehca_gen_err("Not enough resources. ret=%lli", ret);
-
-       return ret;
-}
-
-u64 hipz_h_detach_mcqp(const struct ipz_adapter_handle adapter_handle,
-                      const struct ipz_qp_handle qp_handle,
-                      struct h_galpa gal,
-                      u16 mcg_dlid,
-                      u64 subnet_prefix, u64 interface_id)
-{
-       return ehca_plpar_hcall_norets(H_DETACH_MCQP,
-                                      adapter_handle.handle, /* r4 */
-                                      qp_handle.handle,      /* r5 */
-                                      mcg_dlid,              /* r6 */
-                                      interface_id,          /* r7 */
-                                      subnet_prefix,         /* r8 */
-                                      0, 0);
-}
-
-u64 hipz_h_destroy_cq(const struct ipz_adapter_handle adapter_handle,
-                     struct ehca_cq *cq,
-                     u8 force_flag)
-{
-       u64 ret;
-
-       ret = hcp_galpas_dtor(&cq->galpas);
-       if (ret) {
-               ehca_gen_err("Could not destruct cp->galpas");
-               return H_RESOURCE;
-       }
-
-       ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE,
-                                     adapter_handle.handle,     /* r4 */
-                                     cq->ipz_cq_handle.handle,  /* r5 */
-                                     force_flag != 0 ? 1L : 0L, /* r6 */
-                                     0, 0, 0, 0);
-
-       if (ret == H_RESOURCE)
-               ehca_gen_err("H_FREE_RESOURCE failed ret=%lli ", ret);
-
-       return ret;
-}
-
-u64 hipz_h_destroy_eq(const struct ipz_adapter_handle adapter_handle,
-                     struct ehca_eq *eq)
-{
-       u64 ret;
-
-       ret = hcp_galpas_dtor(&eq->galpas);
-       if (ret) {
-               ehca_gen_err("Could not destruct eq->galpas");
-               return H_RESOURCE;
-       }
-
-       ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE,
-                                     adapter_handle.handle,     /* r4 */
-                                     eq->ipz_eq_handle.handle,  /* r5 */
-                                     0, 0, 0, 0, 0);
-
-       if (ret == H_RESOURCE)
-               ehca_gen_err("Resource in use. ret=%lli ", ret);
-
-       return ret;
-}
-
-u64 hipz_h_alloc_resource_mr(const struct ipz_adapter_handle adapter_handle,
-                            const struct ehca_mr *mr,
-                            const u64 vaddr,
-                            const u64 length,
-                            const u32 access_ctrl,
-                            const struct ipz_pd pd,
-                            struct ehca_mr_hipzout_parms *outparms)
-{
-       u64 ret;
-       unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-
-       ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
-                               adapter_handle.handle,            /* r4 */
-                               5,                                /* r5 */
-                               vaddr,                            /* r6 */
-                               length,                           /* r7 */
-                               (((u64)access_ctrl) << 32ULL),    /* r8 */
-                               pd.value,                         /* r9 */
-                               0, 0, 0);
-       outparms->handle.handle = outs[0];
-       outparms->lkey = (u32)outs[2];
-       outparms->rkey = (u32)outs[3];
-
-       return ret;
-}
-
-u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle,
-                            const struct ehca_mr *mr,
-                            const u8 pagesize,
-                            const u8 queue_type,
-                            const u64 logical_address_of_page,
-                            const u64 count)
-{
-       u64 ret;
-
-       if (unlikely(ehca_debug_level >= 3)) {
-               if (count > 1) {
-                       u64 *kpage;
-                       int i;
-                       kpage = __va(logical_address_of_page);
-                       for (i = 0; i < count; i++)
-                               ehca_gen_dbg("kpage[%d]=%p",
-                                            i, (void *)kpage[i]);
-               } else
-                       ehca_gen_dbg("kpage=%p",
-                                    (void *)logical_address_of_page);
-       }
-
-       if ((count > 1) && (logical_address_of_page & (EHCA_PAGESIZE-1))) {
-               ehca_gen_err("logical_address_of_page not on a 4k boundary "
-                            "adapter_handle=%llx mr=%p mr_handle=%llx "
-                            "pagesize=%x queue_type=%x "
-                            "logical_address_of_page=%llx count=%llx",
-                            adapter_handle.handle, mr,
-                            mr->ipz_mr_handle.handle, pagesize, queue_type,
-                            logical_address_of_page, count);
-               ret = H_PARAMETER;
-       } else
-               ret = hipz_h_register_rpage(adapter_handle, pagesize,
-                                           queue_type,
-                                           mr->ipz_mr_handle.handle,
-                                           logical_address_of_page, count);
-       return ret;
-}
-
-u64 hipz_h_query_mr(const struct ipz_adapter_handle adapter_handle,
-                   const struct ehca_mr *mr,
-                   struct ehca_mr_hipzout_parms *outparms)
-{
-       u64 ret;
-       unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-
-       ret = ehca_plpar_hcall9(H_QUERY_MR, outs,
-                               adapter_handle.handle,     /* r4 */
-                               mr->ipz_mr_handle.handle,  /* r5 */
-                               0, 0, 0, 0, 0, 0, 0);
-       outparms->len = outs[0];
-       outparms->vaddr = outs[1];
-       outparms->acl  = outs[4] >> 32;
-       outparms->lkey = (u32)(outs[5] >> 32);
-       outparms->rkey = (u32)(outs[5] & (0xffffffff));
-
-       return ret;
-}
-
-u64 hipz_h_free_resource_mr(const struct ipz_adapter_handle adapter_handle,
-                           const struct ehca_mr *mr)
-{
-       return ehca_plpar_hcall_norets(H_FREE_RESOURCE,
-                                      adapter_handle.handle,    /* r4 */
-                                      mr->ipz_mr_handle.handle, /* r5 */
-                                      0, 0, 0, 0, 0);
-}
-
-u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle,
-                         const struct ehca_mr *mr,
-                         const u64 vaddr_in,
-                         const u64 length,
-                         const u32 access_ctrl,
-                         const struct ipz_pd pd,
-                         const u64 mr_addr_cb,
-                         struct ehca_mr_hipzout_parms *outparms)
-{
-       u64 ret;
-       unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-
-       ret = ehca_plpar_hcall9(H_REREGISTER_PMR, outs,
-                               adapter_handle.handle,    /* r4 */
-                               mr->ipz_mr_handle.handle, /* r5 */
-                               vaddr_in,                 /* r6 */
-                               length,                   /* r7 */
-                               /* r8 */
-                               ((((u64)access_ctrl) << 32ULL) | pd.value),
-                               mr_addr_cb,               /* r9 */
-                               0, 0, 0);
-       outparms->vaddr = outs[1];
-       outparms->lkey = (u32)outs[2];
-       outparms->rkey = (u32)outs[3];
-
-       return ret;
-}
-
-u64 hipz_h_register_smr(const struct ipz_adapter_handle adapter_handle,
-                       const struct ehca_mr *mr,
-                       const struct ehca_mr *orig_mr,
-                       const u64 vaddr_in,
-                       const u32 access_ctrl,
-                       const struct ipz_pd pd,
-                       struct ehca_mr_hipzout_parms *outparms)
-{
-       u64 ret;
-       unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-
-       ret = ehca_plpar_hcall9(H_REGISTER_SMR, outs,
-                               adapter_handle.handle,            /* r4 */
-                               orig_mr->ipz_mr_handle.handle,    /* r5 */
-                               vaddr_in,                         /* r6 */
-                               (((u64)access_ctrl) << 32ULL),    /* r7 */
-                               pd.value,                         /* r8 */
-                               0, 0, 0, 0);
-       outparms->handle.handle = outs[0];
-       outparms->lkey = (u32)outs[2];
-       outparms->rkey = (u32)outs[3];
-
-       return ret;
-}
-
-u64 hipz_h_alloc_resource_mw(const struct ipz_adapter_handle adapter_handle,
-                            const struct ehca_mw *mw,
-                            const struct ipz_pd pd,
-                            struct ehca_mw_hipzout_parms *outparms)
-{
-       u64 ret;
-       unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-
-       ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
-                               adapter_handle.handle,      /* r4 */
-                               6,                          /* r5 */
-                               pd.value,                   /* r6 */
-                               0, 0, 0, 0, 0, 0);
-       outparms->handle.handle = outs[0];
-       outparms->rkey = (u32)outs[3];
-
-       return ret;
-}
-
-u64 hipz_h_query_mw(const struct ipz_adapter_handle adapter_handle,
-                   const struct ehca_mw *mw,
-                   struct ehca_mw_hipzout_parms *outparms)
-{
-       u64 ret;
-       unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-
-       ret = ehca_plpar_hcall9(H_QUERY_MW, outs,
-                               adapter_handle.handle,    /* r4 */
-                               mw->ipz_mw_handle.handle, /* r5 */
-                               0, 0, 0, 0, 0, 0, 0);
-       outparms->rkey = (u32)outs[3];
-
-       return ret;
-}
-
-u64 hipz_h_free_resource_mw(const struct ipz_adapter_handle adapter_handle,
-                           const struct ehca_mw *mw)
-{
-       return ehca_plpar_hcall_norets(H_FREE_RESOURCE,
-                                      adapter_handle.handle,    /* r4 */
-                                      mw->ipz_mw_handle.handle, /* r5 */
-                                      0, 0, 0, 0, 0);
-}
-
-u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle,
-                     const u64 ressource_handle,
-                     void *rblock,
-                     unsigned long *byte_count)
-{
-       u64 r_cb = __pa(rblock);
-
-       if (r_cb & (EHCA_PAGESIZE-1)) {
-               ehca_gen_err("rblock not page aligned.");
-               return H_PARAMETER;
-       }
-
-       return ehca_plpar_hcall_norets(H_ERROR_DATA,
-                                      adapter_handle.handle,
-                                      ressource_handle,
-                                      r_cb,
-                                      0, 0, 0, 0);
-}
-
-u64 hipz_h_eoi(int irq)
-{
-       unsigned long xirr;
-
-       iosync();
-       xirr = (0xffULL << 24) | irq;
-
-       return plpar_hcall_norets(H_EOI, xirr);
-}
diff --git a/drivers/staging/rdma/ehca/hcp_if.h b/drivers/staging/rdma/ehca/hcp_if.h
deleted file mode 100644 (file)
index a46e514..0000000
+++ /dev/null
@@ -1,265 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  Firmware Infiniband Interface code for POWER
- *
- *  Authors: Christoph Raisch <raisch@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Gerd Bayer <gerd.bayer@de.ibm.com>
- *           Waleri Fomin <fomin@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __HCP_IF_H__
-#define __HCP_IF_H__
-
-#include "ehca_classes.h"
-#include "ehca_tools.h"
-#include "hipz_hw.h"
-
-/*
- * hipz_h_alloc_resource_eq allocates EQ resources in HW and FW, initialize
- * resources, create the empty EQPT (ring).
- */
-u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle,
-                            struct ehca_pfeq *pfeq,
-                            const u32 neq_control,
-                            const u32 number_of_entries,
-                            struct ipz_eq_handle *eq_handle,
-                            u32 * act_nr_of_entries,
-                            u32 * act_pages,
-                            u32 * eq_ist);
-
-u64 hipz_h_reset_event(const struct ipz_adapter_handle adapter_handle,
-                      struct ipz_eq_handle eq_handle,
-                      const u64 event_mask);
-/*
- * hipz_h_allocate_resource_cq allocates CQ resources in HW and FW, initialize
- * resources, create the empty CQPT (ring).
- */
-u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle,
-                            struct ehca_cq *cq,
-                            struct ehca_alloc_cq_parms *param);
-
-
-/*
- * hipz_h_alloc_resource_qp allocates QP resources in HW and FW,
- * initialize resources, create empty QPPTs (2 rings).
- */
-u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
-                            struct ehca_alloc_qp_parms *parms, int is_user);
-
-u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle,
-                     const u8 port_id,
-                     struct hipz_query_port *query_port_response_block);
-
-u64 hipz_h_modify_port(const struct ipz_adapter_handle adapter_handle,
-                      const u8 port_id, const u32 port_cap,
-                      const u8 init_type, const int modify_mask);
-
-u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle,
-                    struct hipz_query_hca *query_hca_rblock);
-
-/*
- * hipz_h_register_rpage internal function in hcp_if.h for all
- * hcp_H_REGISTER_RPAGE calls.
- */
-u64 hipz_h_register_rpage(const struct ipz_adapter_handle adapter_handle,
-                         const u8 pagesize,
-                         const u8 queue_type,
-                         const u64 resource_handle,
-                         const u64 logical_address_of_page,
-                         u64 count);
-
-u64 hipz_h_register_rpage_eq(const struct ipz_adapter_handle adapter_handle,
-                            const struct ipz_eq_handle eq_handle,
-                            struct ehca_pfeq *pfeq,
-                            const u8 pagesize,
-                            const u8 queue_type,
-                            const u64 logical_address_of_page,
-                            const u64 count);
-
-u64 hipz_h_query_int_state(const struct ipz_adapter_handle
-                          hcp_adapter_handle,
-                          u32 ist);
-
-u64 hipz_h_register_rpage_cq(const struct ipz_adapter_handle adapter_handle,
-                            const struct ipz_cq_handle cq_handle,
-                            struct ehca_pfcq *pfcq,
-                            const u8 pagesize,
-                            const u8 queue_type,
-                            const u64 logical_address_of_page,
-                            const u64 count,
-                            const struct h_galpa gal);
-
-u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle,
-                            const struct ipz_qp_handle qp_handle,
-                            struct ehca_pfqp *pfqp,
-                            const u8 pagesize,
-                            const u8 queue_type,
-                            const u64 logical_address_of_page,
-                            const u64 count,
-                            const struct h_galpa galpa);
-
-u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle,
-                              const struct ipz_qp_handle qp_handle,
-                              struct ehca_pfqp *pfqp,
-                              void **log_addr_next_sq_wqe_tb_processed,
-                              void **log_addr_next_rq_wqe_tb_processed,
-                              int dis_and_get_function_code);
-enum hcall_sigt {
-       HCALL_SIGT_NO_CQE = 0,
-       HCALL_SIGT_BY_WQE = 1,
-       HCALL_SIGT_EVERY = 2
-};
-
-u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle,
-                    const struct ipz_qp_handle qp_handle,
-                    struct ehca_pfqp *pfqp,
-                    const u64 update_mask,
-                    struct hcp_modify_qp_control_block *mqpcb,
-                    struct h_galpa gal);
-
-u64 hipz_h_query_qp(const struct ipz_adapter_handle adapter_handle,
-                   const struct ipz_qp_handle qp_handle,
-                   struct ehca_pfqp *pfqp,
-                   struct hcp_modify_qp_control_block *qqpcb,
-                   struct h_galpa gal);
-
-u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle,
-                     struct ehca_qp *qp);
-
-u64 hipz_h_define_aqp0(const struct ipz_adapter_handle adapter_handle,
-                      const struct ipz_qp_handle qp_handle,
-                      struct h_galpa gal,
-                      u32 port);
-
-u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle,
-                      const struct ipz_qp_handle qp_handle,
-                      struct h_galpa gal,
-                      u32 port, u32 * pma_qp_nr,
-                      u32 * bma_qp_nr);
-
-u64 hipz_h_attach_mcqp(const struct ipz_adapter_handle adapter_handle,
-                      const struct ipz_qp_handle qp_handle,
-                      struct h_galpa gal,
-                      u16 mcg_dlid,
-                      u64 subnet_prefix, u64 interface_id);
-
-u64 hipz_h_detach_mcqp(const struct ipz_adapter_handle adapter_handle,
-                      const struct ipz_qp_handle qp_handle,
-                      struct h_galpa gal,
-                      u16 mcg_dlid,
-                      u64 subnet_prefix, u64 interface_id);
-
-u64 hipz_h_destroy_cq(const struct ipz_adapter_handle adapter_handle,
-                     struct ehca_cq *cq,
-                     u8 force_flag);
-
-u64 hipz_h_destroy_eq(const struct ipz_adapter_handle adapter_handle,
-                     struct ehca_eq *eq);
-
-/*
- * hipz_h_alloc_resource_mr allocates MR resources in HW and FW, initialize
- * resources.
- */
-u64 hipz_h_alloc_resource_mr(const struct ipz_adapter_handle adapter_handle,
-                            const struct ehca_mr *mr,
-                            const u64 vaddr,
-                            const u64 length,
-                            const u32 access_ctrl,
-                            const struct ipz_pd pd,
-                            struct ehca_mr_hipzout_parms *outparms);
-
-/* hipz_h_register_rpage_mr registers MR resource pages in HW and FW */
-u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle,
-                            const struct ehca_mr *mr,
-                            const u8 pagesize,
-                            const u8 queue_type,
-                            const u64 logical_address_of_page,
-                            const u64 count);
-
-/* hipz_h_query_mr queries MR in HW and FW */
-u64 hipz_h_query_mr(const struct ipz_adapter_handle adapter_handle,
-                   const struct ehca_mr *mr,
-                   struct ehca_mr_hipzout_parms *outparms);
-
-/* hipz_h_free_resource_mr frees MR resources in HW and FW */
-u64 hipz_h_free_resource_mr(const struct ipz_adapter_handle adapter_handle,
-                           const struct ehca_mr *mr);
-
-/* hipz_h_reregister_pmr reregisters MR in HW and FW */
-u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle,
-                         const struct ehca_mr *mr,
-                         const u64 vaddr_in,
-                         const u64 length,
-                         const u32 access_ctrl,
-                         const struct ipz_pd pd,
-                         const u64 mr_addr_cb,
-                         struct ehca_mr_hipzout_parms *outparms);
-
-/* hipz_h_register_smr register shared MR in HW and FW */
-u64 hipz_h_register_smr(const struct ipz_adapter_handle adapter_handle,
-                       const struct ehca_mr *mr,
-                       const struct ehca_mr *orig_mr,
-                       const u64 vaddr_in,
-                       const u32 access_ctrl,
-                       const struct ipz_pd pd,
-                       struct ehca_mr_hipzout_parms *outparms);
-
-/*
- * hipz_h_alloc_resource_mw allocates MW resources in HW and FW, initialize
- * resources.
- */
-u64 hipz_h_alloc_resource_mw(const struct ipz_adapter_handle adapter_handle,
-                            const struct ehca_mw *mw,
-                            const struct ipz_pd pd,
-                            struct ehca_mw_hipzout_parms *outparms);
-
-/* hipz_h_query_mw queries MW in HW and FW */
-u64 hipz_h_query_mw(const struct ipz_adapter_handle adapter_handle,
-                   const struct ehca_mw *mw,
-                   struct ehca_mw_hipzout_parms *outparms);
-
-/* hipz_h_free_resource_mw frees MW resources in HW and FW */
-u64 hipz_h_free_resource_mw(const struct ipz_adapter_handle adapter_handle,
-                           const struct ehca_mw *mw);
-
-u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle,
-                     const u64 ressource_handle,
-                     void *rblock,
-                     unsigned long *byte_count);
-u64 hipz_h_eoi(int irq);
-
-#endif /* __HCP_IF_H__ */
diff --git a/drivers/staging/rdma/ehca/hcp_phyp.c b/drivers/staging/rdma/ehca/hcp_phyp.c
deleted file mode 100644 (file)
index 077376f..0000000
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *   load store abstraction for ehca register access with tracing
- *
- *  Authors: Christoph Raisch <raisch@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "ehca_classes.h"
-#include "hipz_hw.h"
-
-u64 hcall_map_page(u64 physaddr)
-{
-       return (u64)ioremap(physaddr, EHCA_PAGESIZE);
-}
-
-int hcall_unmap_page(u64 mapaddr)
-{
-       iounmap((volatile void __iomem *) mapaddr);
-       return 0;
-}
-
-int hcp_galpas_ctor(struct h_galpas *galpas, int is_user,
-                   u64 paddr_kernel, u64 paddr_user)
-{
-       if (!is_user) {
-               galpas->kernel.fw_handle = hcall_map_page(paddr_kernel);
-               if (!galpas->kernel.fw_handle)
-                       return -ENOMEM;
-       } else
-               galpas->kernel.fw_handle = 0;
-
-       galpas->user.fw_handle = paddr_user;
-
-       return 0;
-}
-
-int hcp_galpas_dtor(struct h_galpas *galpas)
-{
-       if (galpas->kernel.fw_handle) {
-               int ret = hcall_unmap_page(galpas->kernel.fw_handle);
-               if (ret)
-                       return ret;
-       }
-
-       galpas->user.fw_handle = galpas->kernel.fw_handle = 0;
-
-       return 0;
-}
diff --git a/drivers/staging/rdma/ehca/hcp_phyp.h b/drivers/staging/rdma/ehca/hcp_phyp.h
deleted file mode 100644 (file)
index d1b0299..0000000
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  Firmware calls
- *
- *  Authors: Christoph Raisch <raisch@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Waleri Fomin <fomin@de.ibm.com>
- *           Gerd Bayer <gerd.bayer@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __HCP_PHYP_H__
-#define __HCP_PHYP_H__
-
-
-/*
- * eHCA page (mapped into memory)
- * resource to access eHCA register pages in CPU address space
-*/
-struct h_galpa {
-       u64 fw_handle;
-       /* for pSeries this is a 64bit memory address where
-          I/O memory is mapped into CPU address space (kv) */
-};
-
-/*
- * resource to access eHCA address space registers, all types
- */
-struct h_galpas {
-       u32 pid;                /*PID of userspace galpa checking */
-       struct h_galpa user;    /* user space accessible resource,
-                                  set to 0 if unused */
-       struct h_galpa kernel;  /* kernel space accessible resource,
-                                  set to 0 if unused */
-};
-
-static inline u64 hipz_galpa_load(struct h_galpa galpa, u32 offset)
-{
-       u64 addr = galpa.fw_handle + offset;
-       return *(volatile u64 __force *)addr;
-}
-
-static inline void hipz_galpa_store(struct h_galpa galpa, u32 offset, u64 value)
-{
-       u64 addr = galpa.fw_handle + offset;
-       *(volatile u64 __force *)addr = value;
-}
-
-int hcp_galpas_ctor(struct h_galpas *galpas, int is_user,
-                   u64 paddr_kernel, u64 paddr_user);
-
-int hcp_galpas_dtor(struct h_galpas *galpas);
-
-u64 hcall_map_page(u64 physaddr);
-
-int hcall_unmap_page(u64 mapaddr);
-
-#endif
diff --git a/drivers/staging/rdma/ehca/hipz_fns.h b/drivers/staging/rdma/ehca/hipz_fns.h
deleted file mode 100644 (file)
index 9dac93d..0000000
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  HW abstraction register functions
- *
- *  Authors: Christoph Raisch <raisch@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __HIPZ_FNS_H__
-#define __HIPZ_FNS_H__
-
-#include "ehca_classes.h"
-#include "hipz_hw.h"
-
-#include "hipz_fns_core.h"
-
-#define hipz_galpa_store_eq(gal, offset, value) \
-       hipz_galpa_store(gal, EQTEMM_OFFSET(offset), value)
-
-#define hipz_galpa_load_eq(gal, offset) \
-       hipz_galpa_load(gal, EQTEMM_OFFSET(offset))
-
-#define hipz_galpa_store_qped(gal, offset, value) \
-       hipz_galpa_store(gal, QPEDMM_OFFSET(offset), value)
-
-#define hipz_galpa_load_qped(gal, offset) \
-       hipz_galpa_load(gal, QPEDMM_OFFSET(offset))
-
-#define hipz_galpa_store_mrmw(gal, offset, value) \
-       hipz_galpa_store(gal, MRMWMM_OFFSET(offset), value)
-
-#define hipz_galpa_load_mrmw(gal, offset) \
-       hipz_galpa_load(gal, MRMWMM_OFFSET(offset))
-
-#endif
diff --git a/drivers/staging/rdma/ehca/hipz_fns_core.h b/drivers/staging/rdma/ehca/hipz_fns_core.h
deleted file mode 100644 (file)
index 868735f..0000000
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  HW abstraction register functions
- *
- *  Authors: Christoph Raisch <raisch@de.ibm.com>
- *           Heiko J Schick <schickhj@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __HIPZ_FNS_CORE_H__
-#define __HIPZ_FNS_CORE_H__
-
-#include "hcp_phyp.h"
-#include "hipz_hw.h"
-
-#define hipz_galpa_store_cq(gal, offset, value) \
-       hipz_galpa_store(gal, CQTEMM_OFFSET(offset), value)
-
-#define hipz_galpa_load_cq(gal, offset) \
-       hipz_galpa_load(gal, CQTEMM_OFFSET(offset))
-
-#define hipz_galpa_store_qp(gal, offset, value) \
-       hipz_galpa_store(gal, QPTEMM_OFFSET(offset), value)
-#define hipz_galpa_load_qp(gal, offset) \
-       hipz_galpa_load(gal, QPTEMM_OFFSET(offset))
-
-static inline void hipz_update_sqa(struct ehca_qp *qp, u16 nr_wqes)
-{
-       /*  ringing doorbell :-) */
-       hipz_galpa_store_qp(qp->galpas.kernel, qpx_sqa,
-                           EHCA_BMASK_SET(QPX_SQADDER, nr_wqes));
-}
-
-static inline void hipz_update_rqa(struct ehca_qp *qp, u16 nr_wqes)
-{
-       /*  ringing doorbell :-) */
-       hipz_galpa_store_qp(qp->galpas.kernel, qpx_rqa,
-                           EHCA_BMASK_SET(QPX_RQADDER, nr_wqes));
-}
-
-static inline void hipz_update_feca(struct ehca_cq *cq, u32 nr_cqes)
-{
-       hipz_galpa_store_cq(cq->galpas.kernel, cqx_feca,
-                           EHCA_BMASK_SET(CQX_FECADDER, nr_cqes));
-}
-
-static inline void hipz_set_cqx_n0(struct ehca_cq *cq, u32 value)
-{
-       u64 cqx_n0_reg;
-
-       hipz_galpa_store_cq(cq->galpas.kernel, cqx_n0,
-                           EHCA_BMASK_SET(CQX_N0_GENERATE_SOLICITED_COMP_EVENT,
-                                          value));
-       cqx_n0_reg = hipz_galpa_load_cq(cq->galpas.kernel, cqx_n0);
-}
-
-static inline void hipz_set_cqx_n1(struct ehca_cq *cq, u32 value)
-{
-       u64 cqx_n1_reg;
-
-       hipz_galpa_store_cq(cq->galpas.kernel, cqx_n1,
-                           EHCA_BMASK_SET(CQX_N1_GENERATE_COMP_EVENT, value));
-       cqx_n1_reg = hipz_galpa_load_cq(cq->galpas.kernel, cqx_n1);
-}
-
-#endif /* __HIPZ_FNC_CORE_H__ */
diff --git a/drivers/staging/rdma/ehca/hipz_hw.h b/drivers/staging/rdma/ehca/hipz_hw.h
deleted file mode 100644 (file)
index bf996c7..0000000
+++ /dev/null
@@ -1,414 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  eHCA register definitions
- *
- *  Authors: Waleri Fomin <fomin@de.ibm.com>
- *           Christoph Raisch <raisch@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __HIPZ_HW_H__
-#define __HIPZ_HW_H__
-
-#include "ehca_tools.h"
-
-#define EHCA_MAX_MTU 4
-
-/* QP Table Entry Memory Map */
-struct hipz_qptemm {
-       u64 qpx_hcr;
-       u64 qpx_c;
-       u64 qpx_herr;
-       u64 qpx_aer;
-/* 0x20*/
-       u64 qpx_sqa;
-       u64 qpx_sqc;
-       u64 qpx_rqa;
-       u64 qpx_rqc;
-/* 0x40*/
-       u64 qpx_st;
-       u64 qpx_pmstate;
-       u64 qpx_pmfa;
-       u64 qpx_pkey;
-/* 0x60*/
-       u64 qpx_pkeya;
-       u64 qpx_pkeyb;
-       u64 qpx_pkeyc;
-       u64 qpx_pkeyd;
-/* 0x80*/
-       u64 qpx_qkey;
-       u64 qpx_dqp;
-       u64 qpx_dlidp;
-       u64 qpx_portp;
-/* 0xa0*/
-       u64 qpx_slidp;
-       u64 qpx_slidpp;
-       u64 qpx_dlida;
-       u64 qpx_porta;
-/* 0xc0*/
-       u64 qpx_slida;
-       u64 qpx_slidpa;
-       u64 qpx_slvl;
-       u64 qpx_ipd;
-/* 0xe0*/
-       u64 qpx_mtu;
-       u64 qpx_lato;
-       u64 qpx_rlimit;
-       u64 qpx_rnrlimit;
-/* 0x100*/
-       u64 qpx_t;
-       u64 qpx_sqhp;
-       u64 qpx_sqptp;
-       u64 qpx_nspsn;
-/* 0x120*/
-       u64 qpx_nspsnhwm;
-       u64 reserved1;
-       u64 qpx_sdsi;
-       u64 qpx_sdsbc;
-/* 0x140*/
-       u64 qpx_sqwsize;
-       u64 qpx_sqwts;
-       u64 qpx_lsn;
-       u64 qpx_nssn;
-/* 0x160 */
-       u64 qpx_mor;
-       u64 qpx_cor;
-       u64 qpx_sqsize;
-       u64 qpx_erc;
-/* 0x180*/
-       u64 qpx_rnrrc;
-       u64 qpx_ernrwt;
-       u64 qpx_rnrresp;
-       u64 qpx_lmsna;
-/* 0x1a0 */
-       u64 qpx_sqhpc;
-       u64 qpx_sqcptp;
-       u64 qpx_sigt;
-       u64 qpx_wqecnt;
-/* 0x1c0*/
-       u64 qpx_rqhp;
-       u64 qpx_rqptp;
-       u64 qpx_rqsize;
-       u64 qpx_nrr;
-/* 0x1e0*/
-       u64 qpx_rdmac;
-       u64 qpx_nrpsn;
-       u64 qpx_lapsn;
-       u64 qpx_lcr;
-/* 0x200*/
-       u64 qpx_rwc;
-       u64 qpx_rwva;
-       u64 qpx_rdsi;
-       u64 qpx_rdsbc;
-/* 0x220*/
-       u64 qpx_rqwsize;
-       u64 qpx_crmsn;
-       u64 qpx_rdd;
-       u64 qpx_larpsn;
-/* 0x240*/
-       u64 qpx_pd;
-       u64 qpx_scqn;
-       u64 qpx_rcqn;
-       u64 qpx_aeqn;
-/* 0x260*/
-       u64 qpx_aaelog;
-       u64 qpx_ram;
-       u64 qpx_rdmaqe0;
-       u64 qpx_rdmaqe1;
-/* 0x280*/
-       u64 qpx_rdmaqe2;
-       u64 qpx_rdmaqe3;
-       u64 qpx_nrpsnhwm;
-/* 0x298*/
-       u64 reserved[(0x400 - 0x298) / 8];
-/* 0x400 extended data */
-       u64 reserved_ext[(0x500 - 0x400) / 8];
-/* 0x500 */
-       u64 reserved2[(0x1000 - 0x500) / 8];
-/* 0x1000      */
-};
-
-#define QPX_SQADDER EHCA_BMASK_IBM(48, 63)
-#define QPX_RQADDER EHCA_BMASK_IBM(48, 63)
-#define QPX_AAELOG_RESET_SRQ_LIMIT EHCA_BMASK_IBM(3, 3)
-
-#define QPTEMM_OFFSET(x) offsetof(struct hipz_qptemm, x)
-
-/* MRMWPT Entry Memory Map */
-struct hipz_mrmwmm {
-       /* 0x00 */
-       u64 mrx_hcr;
-
-       u64 mrx_c;
-       u64 mrx_herr;
-       u64 mrx_aer;
-       /* 0x20 */
-       u64 mrx_pp;
-       u64 reserved1;
-       u64 reserved2;
-       u64 reserved3;
-       /* 0x40 */
-       u64 reserved4[(0x200 - 0x40) / 8];
-       /* 0x200 */
-       u64 mrx_ctl[64];
-
-};
-
-#define MRMWMM_OFFSET(x) offsetof(struct hipz_mrmwmm, x)
-
-struct hipz_qpedmm {
-       /* 0x00 */
-       u64 reserved0[(0x400) / 8];
-       /* 0x400 */
-       u64 qpedx_phh;
-       u64 qpedx_ppsgp;
-       /* 0x410 */
-       u64 qpedx_ppsgu;
-       u64 qpedx_ppdgp;
-       /* 0x420 */
-       u64 qpedx_ppdgu;
-       u64 qpedx_aph;
-       /* 0x430 */
-       u64 qpedx_apsgp;
-       u64 qpedx_apsgu;
-       /* 0x440 */
-       u64 qpedx_apdgp;
-       u64 qpedx_apdgu;
-       /* 0x450 */
-       u64 qpedx_apav;
-       u64 qpedx_apsav;
-       /* 0x460  */
-       u64 qpedx_hcr;
-       u64 reserved1[4];
-       /* 0x488 */
-       u64 qpedx_rrl0;
-       /* 0x490 */
-       u64 qpedx_rrrkey0;
-       u64 qpedx_rrva0;
-       /* 0x4a0 */
-       u64 reserved2;
-       u64 qpedx_rrl1;
-       /* 0x4b0 */
-       u64 qpedx_rrrkey1;
-       u64 qpedx_rrva1;
-       /* 0x4c0 */
-       u64 reserved3;
-       u64 qpedx_rrl2;
-       /* 0x4d0 */
-       u64 qpedx_rrrkey2;
-       u64 qpedx_rrva2;
-       /* 0x4e0 */
-       u64 reserved4;
-       u64 qpedx_rrl3;
-       /* 0x4f0 */
-       u64 qpedx_rrrkey3;
-       u64 qpedx_rrva3;
-};
-
-#define QPEDMM_OFFSET(x) offsetof(struct hipz_qpedmm, x)
-
-/* CQ Table Entry Memory Map */
-struct hipz_cqtemm {
-       u64 cqx_hcr;
-       u64 cqx_c;
-       u64 cqx_herr;
-       u64 cqx_aer;
-/* 0x20  */
-       u64 cqx_ptp;
-       u64 cqx_tp;
-       u64 cqx_fec;
-       u64 cqx_feca;
-/* 0x40  */
-       u64 cqx_ep;
-       u64 cqx_eq;
-/* 0x50  */
-       u64 reserved1;
-       u64 cqx_n0;
-/* 0x60  */
-       u64 cqx_n1;
-       u64 reserved2[(0x1000 - 0x60) / 8];
-/* 0x1000 */
-};
-
-#define CQX_FEC_CQE_CNT           EHCA_BMASK_IBM(32, 63)
-#define CQX_FECADDER              EHCA_BMASK_IBM(32, 63)
-#define CQX_N0_GENERATE_SOLICITED_COMP_EVENT EHCA_BMASK_IBM(0, 0)
-#define CQX_N1_GENERATE_COMP_EVENT EHCA_BMASK_IBM(0, 0)
-
-#define CQTEMM_OFFSET(x) offsetof(struct hipz_cqtemm, x)
-
-/* EQ Table Entry Memory Map */
-struct hipz_eqtemm {
-       u64 eqx_hcr;
-       u64 eqx_c;
-
-       u64 eqx_herr;
-       u64 eqx_aer;
-/* 0x20 */
-       u64 eqx_ptp;
-       u64 eqx_tp;
-       u64 eqx_ssba;
-       u64 eqx_psba;
-
-/* 0x40 */
-       u64 eqx_cec;
-       u64 eqx_meql;
-       u64 eqx_xisbi;
-       u64 eqx_xisc;
-/* 0x60 */
-       u64 eqx_it;
-
-};
-
-#define EQTEMM_OFFSET(x) offsetof(struct hipz_eqtemm, x)
-
-/* access control defines for MR/MW */
-#define HIPZ_ACCESSCTRL_L_WRITE  0x00800000
-#define HIPZ_ACCESSCTRL_R_WRITE  0x00400000
-#define HIPZ_ACCESSCTRL_R_READ   0x00200000
-#define HIPZ_ACCESSCTRL_R_ATOMIC 0x00100000
-#define HIPZ_ACCESSCTRL_MW_BIND  0x00080000
-
-/* query hca response block */
-struct hipz_query_hca {
-       u32 cur_reliable_dg;
-       u32 cur_qp;
-       u32 cur_cq;
-       u32 cur_eq;
-       u32 cur_mr;
-       u32 cur_mw;
-       u32 cur_ee_context;
-       u32 cur_mcast_grp;
-       u32 cur_qp_attached_mcast_grp;
-       u32 reserved1;
-       u32 cur_ipv6_qp;
-       u32 cur_eth_qp;
-       u32 cur_hp_mr;
-       u32 reserved2[3];
-       u32 max_rd_domain;
-       u32 max_qp;
-       u32 max_cq;
-       u32 max_eq;
-       u32 max_mr;
-       u32 max_hp_mr;
-       u32 max_mw;
-       u32 max_mrwpte;
-       u32 max_special_mrwpte;
-       u32 max_rd_ee_context;
-       u32 max_mcast_grp;
-       u32 max_total_mcast_qp_attach;
-       u32 max_mcast_qp_attach;
-       u32 max_raw_ipv6_qp;
-       u32 max_raw_ethy_qp;
-       u32 internal_clock_frequency;
-       u32 max_pd;
-       u32 max_ah;
-       u32 max_cqe;
-       u32 max_wqes_wq;
-       u32 max_partitions;
-       u32 max_rr_ee_context;
-       u32 max_rr_qp;
-       u32 max_rr_hca;
-       u32 max_act_wqs_ee_context;
-       u32 max_act_wqs_qp;
-       u32 max_sge;
-       u32 max_sge_rd;
-       u32 memory_page_size_supported;
-       u64 max_mr_size;
-       u32 local_ca_ack_delay;
-       u32 num_ports;
-       u32 vendor_id;
-       u32 vendor_part_id;
-       u32 hw_ver;
-       u64 node_guid;
-       u64 hca_cap_indicators;
-       u32 data_counter_register_size;
-       u32 max_shared_rq;
-       u32 max_isns_eq;
-       u32 max_neq;
-} __attribute__ ((packed));
-
-#define HCA_CAP_AH_PORT_NR_CHECK      EHCA_BMASK_IBM( 0,  0)
-#define HCA_CAP_ATOMIC                EHCA_BMASK_IBM( 1,  1)
-#define HCA_CAP_AUTO_PATH_MIG         EHCA_BMASK_IBM( 2,  2)
-#define HCA_CAP_BAD_P_KEY_CTR         EHCA_BMASK_IBM( 3,  3)
-#define HCA_CAP_SQD_RTS_PORT_CHANGE   EHCA_BMASK_IBM( 4,  4)
-#define HCA_CAP_CUR_QP_STATE_MOD      EHCA_BMASK_IBM( 5,  5)
-#define HCA_CAP_INIT_TYPE             EHCA_BMASK_IBM( 6,  6)
-#define HCA_CAP_PORT_ACTIVE_EVENT     EHCA_BMASK_IBM( 7,  7)
-#define HCA_CAP_Q_KEY_VIOL_CTR        EHCA_BMASK_IBM( 8,  8)
-#define HCA_CAP_WQE_RESIZE            EHCA_BMASK_IBM( 9,  9)
-#define HCA_CAP_RAW_PACKET_MCAST      EHCA_BMASK_IBM(10, 10)
-#define HCA_CAP_SHUTDOWN_PORT         EHCA_BMASK_IBM(11, 11)
-#define HCA_CAP_RC_LL_QP              EHCA_BMASK_IBM(12, 12)
-#define HCA_CAP_SRQ                   EHCA_BMASK_IBM(13, 13)
-#define HCA_CAP_UD_LL_QP              EHCA_BMASK_IBM(16, 16)
-#define HCA_CAP_RESIZE_MR             EHCA_BMASK_IBM(17, 17)
-#define HCA_CAP_MINI_QP               EHCA_BMASK_IBM(18, 18)
-#define HCA_CAP_H_ALLOC_RES_SYNC      EHCA_BMASK_IBM(19, 19)
-
-/* query port response block */
-struct hipz_query_port {
-       u32 state;
-       u32 bad_pkey_cntr;
-       u32 lmc;
-       u32 lid;
-       u32 subnet_timeout;
-       u32 qkey_viol_cntr;
-       u32 sm_sl;
-       u32 sm_lid;
-       u32 capability_mask;
-       u32 init_type_reply;
-       u32 pkey_tbl_len;
-       u32 gid_tbl_len;
-       u64 gid_prefix;
-       u32 port_nr;
-       u16 pkey_entries[16];
-       u8  reserved1[32];
-       u32 trent_size;
-       u32 trbuf_size;
-       u64 max_msg_sz;
-       u32 max_mtu;
-       u32 vl_cap;
-       u32 phys_pstate;
-       u32 phys_state;
-       u32 phys_speed;
-       u32 phys_width;
-       u8  reserved2[1884];
-       u64 guid_entries[255];
-} __attribute__ ((packed));
-
-#endif
diff --git a/drivers/staging/rdma/ehca/ipz_pt_fn.c b/drivers/staging/rdma/ehca/ipz_pt_fn.c
deleted file mode 100644 (file)
index 7ffc748..0000000
+++ /dev/null
@@ -1,289 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  internal queue handling
- *
- *  Authors: Waleri Fomin <fomin@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *           Christoph Raisch <raisch@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/slab.h>
-
-#include "ehca_tools.h"
-#include "ipz_pt_fn.h"
-#include "ehca_classes.h"
-
-#define PAGES_PER_KPAGE (PAGE_SIZE >> EHCA_PAGESHIFT)
-
-struct kmem_cache *small_qp_cache;
-
-void *ipz_qpageit_get_inc(struct ipz_queue *queue)
-{
-       void *ret = ipz_qeit_get(queue);
-       queue->current_q_offset += queue->pagesize;
-       if (queue->current_q_offset > queue->queue_length) {
-               queue->current_q_offset -= queue->pagesize;
-               ret = NULL;
-       }
-       if (((u64)ret) % queue->pagesize) {
-               ehca_gen_err("ERROR!! not at PAGE-Boundary");
-               return NULL;
-       }
-       return ret;
-}
-
-void *ipz_qeit_eq_get_inc(struct ipz_queue *queue)
-{
-       void *ret = ipz_qeit_get(queue);
-       u64 last_entry_in_q = queue->queue_length - queue->qe_size;
-
-       queue->current_q_offset += queue->qe_size;
-       if (queue->current_q_offset > last_entry_in_q) {
-               queue->current_q_offset = 0;
-               queue->toggle_state = (~queue->toggle_state) & 1;
-       }
-
-       return ret;
-}
-
-int ipz_queue_abs_to_offset(struct ipz_queue *queue, u64 addr, u64 *q_offset)
-{
-       int i;
-       for (i = 0; i < queue->queue_length / queue->pagesize; i++) {
-               u64 page = __pa(queue->queue_pages[i]);
-               if (addr >= page && addr < page + queue->pagesize) {
-                       *q_offset = addr - page + i * queue->pagesize;
-                       return 0;
-               }
-       }
-       return -EINVAL;
-}
-
-#if PAGE_SHIFT < EHCA_PAGESHIFT
-#error Kernel pages must be at least as large than eHCA pages (4K) !
-#endif
-
-/*
- * allocate pages for queue:
- * outer loop allocates whole kernel pages (page aligned) and
- * inner loop divides a kernel page into smaller hca queue pages
- */
-static int alloc_queue_pages(struct ipz_queue *queue, const u32 nr_of_pages)
-{
-       int k, f = 0;
-       u8 *kpage;
-
-       while (f < nr_of_pages) {
-               kpage = (u8 *)get_zeroed_page(GFP_KERNEL);
-               if (!kpage)
-                       goto out;
-
-               for (k = 0; k < PAGES_PER_KPAGE && f < nr_of_pages; k++) {
-                       queue->queue_pages[f] = (struct ipz_page *)kpage;
-                       kpage += EHCA_PAGESIZE;
-                       f++;
-               }
-       }
-       return 1;
-
-out:
-       for (f = 0; f < nr_of_pages && queue->queue_pages[f];
-            f += PAGES_PER_KPAGE)
-               free_page((unsigned long)(queue->queue_pages)[f]);
-       return 0;
-}
-
-static int alloc_small_queue_page(struct ipz_queue *queue, struct ehca_pd *pd)
-{
-       int order = ilog2(queue->pagesize) - 9;
-       struct ipz_small_queue_page *page;
-       unsigned long bit;
-
-       mutex_lock(&pd->lock);
-
-       if (!list_empty(&pd->free[order]))
-               page = list_entry(pd->free[order].next,
-                                 struct ipz_small_queue_page, list);
-       else {
-               page = kmem_cache_zalloc(small_qp_cache, GFP_KERNEL);
-               if (!page)
-                       goto out;
-
-               page->page = get_zeroed_page(GFP_KERNEL);
-               if (!page->page) {
-                       kmem_cache_free(small_qp_cache, page);
-                       goto out;
-               }
-
-               list_add(&page->list, &pd->free[order]);
-       }
-
-       bit = find_first_zero_bit(page->bitmap, IPZ_SPAGE_PER_KPAGE >> order);
-       __set_bit(bit, page->bitmap);
-       page->fill++;
-
-       if (page->fill == IPZ_SPAGE_PER_KPAGE >> order)
-               list_move(&page->list, &pd->full[order]);
-
-       mutex_unlock(&pd->lock);
-
-       queue->queue_pages[0] = (void *)(page->page | (bit << (order + 9)));
-       queue->small_page = page;
-       queue->offset = bit << (order + 9);
-       return 1;
-
-out:
-       ehca_err(pd->ib_pd.device, "failed to allocate small queue page");
-       mutex_unlock(&pd->lock);
-       return 0;
-}
-
-static void free_small_queue_page(struct ipz_queue *queue, struct ehca_pd *pd)
-{
-       int order = ilog2(queue->pagesize) - 9;
-       struct ipz_small_queue_page *page = queue->small_page;
-       unsigned long bit;
-       int free_page = 0;
-
-       bit = ((unsigned long)queue->queue_pages[0] & ~PAGE_MASK)
-               >> (order + 9);
-
-       mutex_lock(&pd->lock);
-
-       __clear_bit(bit, page->bitmap);
-       page->fill--;
-
-       if (page->fill == 0) {
-               list_del(&page->list);
-               free_page = 1;
-       }
-
-       if (page->fill == (IPZ_SPAGE_PER_KPAGE >> order) - 1)
-               /* the page was full until we freed the chunk */
-               list_move_tail(&page->list, &pd->free[order]);
-
-       mutex_unlock(&pd->lock);
-
-       if (free_page) {
-               free_page(page->page);
-               kmem_cache_free(small_qp_cache, page);
-       }
-}
-
-int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue,
-                  const u32 nr_of_pages, const u32 pagesize,
-                  const u32 qe_size, const u32 nr_of_sg,
-                  int is_small)
-{
-       if (pagesize > PAGE_SIZE) {
-               ehca_gen_err("FATAL ERROR: pagesize=%x "
-                            "is greater than kernel page size", pagesize);
-               return 0;
-       }
-
-       /* init queue fields */
-       queue->queue_length = nr_of_pages * pagesize;
-       queue->pagesize = pagesize;
-       queue->qe_size = qe_size;
-       queue->act_nr_of_sg = nr_of_sg;
-       queue->current_q_offset = 0;
-       queue->toggle_state = 1;
-       queue->small_page = NULL;
-
-       /* allocate queue page pointers */
-       queue->queue_pages = kzalloc(nr_of_pages * sizeof(void *),
-                                    GFP_KERNEL | __GFP_NOWARN);
-       if (!queue->queue_pages) {
-               queue->queue_pages = vzalloc(nr_of_pages * sizeof(void *));
-               if (!queue->queue_pages) {
-                       ehca_gen_err("Couldn't allocate queue page list");
-                       return 0;
-               }
-       }
-
-       /* allocate actual queue pages */
-       if (is_small) {
-               if (!alloc_small_queue_page(queue, pd))
-                       goto ipz_queue_ctor_exit0;
-       } else
-               if (!alloc_queue_pages(queue, nr_of_pages))
-                       goto ipz_queue_ctor_exit0;
-
-       return 1;
-
-ipz_queue_ctor_exit0:
-       ehca_gen_err("Couldn't alloc pages queue=%p "
-                "nr_of_pages=%x",  queue, nr_of_pages);
-       kvfree(queue->queue_pages);
-
-       return 0;
-}
-
-int ipz_queue_dtor(struct ehca_pd *pd, struct ipz_queue *queue)
-{
-       int i, nr_pages;
-
-       if (!queue || !queue->queue_pages) {
-               ehca_gen_dbg("queue or queue_pages is NULL");
-               return 0;
-       }
-
-       if (queue->small_page)
-               free_small_queue_page(queue, pd);
-       else {
-               nr_pages = queue->queue_length / queue->pagesize;
-               for (i = 0; i < nr_pages; i += PAGES_PER_KPAGE)
-                       free_page((unsigned long)queue->queue_pages[i]);
-       }
-
-       kvfree(queue->queue_pages);
-
-       return 1;
-}
-
-int ehca_init_small_qp_cache(void)
-{
-       small_qp_cache = kmem_cache_create("ehca_cache_small_qp",
-                                          sizeof(struct ipz_small_queue_page),
-                                          0, SLAB_HWCACHE_ALIGN, NULL);
-       if (!small_qp_cache)
-               return -ENOMEM;
-
-       return 0;
-}
-
-void ehca_cleanup_small_qp_cache(void)
-{
-       kmem_cache_destroy(small_qp_cache);
-}
diff --git a/drivers/staging/rdma/ehca/ipz_pt_fn.h b/drivers/staging/rdma/ehca/ipz_pt_fn.h
deleted file mode 100644 (file)
index a801274..0000000
+++ /dev/null
@@ -1,289 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  internal queue handling
- *
- *  Authors: Waleri Fomin <fomin@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *           Christoph Raisch <raisch@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __IPZ_PT_FN_H__
-#define __IPZ_PT_FN_H__
-
-#define EHCA_PAGESHIFT   12
-#define EHCA_PAGESIZE   4096UL
-#define EHCA_PAGEMASK   (~(EHCA_PAGESIZE-1))
-#define EHCA_PT_ENTRIES 512UL
-
-#include "ehca_tools.h"
-#include "ehca_qes.h"
-
-struct ehca_pd;
-struct ipz_small_queue_page;
-
-extern struct kmem_cache *small_qp_cache;
-
-/* struct generic ehca page */
-struct ipz_page {
-       u8 entries[EHCA_PAGESIZE];
-};
-
-#define IPZ_SPAGE_PER_KPAGE (PAGE_SIZE / 512)
-
-struct ipz_small_queue_page {
-       unsigned long page;
-       unsigned long bitmap[IPZ_SPAGE_PER_KPAGE / BITS_PER_LONG];
-       int fill;
-       void *mapped_addr;
-       u32 mmap_count;
-       struct list_head list;
-};
-
-/* struct generic queue in linux kernel virtual memory (kv) */
-struct ipz_queue {
-       u64 current_q_offset;   /* current queue entry */
-
-       struct ipz_page **queue_pages;  /* array of pages belonging to queue */
-       u32 qe_size;            /* queue entry size */
-       u32 act_nr_of_sg;
-       u32 queue_length;       /* queue length allocated in bytes */
-       u32 pagesize;
-       u32 toggle_state;       /* toggle flag - per page */
-       u32 offset; /* save offset within page for small_qp */
-       struct ipz_small_queue_page *small_page;
-};
-
-/*
- * return current Queue Entry for a certain q_offset
- * returns address (kv) of Queue Entry
- */
-static inline void *ipz_qeit_calc(struct ipz_queue *queue, u64 q_offset)
-{
-       struct ipz_page *current_page;
-       if (q_offset >= queue->queue_length)
-               return NULL;
-       current_page = (queue->queue_pages)[q_offset >> EHCA_PAGESHIFT];
-       return &current_page->entries[q_offset & (EHCA_PAGESIZE - 1)];
-}
-
-/*
- * return current Queue Entry
- * returns address (kv) of Queue Entry
- */
-static inline void *ipz_qeit_get(struct ipz_queue *queue)
-{
-       return ipz_qeit_calc(queue, queue->current_q_offset);
-}
-
-/*
- * return current Queue Page , increment Queue Page iterator from
- * page to page in struct ipz_queue, last increment will return 0! and
- * NOT wrap
- * returns address (kv) of Queue Page
- * warning don't use in parallel with ipz_QE_get_inc()
- */
-void *ipz_qpageit_get_inc(struct ipz_queue *queue);
-
-/*
- * return current Queue Entry, increment Queue Entry iterator by one
- * step in struct ipz_queue, will wrap in ringbuffer
- * returns address (kv) of Queue Entry BEFORE increment
- * warning don't use in parallel with ipz_qpageit_get_inc()
- */
-static inline void *ipz_qeit_get_inc(struct ipz_queue *queue)
-{
-       void *ret = ipz_qeit_get(queue);
-       queue->current_q_offset += queue->qe_size;
-       if (queue->current_q_offset >= queue->queue_length) {
-               queue->current_q_offset = 0;
-               /* toggle the valid flag */
-               queue->toggle_state = (~queue->toggle_state) & 1;
-       }
-
-       return ret;
-}
-
-/*
- * return a bool indicating whether current Queue Entry is valid
- */
-static inline int ipz_qeit_is_valid(struct ipz_queue *queue)
-{
-       struct ehca_cqe *cqe = ipz_qeit_get(queue);
-       return ((cqe->cqe_flags >> 7) == (queue->toggle_state & 1));
-}
-
-/*
- * return current Queue Entry, increment Queue Entry iterator by one
- * step in struct ipz_queue, will wrap in ringbuffer
- * returns address (kv) of Queue Entry BEFORE increment
- * returns 0 and does not increment, if wrong valid state
- * warning don't use in parallel with ipz_qpageit_get_inc()
- */
-static inline void *ipz_qeit_get_inc_valid(struct ipz_queue *queue)
-{
-       return ipz_qeit_is_valid(queue) ? ipz_qeit_get_inc(queue) : NULL;
-}
-
-/*
- * returns and resets Queue Entry iterator
- * returns address (kv) of first Queue Entry
- */
-static inline void *ipz_qeit_reset(struct ipz_queue *queue)
-{
-       queue->current_q_offset = 0;
-       return ipz_qeit_get(queue);
-}
-
-/*
- * return the q_offset corresponding to an absolute address
- */
-int ipz_queue_abs_to_offset(struct ipz_queue *queue, u64 addr, u64 *q_offset);
-
-/*
- * return the next queue offset. don't modify the queue.
- */
-static inline u64 ipz_queue_advance_offset(struct ipz_queue *queue, u64 offset)
-{
-       offset += queue->qe_size;
-       if (offset >= queue->queue_length) offset = 0;
-       return offset;
-}
-
-/* struct generic page table */
-struct ipz_pt {
-       u64 entries[EHCA_PT_ENTRIES];
-};
-
-/* struct page table for a queue, only to be used in pf */
-struct ipz_qpt {
-       /* queue page tables (kv), use u64 because we know the element length */
-       u64 *qpts;
-       u32 n_qpts;
-       u32 n_ptes;       /*  number of page table entries */
-       u64 *current_pte_addr;
-};
-
-/*
- * constructor for a ipz_queue_t, placement new for ipz_queue_t,
- * new for all dependent datastructors
- * all QP Tables are the same
- * flow:
- *    allocate+pin queue
- * see ipz_qpt_ctor()
- * returns true if ok, false if out of memory
- */
-int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue,
-                  const u32 nr_of_pages, const u32 pagesize,
-                  const u32 qe_size, const u32 nr_of_sg,
-                  int is_small);
-
-/*
- * destructor for a ipz_queue_t
- *  -# free queue
- *  see ipz_queue_ctor()
- *  returns true if ok, false if queue was NULL-ptr of free failed
- */
-int ipz_queue_dtor(struct ehca_pd *pd, struct ipz_queue *queue);
-
-/*
- * constructor for a ipz_qpt_t,
- * placement new for struct ipz_queue, new for all dependent datastructors
- * all QP Tables are the same,
- * flow:
- * -# allocate+pin queue
- * -# initialise ptcb
- * -# allocate+pin PTs
- * -# link PTs to a ring, according to HCA Arch, set bit62 id needed
- * -# the ring must have room for exactly nr_of_PTEs
- * see ipz_qpt_ctor()
- */
-void ipz_qpt_ctor(struct ipz_qpt *qpt,
-                 const u32 nr_of_qes,
-                 const u32 pagesize,
-                 const u32 qe_size,
-                 const u8 lowbyte, const u8 toggle,
-                 u32 * act_nr_of_QEs, u32 * act_nr_of_pages);
-
-/*
- * return current Queue Entry, increment Queue Entry iterator by one
- * step in struct ipz_queue, will wrap in ringbuffer
- * returns address (kv) of Queue Entry BEFORE increment
- * warning don't use in parallel with ipz_qpageit_get_inc()
- * warning unpredictable results may occur if steps>act_nr_of_queue_entries
- * fix EQ page problems
- */
-void *ipz_qeit_eq_get_inc(struct ipz_queue *queue);
-
-/*
- * return current Event Queue Entry, increment Queue Entry iterator
- * by one step in struct ipz_queue if valid, will wrap in ringbuffer
- * returns address (kv) of Queue Entry BEFORE increment
- * returns 0 and does not increment, if wrong valid state
- * warning don't use in parallel with ipz_queue_QPageit_get_inc()
- * warning unpredictable results may occur if steps>act_nr_of_queue_entries
- */
-static inline void *ipz_eqit_eq_get_inc_valid(struct ipz_queue *queue)
-{
-       void *ret = ipz_qeit_get(queue);
-       u32 qe = *(u8 *)ret;
-       if ((qe >> 7) != (queue->toggle_state & 1))
-               return NULL;
-       ipz_qeit_eq_get_inc(queue); /* this is a good one */
-       return ret;
-}
-
-static inline void *ipz_eqit_eq_peek_valid(struct ipz_queue *queue)
-{
-       void *ret = ipz_qeit_get(queue);
-       u32 qe = *(u8 *)ret;
-       if ((qe >> 7) != (queue->toggle_state & 1))
-               return NULL;
-       return ret;
-}
-
-/* returns address (GX) of first queue entry */
-static inline u64 ipz_qpt_get_firstpage(struct ipz_qpt *qpt)
-{
-       return be64_to_cpu(qpt->qpts[0]);
-}
-
-/* returns address (kv) of first page of queue page table */
-static inline void *ipz_qpt_get_qpt(struct ipz_qpt *qpt)
-{
-       return qpt->qpts;
-}
-
-#endif                         /* __IPZ_PT_FN_H__ */
index 568f185a022dffa5bf24a1af010261a4b4f7b5c0..a3f8b884fdd625fef680f5342655bc1832978eca 100644 (file)
@@ -167,10 +167,7 @@ static struct hfi1_mr *alloc_mr(int count, struct ib_pd *pd)
        rval = init_mregion(&mr->mr, pd, count);
        if (rval)
                goto bail;
-       /*
-        * ib_reg_phys_mr() will initialize mr->ibmr except for
-        * lkey and rkey.
-        */
+
        rval = hfi1_alloc_lkey(&mr->mr, 0);
        if (rval)
                goto bail_mregion;
@@ -187,52 +184,6 @@ bail:
        goto done;
 }
 
-/**
- * hfi1_reg_phys_mr - register a physical memory region
- * @pd: protection domain for this memory region
- * @buffer_list: pointer to the list of physical buffers to register
- * @num_phys_buf: the number of physical buffers to register
- * @iova_start: the starting address passed over IB which maps to this MR
- *
- * Returns the memory region on success, otherwise returns an errno.
- */
-struct ib_mr *hfi1_reg_phys_mr(struct ib_pd *pd,
-                              struct ib_phys_buf *buffer_list,
-                              int num_phys_buf, int acc, u64 *iova_start)
-{
-       struct hfi1_mr *mr;
-       int n, m, i;
-       struct ib_mr *ret;
-
-       mr = alloc_mr(num_phys_buf, pd);
-       if (IS_ERR(mr)) {
-               ret = (struct ib_mr *)mr;
-               goto bail;
-       }
-
-       mr->mr.user_base = *iova_start;
-       mr->mr.iova = *iova_start;
-       mr->mr.access_flags = acc;
-
-       m = 0;
-       n = 0;
-       for (i = 0; i < num_phys_buf; i++) {
-               mr->mr.map[m]->segs[n].vaddr = (void *) buffer_list[i].addr;
-               mr->mr.map[m]->segs[n].length = buffer_list[i].size;
-               mr->mr.length += buffer_list[i].size;
-               n++;
-               if (n == HFI1_SEGSZ) {
-                       m++;
-                       n = 0;
-               }
-       }
-
-       ret = &mr->ibmr;
-
-bail:
-       return ret;
-}
-
 /**
  * hfi1_reg_user_mr - register a userspace memory region
  * @pd: protection domain for this memory region
index ef0feaa684a48b7b285ce490ad9791d98c052ddd..09b8d412ee9085667da5fd3bccf35d46724dc49e 100644 (file)
@@ -2052,7 +2052,6 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
        ibdev->poll_cq = hfi1_poll_cq;
        ibdev->req_notify_cq = hfi1_req_notify_cq;
        ibdev->get_dma_mr = hfi1_get_dma_mr;
-       ibdev->reg_phys_mr = hfi1_reg_phys_mr;
        ibdev->reg_user_mr = hfi1_reg_user_mr;
        ibdev->dereg_mr = hfi1_dereg_mr;
        ibdev->alloc_mr = hfi1_alloc_mr;
index 72106e5362b9862d448be822220f2ee7b0aa30f4..286e468b0479791c49c8b5dfeeb8b991d0ff2785 100644 (file)
@@ -1024,10 +1024,6 @@ int hfi1_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
 
 struct ib_mr *hfi1_get_dma_mr(struct ib_pd *pd, int acc);
 
-struct ib_mr *hfi1_reg_phys_mr(struct ib_pd *pd,
-                              struct ib_phys_buf *buffer_list,
-                              int num_phys_buf, int acc, u64 *iova_start);
-
 struct ib_mr *hfi1_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                               u64 virt_addr, int mr_access_flags,
                               struct ib_udata *udata);
diff --git a/drivers/staging/rdma/ipath/Kconfig b/drivers/staging/rdma/ipath/Kconfig
deleted file mode 100644 (file)
index 041ce06..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-config INFINIBAND_IPATH
-       tristate "QLogic HTX HCA support"
-       depends on 64BIT && NET && HT_IRQ
-       ---help---
-       This is a driver for the deprecated QLogic Hyper-Transport
-       IB host channel adapter (model QHT7140),
-       including InfiniBand verbs support.  This driver allows these
-       devices to be used with both kernel upper level protocols such
-       as IP-over-InfiniBand as well as with userspace applications
-       (in conjunction with InfiniBand userspace access).
-       For QLogic PCIe QLE based cards, use the QIB driver instead.
-
-       If you have this hardware you will need to boot with PAT disabled
-       on your x86-64 systems, use the nopat kernel parameter.
-
-       Note that this driver will soon be removed entirely from the kernel.
diff --git a/drivers/staging/rdma/ipath/Makefile b/drivers/staging/rdma/ipath/Makefile
deleted file mode 100644 (file)
index 4496f28..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-ccflags-y := -DIPATH_IDSTR='"QLogic kernel.org driver"' \
-       -DIPATH_KERN_TYPE=0
-
-obj-$(CONFIG_INFINIBAND_IPATH) += ib_ipath.o
-
-ib_ipath-y := \
-       ipath_cq.o \
-       ipath_diag.o \
-       ipath_dma.o \
-       ipath_driver.o \
-       ipath_eeprom.o \
-       ipath_file_ops.o \
-       ipath_fs.o \
-       ipath_init_chip.o \
-       ipath_intr.o \
-       ipath_keys.o \
-       ipath_mad.o \
-       ipath_mmap.o \
-       ipath_mr.o \
-       ipath_qp.o \
-       ipath_rc.o \
-       ipath_ruc.o \
-       ipath_sdma.o \
-       ipath_srq.o \
-       ipath_stats.o \
-       ipath_sysfs.o \
-       ipath_uc.o \
-       ipath_ud.o \
-       ipath_user_pages.o \
-       ipath_user_sdma.o \
-       ipath_verbs_mcast.o \
-       ipath_verbs.o
-
-ib_ipath-$(CONFIG_HT_IRQ) += ipath_iba6110.o
-
-ib_ipath-$(CONFIG_X86_64) += ipath_wc_x86_64.o
-ib_ipath-$(CONFIG_PPC64) += ipath_wc_ppc64.o
diff --git a/drivers/staging/rdma/ipath/TODO b/drivers/staging/rdma/ipath/TODO
deleted file mode 100644 (file)
index cb00158..0000000
+++ /dev/null
@@ -1,5 +0,0 @@
-The ipath driver has been moved to staging in preparation for its removal in a
-few releases. The driver will be deleted during the 4.6 merge window.
-
-Contact Dennis Dalessandro <dennis.dalessandro@intel.com> and
-Cc: linux-rdma@vger.kernel.org
diff --git a/drivers/staging/rdma/ipath/ipath_common.h b/drivers/staging/rdma/ipath/ipath_common.h
deleted file mode 100644 (file)
index 28cfe97..0000000
+++ /dev/null
@@ -1,851 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef _IPATH_COMMON_H
-#define _IPATH_COMMON_H
-
-/*
- * This file contains defines, structures, etc. that are used
- * to communicate between kernel and user code.
- */
-
-
-/* This is the IEEE-assigned OUI for QLogic Inc. InfiniPath */
-#define IPATH_SRC_OUI_1 0x00
-#define IPATH_SRC_OUI_2 0x11
-#define IPATH_SRC_OUI_3 0x75
-
-/* version of protocol header (known to chip also). In the long run,
- * we should be able to generate and accept a range of version numbers;
- * for now we only accept one, and it's compiled in.
- */
-#define IPS_PROTO_VERSION 2
-
-/*
- * These are compile time constants that you may want to enable or disable
- * if you are trying to debug problems with code or performance.
- * IPATH_VERBOSE_TRACING define as 1 if you want additional tracing in
- * fastpath code
- * IPATH_TRACE_REGWRITES define as 1 if you want register writes to be
- * traced in faspath code
- * _IPATH_TRACING define as 0 if you want to remove all tracing in a
- * compilation unit
- * _IPATH_DEBUGGING define as 0 if you want to remove debug prints
- */
-
-/*
- * The value in the BTH QP field that InfiniPath uses to differentiate
- * an infinipath protocol IB packet vs standard IB transport
- */
-#define IPATH_KD_QP 0x656b79
-
-/*
- * valid states passed to ipath_set_linkstate() user call
- */
-#define IPATH_IB_LINKDOWN              0
-#define IPATH_IB_LINKARM               1
-#define IPATH_IB_LINKACTIVE            2
-#define IPATH_IB_LINKDOWN_ONLY         3
-#define IPATH_IB_LINKDOWN_SLEEP                4
-#define IPATH_IB_LINKDOWN_DISABLE      5
-#define IPATH_IB_LINK_LOOPBACK 6 /* enable local loopback */
-#define IPATH_IB_LINK_EXTERNAL 7 /* normal, disable local loopback */
-#define IPATH_IB_LINK_NO_HRTBT 8 /* disable Heartbeat, e.g. for loopback */
-#define IPATH_IB_LINK_HRTBT    9 /* enable heartbeat, normal, non-loopback */
-
-/*
- * These 3 values (SDR and DDR may be ORed for auto-speed
- * negotiation) are used for the 3rd argument to path_f_set_ib_cfg
- * with cmd IPATH_IB_CFG_SPD_ENB, by direct calls or via sysfs.  They
- * are also the the possible values for ipath_link_speed_enabled and active
- * The values were chosen to match values used within the IB spec.
- */
-#define IPATH_IB_SDR 1
-#define IPATH_IB_DDR 2
-
-/*
- * stats maintained by the driver.  For now, at least, this is global
- * to all minor devices.
- */
-struct infinipath_stats {
-       /* number of interrupts taken */
-       __u64 sps_ints;
-       /* number of interrupts for errors */
-       __u64 sps_errints;
-       /* number of errors from chip (not incl. packet errors or CRC) */
-       __u64 sps_errs;
-       /* number of packet errors from chip other than CRC */
-       __u64 sps_pkterrs;
-       /* number of packets with CRC errors (ICRC and VCRC) */
-       __u64 sps_crcerrs;
-       /* number of hardware errors reported (parity, etc.) */
-       __u64 sps_hwerrs;
-       /* number of times IB link changed state unexpectedly */
-       __u64 sps_iblink;
-       __u64 sps_unused; /* was fastrcvint, no longer implemented */
-       /* number of kernel (port0) packets received */
-       __u64 sps_port0pkts;
-       /* number of "ethernet" packets sent by driver */
-       __u64 sps_ether_spkts;
-       /* number of "ethernet" packets received by driver */
-       __u64 sps_ether_rpkts;
-       /* number of SMA packets sent by driver. Obsolete. */
-       __u64 sps_sma_spkts;
-       /* number of SMA packets received by driver. Obsolete. */
-       __u64 sps_sma_rpkts;
-       /* number of times all ports rcvhdrq was full and packet dropped */
-       __u64 sps_hdrqfull;
-       /* number of times all ports egrtid was full and packet dropped */
-       __u64 sps_etidfull;
-       /*
-        * number of times we tried to send from driver, but no pio buffers
-        * avail
-        */
-       __u64 sps_nopiobufs;
-       /* number of ports currently open */
-       __u64 sps_ports;
-       /* list of pkeys (other than default) accepted (0 means not set) */
-       __u16 sps_pkeys[4];
-       __u16 sps_unused16[4]; /* available; maintaining compatible layout */
-       /* number of user ports per chip (not IB ports) */
-       __u32 sps_nports;
-       /* not our interrupt, or already handled */
-       __u32 sps_nullintr;
-       /* max number of packets handled per receive call */
-       __u32 sps_maxpkts_call;
-       /* avg number of packets handled per receive call */
-       __u32 sps_avgpkts_call;
-       /* total number of pages locked */
-       __u64 sps_pagelocks;
-       /* total number of pages unlocked */
-       __u64 sps_pageunlocks;
-       /*
-        * Number of packets dropped in kernel other than errors (ether
-        * packets if ipath not configured, etc.)
-        */
-       __u64 sps_krdrops;
-       __u64 sps_txeparity; /* PIO buffer parity error, recovered */
-       /* pad for future growth */
-       __u64 __sps_pad[45];
-};
-
-/*
- * These are the status bits readable (in ascii form, 64bit value)
- * from the "status" sysfs file.
- */
-#define IPATH_STATUS_INITTED       0x1 /* basic initialization done */
-#define IPATH_STATUS_DISABLED      0x2 /* hardware disabled */
-/* Device has been disabled via admin request */
-#define IPATH_STATUS_ADMIN_DISABLED    0x4
-/* Chip has been found and initted */
-#define IPATH_STATUS_CHIP_PRESENT 0x20
-/* IB link is at ACTIVE, usable for data traffic */
-#define IPATH_STATUS_IB_READY     0x40
-/* link is configured, LID, MTU, etc. have been set */
-#define IPATH_STATUS_IB_CONF      0x80
-/* no link established, probably no cable */
-#define IPATH_STATUS_IB_NOCABLE  0x100
-/* A Fatal hardware error has occurred. */
-#define IPATH_STATUS_HWERROR     0x200
-
-/*
- * The list of usermode accessible registers.  Also see Reg_* later in file.
- */
-typedef enum _ipath_ureg {
-       /* (RO)  DMA RcvHdr to be used next. */
-       ur_rcvhdrtail = 0,
-       /* (RW)  RcvHdr entry to be processed next by host. */
-       ur_rcvhdrhead = 1,
-       /* (RO)  Index of next Eager index to use. */
-       ur_rcvegrindextail = 2,
-       /* (RW)  Eager TID to be processed next */
-       ur_rcvegrindexhead = 3,
-       /* For internal use only; max register number. */
-       _IPATH_UregMax
-} ipath_ureg;
-
-/* bit values for spi_runtime_flags */
-#define IPATH_RUNTIME_HT       0x1
-#define IPATH_RUNTIME_PCIE     0x2
-#define IPATH_RUNTIME_FORCE_WC_ORDER   0x4
-#define IPATH_RUNTIME_RCVHDR_COPY      0x8
-#define IPATH_RUNTIME_MASTER   0x10
-#define IPATH_RUNTIME_NODMA_RTAIL 0x80
-#define IPATH_RUNTIME_SDMA           0x200
-#define IPATH_RUNTIME_FORCE_PIOAVAIL 0x400
-#define IPATH_RUNTIME_PIO_REGSWAPPED 0x800
-
-/*
- * This structure is returned by ipath_userinit() immediately after
- * open to get implementation-specific info, and info specific to this
- * instance.
- *
- * This struct must have explict pad fields where type sizes
- * may result in different alignments between 32 and 64 bit
- * programs, since the 64 bit * bit kernel requires the user code
- * to have matching offsets
- */
-struct ipath_base_info {
-       /* version of hardware, for feature checking. */
-       __u32 spi_hw_version;
-       /* version of software, for feature checking. */
-       __u32 spi_sw_version;
-       /* InfiniPath port assigned, goes into sent packets */
-       __u16 spi_port;
-       __u16 spi_subport;
-       /*
-        * IB MTU, packets IB data must be less than this.
-        * The MTU is in bytes, and will be a multiple of 4 bytes.
-        */
-       __u32 spi_mtu;
-       /*
-        * Size of a PIO buffer.  Any given packet's total size must be less
-        * than this (in words).  Included is the starting control word, so
-        * if 513 is returned, then total pkt size is 512 words or less.
-        */
-       __u32 spi_piosize;
-       /* size of the TID cache in infinipath, in entries */
-       __u32 spi_tidcnt;
-       /* size of the TID Eager list in infinipath, in entries */
-       __u32 spi_tidegrcnt;
-       /* size of a single receive header queue entry in words. */
-       __u32 spi_rcvhdrent_size;
-       /*
-        * Count of receive header queue entries allocated.
-        * This may be less than the spu_rcvhdrcnt passed in!.
-        */
-       __u32 spi_rcvhdr_cnt;
-
-       /* per-chip and other runtime features bitmap (IPATH_RUNTIME_*) */
-       __u32 spi_runtime_flags;
-
-       /* address where receive buffer queue is mapped into */
-       __u64 spi_rcvhdr_base;
-
-       /* user program. */
-
-       /* base address of eager TID receive buffers. */
-       __u64 spi_rcv_egrbufs;
-
-       /* Allocated by initialization code, not by protocol. */
-
-       /*
-        * Size of each TID buffer in host memory, starting at
-        * spi_rcv_egrbufs.  The buffers are virtually contiguous.
-        */
-       __u32 spi_rcv_egrbufsize;
-       /*
-        * The special QP (queue pair) value that identifies an infinipath
-        * protocol packet from standard IB packets.  More, probably much
-        * more, to be added.
-        */
-       __u32 spi_qpair;
-
-       /*
-        * User register base for init code, not to be used directly by
-        * protocol or applications.
-        */
-       __u64 __spi_uregbase;
-       /*
-        * Maximum buffer size in bytes that can be used in a single TID
-        * entry (assuming the buffer is aligned to this boundary).  This is
-        * the minimum of what the hardware and software support Guaranteed
-        * to be a power of 2.
-        */
-       __u32 spi_tid_maxsize;
-       /*
-        * alignment of each pio send buffer (byte count
-        * to add to spi_piobufbase to get to second buffer)
-        */
-       __u32 spi_pioalign;
-       /*
-        * The index of the first pio buffer available to this process;
-        * needed to do lookup in spi_pioavailaddr; not added to
-        * spi_piobufbase.
-        */
-       __u32 spi_pioindex;
-        /* number of buffers mapped for this process */
-       __u32 spi_piocnt;
-
-       /*
-        * Base address of writeonly pio buffers for this process.
-        * Each buffer has spi_piosize words, and is aligned on spi_pioalign
-        * boundaries.  spi_piocnt buffers are mapped from this address
-        */
-       __u64 spi_piobufbase;
-
-       /*
-        * Base address of readonly memory copy of the pioavail registers.
-        * There are 2 bits for each buffer.
-        */
-       __u64 spi_pioavailaddr;
-
-       /*
-        * Address where driver updates a copy of the interface and driver
-        * status (IPATH_STATUS_*) as a 64 bit value.  It's followed by a
-        * string indicating hardware error, if there was one.
-        */
-       __u64 spi_status;
-
-       /* number of chip ports available to user processes */
-       __u32 spi_nports;
-       /* unit number of chip we are using */
-       __u32 spi_unit;
-       /* num bufs in each contiguous set */
-       __u32 spi_rcv_egrperchunk;
-       /* size in bytes of each contiguous set */
-       __u32 spi_rcv_egrchunksize;
-       /* total size of mmap to cover full rcvegrbuffers */
-       __u32 spi_rcv_egrbuftotlen;
-       __u32 spi_filler_for_align;
-       /* address of readonly memory copy of the rcvhdrq tail register. */
-       __u64 spi_rcvhdr_tailaddr;
-
-       /* shared memory pages for subports if port is shared */
-       __u64 spi_subport_uregbase;
-       __u64 spi_subport_rcvegrbuf;
-       __u64 spi_subport_rcvhdr_base;
-
-       /* shared memory page for hardware port if it is shared */
-       __u64 spi_port_uregbase;
-       __u64 spi_port_rcvegrbuf;
-       __u64 spi_port_rcvhdr_base;
-       __u64 spi_port_rcvhdr_tailaddr;
-
-} __attribute__ ((aligned(8)));
-
-
-/*
- * This version number is given to the driver by the user code during
- * initialization in the spu_userversion field of ipath_user_info, so
- * the driver can check for compatibility with user code.
- *
- * The major version changes when data structures
- * change in an incompatible way.  The driver must be the same or higher
- * for initialization to succeed.  In some cases, a higher version
- * driver will not interoperate with older software, and initialization
- * will return an error.
- */
-#define IPATH_USER_SWMAJOR 1
-
-/*
- * Minor version differences are always compatible
- * a within a major version, however if user software is larger
- * than driver software, some new features and/or structure fields
- * may not be implemented; the user code must deal with this if it
- * cares, or it must abort after initialization reports the difference.
- */
-#define IPATH_USER_SWMINOR 6
-
-#define IPATH_USER_SWVERSION ((IPATH_USER_SWMAJOR<<16) | IPATH_USER_SWMINOR)
-
-#define IPATH_KERN_TYPE 0
-
-/*
- * Similarly, this is the kernel version going back to the user.  It's
- * slightly different, in that we want to tell if the driver was built as
- * part of a QLogic release, or from the driver from openfabrics.org,
- * kernel.org, or a standard distribution, for support reasons.
- * The high bit is 0 for non-QLogic and 1 for QLogic-built/supplied.
- *
- * It's returned by the driver to the user code during initialization in the
- * spi_sw_version field of ipath_base_info, so the user code can in turn
- * check for compatibility with the kernel.
-*/
-#define IPATH_KERN_SWVERSION ((IPATH_KERN_TYPE<<31) | IPATH_USER_SWVERSION)
-
-/*
- * This structure is passed to ipath_userinit() to tell the driver where
- * user code buffers are, sizes, etc.   The offsets and sizes of the
- * fields must remain unchanged, for binary compatibility.  It can
- * be extended, if userversion is changed so user code can tell, if needed
- */
-struct ipath_user_info {
-       /*
-        * version of user software, to detect compatibility issues.
-        * Should be set to IPATH_USER_SWVERSION.
-        */
-       __u32 spu_userversion;
-
-       /* desired number of receive header queue entries */
-       __u32 spu_rcvhdrcnt;
-
-       /* size of struct base_info to write to */
-       __u32 spu_base_info_size;
-
-       /*
-        * number of words in KD protocol header
-        * This tells InfiniPath how many words to copy to rcvhdrq.  If 0,
-        * kernel uses a default.  Once set, attempts to set any other value
-        * are an error (EAGAIN) until driver is reloaded.
-        */
-       __u32 spu_rcvhdrsize;
-
-       /*
-        * If two or more processes wish to share a port, each process
-        * must set the spu_subport_cnt and spu_subport_id to the same
-        * values.  The only restriction on the spu_subport_id is that
-        * it be unique for a given node.
-        */
-       __u16 spu_subport_cnt;
-       __u16 spu_subport_id;
-
-       __u32 spu_unused; /* kept for compatible layout */
-
-       /*
-        * address of struct base_info to write to
-        */
-       __u64 spu_base_info;
-
-} __attribute__ ((aligned(8)));
-
-/* User commands. */
-
-#define IPATH_CMD_MIN          16
-
-#define __IPATH_CMD_USER_INIT  16      /* old set up userspace (for old user code) */
-#define IPATH_CMD_PORT_INFO    17      /* find out what resources we got */
-#define IPATH_CMD_RECV_CTRL    18      /* control receipt of packets */
-#define IPATH_CMD_TID_UPDATE   19      /* update expected TID entries */
-#define IPATH_CMD_TID_FREE     20      /* free expected TID entries */
-#define IPATH_CMD_SET_PART_KEY 21      /* add partition key */
-#define __IPATH_CMD_SLAVE_INFO 22      /* return info on slave processes (for old user code) */
-#define IPATH_CMD_ASSIGN_PORT  23      /* allocate HCA and port */
-#define IPATH_CMD_USER_INIT    24      /* set up userspace */
-#define IPATH_CMD_UNUSED_1     25
-#define IPATH_CMD_UNUSED_2     26
-#define IPATH_CMD_PIOAVAILUPD  27      /* force an update of PIOAvail reg */
-#define IPATH_CMD_POLL_TYPE    28      /* set the kind of polling we want */
-#define IPATH_CMD_ARMLAUNCH_CTRL       29 /* armlaunch detection control */
-/* 30 is unused */
-#define IPATH_CMD_SDMA_INFLIGHT 31     /* sdma inflight counter request */
-#define IPATH_CMD_SDMA_COMPLETE 32     /* sdma completion counter request */
-
-/*
- * Poll types
- */
-#define IPATH_POLL_TYPE_URGENT  0x01
-#define IPATH_POLL_TYPE_OVERFLOW 0x02
-
-struct ipath_port_info {
-       __u32 num_active;       /* number of active units */
-       __u32 unit;             /* unit (chip) assigned to caller */
-       __u16 port;             /* port on unit assigned to caller */
-       __u16 subport;          /* subport on unit assigned to caller */
-       __u16 num_ports;        /* number of ports available on unit */
-       __u16 num_subports;     /* number of subports opened on port */
-};
-
-struct ipath_tid_info {
-       __u32 tidcnt;
-       /* make structure same size in 32 and 64 bit */
-       __u32 tid__unused;
-       /* virtual address of first page in transfer */
-       __u64 tidvaddr;
-       /* pointer (same size 32/64 bit) to __u16 tid array */
-       __u64 tidlist;
-
-       /*
-        * pointer (same size 32/64 bit) to bitmap of TIDs used
-        * for this call; checked for being large enough at open
-        */
-       __u64 tidmap;
-};
-
-struct ipath_cmd {
-       __u32 type;                     /* command type */
-       union {
-               struct ipath_tid_info tid_info;
-               struct ipath_user_info user_info;
-
-               /*
-                * address in userspace where we should put the sdma
-                * inflight counter
-                */
-               __u64 sdma_inflight;
-               /*
-                * address in userspace where we should put the sdma
-                * completion counter
-                */
-               __u64 sdma_complete;
-               /* address in userspace of struct ipath_port_info to
-                  write result to */
-               __u64 port_info;
-               /* enable/disable receipt of packets */
-               __u32 recv_ctrl;
-               /* enable/disable armlaunch errors (non-zero to enable) */
-               __u32 armlaunch_ctrl;
-               /* partition key to set */
-               __u16 part_key;
-               /* user address of __u32 bitmask of active slaves */
-               __u64 slave_mask_addr;
-               /* type of polling we want */
-               __u16 poll_type;
-       } cmd;
-};
-
-struct ipath_iovec {
-       /* Pointer to data, but same size 32 and 64 bit */
-       __u64 iov_base;
-
-       /*
-        * Length of data; don't need 64 bits, but want
-        * ipath_sendpkt to remain same size as before 32 bit changes, so...
-        */
-       __u64 iov_len;
-};
-
-/*
- * Describes a single packet for send.  Each packet can have one or more
- * buffers, but the total length (exclusive of IB headers) must be less
- * than the MTU, and if using the PIO method, entire packet length,
- * including IB headers, must be less than the ipath_piosize value (words).
- * Use of this necessitates including sys/uio.h
- */
-struct __ipath_sendpkt {
-       __u32 sps_flags;        /* flags for packet (TBD) */
-       __u32 sps_cnt;          /* number of entries to use in sps_iov */
-       /* array of iov's describing packet. TEMPORARY */
-       struct ipath_iovec sps_iov[4];
-};
-
-/*
- * diagnostics can send a packet by "writing" one of the following
- * two structs to diag data special file
- * The first is the legacy version for backward compatibility
- */
-struct ipath_diag_pkt {
-       __u32 unit;
-       __u64 data;
-       __u32 len;
-};
-
-/* The second diag_pkt struct is the expanded version that allows
- * more control over the packet, specifically, by allowing a custom
- * pbc (+ static rate) qword, so that special modes and deliberate
- * changes to CRCs can be used. The elements were also re-ordered
- * for better alignment and to avoid padding issues.
- */
-struct ipath_diag_xpkt {
-       __u64 data;
-       __u64 pbc_wd;
-       __u32 unit;
-       __u32 len;
-};
-
-/*
- * Data layout in I2C flash (for GUID, etc.)
- * All fields are little-endian binary unless otherwise stated
- */
-#define IPATH_FLASH_VERSION 2
-struct ipath_flash {
-       /* flash layout version (IPATH_FLASH_VERSION) */
-       __u8 if_fversion;
-       /* checksum protecting if_length bytes */
-       __u8 if_csum;
-       /*
-        * valid length (in use, protected by if_csum), including
-        * if_fversion and if_csum themselves)
-        */
-       __u8 if_length;
-       /* the GUID, in network order */
-       __u8 if_guid[8];
-       /* number of GUIDs to use, starting from if_guid */
-       __u8 if_numguid;
-       /* the (last 10 characters of) board serial number, in ASCII */
-       char if_serial[12];
-       /* board mfg date (YYYYMMDD ASCII) */
-       char if_mfgdate[8];
-       /* last board rework/test date (YYYYMMDD ASCII) */
-       char if_testdate[8];
-       /* logging of error counts, TBD */
-       __u8 if_errcntp[4];
-       /* powered on hours, updated at driver unload */
-       __u8 if_powerhour[2];
-       /* ASCII free-form comment field */
-       char if_comment[32];
-       /* Backwards compatible prefix for longer QLogic Serial Numbers */
-       char if_sprefix[4];
-       /* 82 bytes used, min flash size is 128 bytes */
-       __u8 if_future[46];
-};
-
-/*
- * These are the counters implemented in the chip, and are listed in order.
- * The InterCaps naming is taken straight from the chip spec.
- */
-struct infinipath_counters {
-       __u64 LBIntCnt;
-       __u64 LBFlowStallCnt;
-       __u64 TxSDmaDescCnt;    /* was Reserved1 */
-       __u64 TxUnsupVLErrCnt;
-       __u64 TxDataPktCnt;
-       __u64 TxFlowPktCnt;
-       __u64 TxDwordCnt;
-       __u64 TxLenErrCnt;
-       __u64 TxMaxMinLenErrCnt;
-       __u64 TxUnderrunCnt;
-       __u64 TxFlowStallCnt;
-       __u64 TxDroppedPktCnt;
-       __u64 RxDroppedPktCnt;
-       __u64 RxDataPktCnt;
-       __u64 RxFlowPktCnt;
-       __u64 RxDwordCnt;
-       __u64 RxLenErrCnt;
-       __u64 RxMaxMinLenErrCnt;
-       __u64 RxICRCErrCnt;
-       __u64 RxVCRCErrCnt;
-       __u64 RxFlowCtrlErrCnt;
-       __u64 RxBadFormatCnt;
-       __u64 RxLinkProblemCnt;
-       __u64 RxEBPCnt;
-       __u64 RxLPCRCErrCnt;
-       __u64 RxBufOvflCnt;
-       __u64 RxTIDFullErrCnt;
-       __u64 RxTIDValidErrCnt;
-       __u64 RxPKeyMismatchCnt;
-       __u64 RxP0HdrEgrOvflCnt;
-       __u64 RxP1HdrEgrOvflCnt;
-       __u64 RxP2HdrEgrOvflCnt;
-       __u64 RxP3HdrEgrOvflCnt;
-       __u64 RxP4HdrEgrOvflCnt;
-       __u64 RxP5HdrEgrOvflCnt;
-       __u64 RxP6HdrEgrOvflCnt;
-       __u64 RxP7HdrEgrOvflCnt;
-       __u64 RxP8HdrEgrOvflCnt;
-       __u64 RxP9HdrEgrOvflCnt;        /* was Reserved6 */
-       __u64 RxP10HdrEgrOvflCnt;       /* was Reserved7 */
-       __u64 RxP11HdrEgrOvflCnt;       /* new for IBA7220 */
-       __u64 RxP12HdrEgrOvflCnt;       /* new for IBA7220 */
-       __u64 RxP13HdrEgrOvflCnt;       /* new for IBA7220 */
-       __u64 RxP14HdrEgrOvflCnt;       /* new for IBA7220 */
-       __u64 RxP15HdrEgrOvflCnt;       /* new for IBA7220 */
-       __u64 RxP16HdrEgrOvflCnt;       /* new for IBA7220 */
-       __u64 IBStatusChangeCnt;
-       __u64 IBLinkErrRecoveryCnt;
-       __u64 IBLinkDownedCnt;
-       __u64 IBSymbolErrCnt;
-       /* The following are new for IBA7220 */
-       __u64 RxVL15DroppedPktCnt;
-       __u64 RxOtherLocalPhyErrCnt;
-       __u64 PcieRetryBufDiagQwordCnt;
-       __u64 ExcessBufferOvflCnt;
-       __u64 LocalLinkIntegrityErrCnt;
-       __u64 RxVlErrCnt;
-       __u64 RxDlidFltrCnt;
-};
-
-/*
- * The next set of defines are for packet headers, and chip register
- * and memory bits that are visible to and/or used by user-mode software
- * The other bits that are used only by the driver or diags are in
- * ipath_registers.h
- */
-
-/* RcvHdrFlags bits */
-#define INFINIPATH_RHF_LENGTH_MASK 0x7FF
-#define INFINIPATH_RHF_LENGTH_SHIFT 0
-#define INFINIPATH_RHF_RCVTYPE_MASK 0x7
-#define INFINIPATH_RHF_RCVTYPE_SHIFT 11
-#define INFINIPATH_RHF_EGRINDEX_MASK 0xFFF
-#define INFINIPATH_RHF_EGRINDEX_SHIFT 16
-#define INFINIPATH_RHF_SEQ_MASK 0xF
-#define INFINIPATH_RHF_SEQ_SHIFT 0
-#define INFINIPATH_RHF_HDRQ_OFFSET_MASK 0x7FF
-#define INFINIPATH_RHF_HDRQ_OFFSET_SHIFT 4
-#define INFINIPATH_RHF_H_ICRCERR   0x80000000
-#define INFINIPATH_RHF_H_VCRCERR   0x40000000
-#define INFINIPATH_RHF_H_PARITYERR 0x20000000
-#define INFINIPATH_RHF_H_LENERR    0x10000000
-#define INFINIPATH_RHF_H_MTUERR    0x08000000
-#define INFINIPATH_RHF_H_IHDRERR   0x04000000
-#define INFINIPATH_RHF_H_TIDERR    0x02000000
-#define INFINIPATH_RHF_H_MKERR     0x01000000
-#define INFINIPATH_RHF_H_IBERR     0x00800000
-#define INFINIPATH_RHF_H_ERR_MASK  0xFF800000
-#define INFINIPATH_RHF_L_USE_EGR   0x80000000
-#define INFINIPATH_RHF_L_SWA       0x00008000
-#define INFINIPATH_RHF_L_SWB       0x00004000
-
-/* infinipath header fields */
-#define INFINIPATH_I_VERS_MASK 0xF
-#define INFINIPATH_I_VERS_SHIFT 28
-#define INFINIPATH_I_PORT_MASK 0xF
-#define INFINIPATH_I_PORT_SHIFT 24
-#define INFINIPATH_I_TID_MASK 0x7FF
-#define INFINIPATH_I_TID_SHIFT 13
-#define INFINIPATH_I_OFFSET_MASK 0x1FFF
-#define INFINIPATH_I_OFFSET_SHIFT 0
-
-/* K_PktFlags bits */
-#define INFINIPATH_KPF_INTR 0x1
-#define INFINIPATH_KPF_SUBPORT_MASK 0x3
-#define INFINIPATH_KPF_SUBPORT_SHIFT 1
-
-#define INFINIPATH_MAX_SUBPORT 4
-
-/* SendPIO per-buffer control */
-#define INFINIPATH_SP_TEST    0x40
-#define INFINIPATH_SP_TESTEBP 0x20
-#define INFINIPATH_SP_TRIGGER_SHIFT  15
-
-/* SendPIOAvail bits */
-#define INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT 1
-#define INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT 0
-
-/* infinipath header format */
-struct ipath_header {
-       /*
-        * Version - 4 bits, Port - 4 bits, TID - 10 bits and Offset -
-        * 14 bits before ECO change ~28 Dec 03.  After that, Vers 4,
-        * Port 4, TID 11, offset 13.
-        */
-       __le32 ver_port_tid_offset;
-       __le16 chksum;
-       __le16 pkt_flags;
-};
-
-/* infinipath user message header format.
- * This structure contains the first 4 fields common to all protocols
- * that employ infinipath.
- */
-struct ipath_message_header {
-       __be16 lrh[4];
-       __be32 bth[3];
-       /* fields below this point are in host byte order */
-       struct ipath_header iph;
-       __u8 sub_opcode;
-};
-
-/* infinipath ethernet header format */
-struct ether_header {
-       __be16 lrh[4];
-       __be32 bth[3];
-       struct ipath_header iph;
-       __u8 sub_opcode;
-       __u8 cmd;
-       __be16 lid;
-       __u16 mac[3];
-       __u8 frag_num;
-       __u8 seq_num;
-       __le32 len;
-       /* MUST be of word size due to PIO write requirements */
-       __le32 csum;
-       __le16 csum_offset;
-       __le16 flags;
-       __u16 first_2_bytes;
-       __u8 unused[2];         /* currently unused */
-};
-
-
-/* IB - LRH header consts */
-#define IPATH_LRH_GRH 0x0003   /* 1. word of IB LRH - next header: GRH */
-#define IPATH_LRH_BTH 0x0002   /* 1. word of IB LRH - next header: BTH */
-
-/* misc. */
-#define SIZE_OF_CRC 1
-
-#define IPATH_DEFAULT_P_KEY 0xFFFF
-#define IPATH_PERMISSIVE_LID 0xFFFF
-#define IPATH_AETH_CREDIT_SHIFT 24
-#define IPATH_AETH_CREDIT_MASK 0x1F
-#define IPATH_AETH_CREDIT_INVAL 0x1F
-#define IPATH_PSN_MASK 0xFFFFFF
-#define IPATH_MSN_MASK 0xFFFFFF
-#define IPATH_QPN_MASK 0xFFFFFF
-#define IPATH_MULTICAST_LID_BASE 0xC000
-#define IPATH_EAGER_TID_ID INFINIPATH_I_TID_MASK
-#define IPATH_MULTICAST_QPN 0xFFFFFF
-
-/* Receive Header Queue: receive type (from infinipath) */
-#define RCVHQ_RCV_TYPE_EXPECTED  0
-#define RCVHQ_RCV_TYPE_EAGER     1
-#define RCVHQ_RCV_TYPE_NON_KD    2
-#define RCVHQ_RCV_TYPE_ERROR     3
-
-
-/* sub OpCodes - ith4x  */
-#define IPATH_ITH4X_OPCODE_ENCAP 0x81
-#define IPATH_ITH4X_OPCODE_LID_ARP 0x82
-
-#define IPATH_HEADER_QUEUE_WORDS 9
-
-/* functions for extracting fields from rcvhdrq entries for the driver.
- */
-static inline __u32 ipath_hdrget_err_flags(const __le32 * rbuf)
-{
-       return __le32_to_cpu(rbuf[1]) & INFINIPATH_RHF_H_ERR_MASK;
-}
-
-static inline __u32 ipath_hdrget_rcv_type(const __le32 * rbuf)
-{
-       return (__le32_to_cpu(rbuf[0]) >> INFINIPATH_RHF_RCVTYPE_SHIFT)
-           & INFINIPATH_RHF_RCVTYPE_MASK;
-}
-
-static inline __u32 ipath_hdrget_length_in_bytes(const __le32 * rbuf)
-{
-       return ((__le32_to_cpu(rbuf[0]) >> INFINIPATH_RHF_LENGTH_SHIFT)
-               & INFINIPATH_RHF_LENGTH_MASK) << 2;
-}
-
-static inline __u32 ipath_hdrget_index(const __le32 * rbuf)
-{
-       return (__le32_to_cpu(rbuf[0]) >> INFINIPATH_RHF_EGRINDEX_SHIFT)
-           & INFINIPATH_RHF_EGRINDEX_MASK;
-}
-
-static inline __u32 ipath_hdrget_seq(const __le32 *rbuf)
-{
-       return (__le32_to_cpu(rbuf[1]) >> INFINIPATH_RHF_SEQ_SHIFT)
-               & INFINIPATH_RHF_SEQ_MASK;
-}
-
-static inline __u32 ipath_hdrget_offset(const __le32 *rbuf)
-{
-       return (__le32_to_cpu(rbuf[1]) >> INFINIPATH_RHF_HDRQ_OFFSET_SHIFT)
-               & INFINIPATH_RHF_HDRQ_OFFSET_MASK;
-}
-
-static inline __u32 ipath_hdrget_use_egr_buf(const __le32 *rbuf)
-{
-       return __le32_to_cpu(rbuf[0]) & INFINIPATH_RHF_L_USE_EGR;
-}
-
-static inline __u32 ipath_hdrget_ipath_ver(__le32 hdrword)
-{
-       return (__le32_to_cpu(hdrword) >> INFINIPATH_I_VERS_SHIFT)
-           & INFINIPATH_I_VERS_MASK;
-}
-
-#endif                         /* _IPATH_COMMON_H */
diff --git a/drivers/staging/rdma/ipath/ipath_cq.c b/drivers/staging/rdma/ipath/ipath_cq.c
deleted file mode 100644 (file)
index e9dd911..0000000
+++ /dev/null
@@ -1,483 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/err.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-
-#include "ipath_verbs.h"
-
-/**
- * ipath_cq_enter - add a new entry to the completion queue
- * @cq: completion queue
- * @entry: work completion entry to add
- * @sig: true if @entry is a solicitated entry
- *
- * This may be called with qp->s_lock held.
- */
-void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
-{
-       struct ipath_cq_wc *wc;
-       unsigned long flags;
-       u32 head;
-       u32 next;
-
-       spin_lock_irqsave(&cq->lock, flags);
-
-       /*
-        * Note that the head pointer might be writable by user processes.
-        * Take care to verify it is a sane value.
-        */
-       wc = cq->queue;
-       head = wc->head;
-       if (head >= (unsigned) cq->ibcq.cqe) {
-               head = cq->ibcq.cqe;
-               next = 0;
-       } else
-               next = head + 1;
-       if (unlikely(next == wc->tail)) {
-               spin_unlock_irqrestore(&cq->lock, flags);
-               if (cq->ibcq.event_handler) {
-                       struct ib_event ev;
-
-                       ev.device = cq->ibcq.device;
-                       ev.element.cq = &cq->ibcq;
-                       ev.event = IB_EVENT_CQ_ERR;
-                       cq->ibcq.event_handler(&ev, cq->ibcq.cq_context);
-               }
-               return;
-       }
-       if (cq->ip) {
-               wc->uqueue[head].wr_id = entry->wr_id;
-               wc->uqueue[head].status = entry->status;
-               wc->uqueue[head].opcode = entry->opcode;
-               wc->uqueue[head].vendor_err = entry->vendor_err;
-               wc->uqueue[head].byte_len = entry->byte_len;
-               wc->uqueue[head].ex.imm_data = (__u32 __force) entry->ex.imm_data;
-               wc->uqueue[head].qp_num = entry->qp->qp_num;
-               wc->uqueue[head].src_qp = entry->src_qp;
-               wc->uqueue[head].wc_flags = entry->wc_flags;
-               wc->uqueue[head].pkey_index = entry->pkey_index;
-               wc->uqueue[head].slid = entry->slid;
-               wc->uqueue[head].sl = entry->sl;
-               wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits;
-               wc->uqueue[head].port_num = entry->port_num;
-               /* Make sure entry is written before the head index. */
-               smp_wmb();
-       } else
-               wc->kqueue[head] = *entry;
-       wc->head = next;
-
-       if (cq->notify == IB_CQ_NEXT_COMP ||
-           (cq->notify == IB_CQ_SOLICITED && solicited)) {
-               cq->notify = IB_CQ_NONE;
-               cq->triggered++;
-               /*
-                * This will cause send_complete() to be called in
-                * another thread.
-                */
-               tasklet_hi_schedule(&cq->comptask);
-       }
-
-       spin_unlock_irqrestore(&cq->lock, flags);
-
-       if (entry->status != IB_WC_SUCCESS)
-               to_idev(cq->ibcq.device)->n_wqe_errs++;
-}
-
-/**
- * ipath_poll_cq - poll for work completion entries
- * @ibcq: the completion queue to poll
- * @num_entries: the maximum number of entries to return
- * @entry: pointer to array where work completions are placed
- *
- * Returns the number of completion entries polled.
- *
- * This may be called from interrupt context.  Also called by ib_poll_cq()
- * in the generic verbs code.
- */
-int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
-{
-       struct ipath_cq *cq = to_icq(ibcq);
-       struct ipath_cq_wc *wc;
-       unsigned long flags;
-       int npolled;
-       u32 tail;
-
-       /* The kernel can only poll a kernel completion queue */
-       if (cq->ip) {
-               npolled = -EINVAL;
-               goto bail;
-       }
-
-       spin_lock_irqsave(&cq->lock, flags);
-
-       wc = cq->queue;
-       tail = wc->tail;
-       if (tail > (u32) cq->ibcq.cqe)
-               tail = (u32) cq->ibcq.cqe;
-       for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
-               if (tail == wc->head)
-                       break;
-               /* The kernel doesn't need a RMB since it has the lock. */
-               *entry = wc->kqueue[tail];
-               if (tail >= cq->ibcq.cqe)
-                       tail = 0;
-               else
-                       tail++;
-       }
-       wc->tail = tail;
-
-       spin_unlock_irqrestore(&cq->lock, flags);
-
-bail:
-       return npolled;
-}
-
-static void send_complete(unsigned long data)
-{
-       struct ipath_cq *cq = (struct ipath_cq *)data;
-
-       /*
-        * The completion handler will most likely rearm the notification
-        * and poll for all pending entries.  If a new completion entry
-        * is added while we are in this routine, tasklet_hi_schedule()
-        * won't call us again until we return so we check triggered to
-        * see if we need to call the handler again.
-        */
-       for (;;) {
-               u8 triggered = cq->triggered;
-
-               cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
-
-               if (cq->triggered == triggered)
-                       return;
-       }
-}
-
-/**
- * ipath_create_cq - create a completion queue
- * @ibdev: the device this completion queue is attached to
- * @attr: creation attributes
- * @context: unused by the InfiniPath driver
- * @udata: unused by the InfiniPath driver
- *
- * Returns a pointer to the completion queue or negative errno values
- * for failure.
- *
- * Called by ib_create_cq() in the generic verbs code.
- */
-struct ib_cq *ipath_create_cq(struct ib_device *ibdev,
-                             const struct ib_cq_init_attr *attr,
-                             struct ib_ucontext *context,
-                             struct ib_udata *udata)
-{
-       int entries = attr->cqe;
-       struct ipath_ibdev *dev = to_idev(ibdev);
-       struct ipath_cq *cq;
-       struct ipath_cq_wc *wc;
-       struct ib_cq *ret;
-       u32 sz;
-
-       if (attr->flags)
-               return ERR_PTR(-EINVAL);
-
-       if (entries < 1 || entries > ib_ipath_max_cqes) {
-               ret = ERR_PTR(-EINVAL);
-               goto done;
-       }
-
-       /* Allocate the completion queue structure. */
-       cq = kmalloc(sizeof(*cq), GFP_KERNEL);
-       if (!cq) {
-               ret = ERR_PTR(-ENOMEM);
-               goto done;
-       }
-
-       /*
-        * Allocate the completion queue entries and head/tail pointers.
-        * This is allocated separately so that it can be resized and
-        * also mapped into user space.
-        * We need to use vmalloc() in order to support mmap and large
-        * numbers of entries.
-        */
-       sz = sizeof(*wc);
-       if (udata && udata->outlen >= sizeof(__u64))
-               sz += sizeof(struct ib_uverbs_wc) * (entries + 1);
-       else
-               sz += sizeof(struct ib_wc) * (entries + 1);
-       wc = vmalloc_user(sz);
-       if (!wc) {
-               ret = ERR_PTR(-ENOMEM);
-               goto bail_cq;
-       }
-
-       /*
-        * Return the address of the WC as the offset to mmap.
-        * See ipath_mmap() for details.
-        */
-       if (udata && udata->outlen >= sizeof(__u64)) {
-               int err;
-
-               cq->ip = ipath_create_mmap_info(dev, sz, context, wc);
-               if (!cq->ip) {
-                       ret = ERR_PTR(-ENOMEM);
-                       goto bail_wc;
-               }
-
-               err = ib_copy_to_udata(udata, &cq->ip->offset,
-                                      sizeof(cq->ip->offset));
-               if (err) {
-                       ret = ERR_PTR(err);
-                       goto bail_ip;
-               }
-       } else
-               cq->ip = NULL;
-
-       spin_lock(&dev->n_cqs_lock);
-       if (dev->n_cqs_allocated == ib_ipath_max_cqs) {
-               spin_unlock(&dev->n_cqs_lock);
-               ret = ERR_PTR(-ENOMEM);
-               goto bail_ip;
-       }
-
-       dev->n_cqs_allocated++;
-       spin_unlock(&dev->n_cqs_lock);
-
-       if (cq->ip) {
-               spin_lock_irq(&dev->pending_lock);
-               list_add(&cq->ip->pending_mmaps, &dev->pending_mmaps);
-               spin_unlock_irq(&dev->pending_lock);
-       }
-
-       /*
-        * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe.
-        * The number of entries should be >= the number requested or return
-        * an error.
-        */
-       cq->ibcq.cqe = entries;
-       cq->notify = IB_CQ_NONE;
-       cq->triggered = 0;
-       spin_lock_init(&cq->lock);
-       tasklet_init(&cq->comptask, send_complete, (unsigned long)cq);
-       wc->head = 0;
-       wc->tail = 0;
-       cq->queue = wc;
-
-       ret = &cq->ibcq;
-
-       goto done;
-
-bail_ip:
-       kfree(cq->ip);
-bail_wc:
-       vfree(wc);
-bail_cq:
-       kfree(cq);
-done:
-       return ret;
-}
-
-/**
- * ipath_destroy_cq - destroy a completion queue
- * @ibcq: the completion queue to destroy.
- *
- * Returns 0 for success.
- *
- * Called by ib_destroy_cq() in the generic verbs code.
- */
-int ipath_destroy_cq(struct ib_cq *ibcq)
-{
-       struct ipath_ibdev *dev = to_idev(ibcq->device);
-       struct ipath_cq *cq = to_icq(ibcq);
-
-       tasklet_kill(&cq->comptask);
-       spin_lock(&dev->n_cqs_lock);
-       dev->n_cqs_allocated--;
-       spin_unlock(&dev->n_cqs_lock);
-       if (cq->ip)
-               kref_put(&cq->ip->ref, ipath_release_mmap_info);
-       else
-               vfree(cq->queue);
-       kfree(cq);
-
-       return 0;
-}
-
-/**
- * ipath_req_notify_cq - change the notification type for a completion queue
- * @ibcq: the completion queue
- * @notify_flags: the type of notification to request
- *
- * Returns 0 for success.
- *
- * This may be called from interrupt context.  Also called by
- * ib_req_notify_cq() in the generic verbs code.
- */
-int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
-{
-       struct ipath_cq *cq = to_icq(ibcq);
-       unsigned long flags;
-       int ret = 0;
-
-       spin_lock_irqsave(&cq->lock, flags);
-       /*
-        * Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow
-        * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2).
-        */
-       if (cq->notify != IB_CQ_NEXT_COMP)
-               cq->notify = notify_flags & IB_CQ_SOLICITED_MASK;
-
-       if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) &&
-           cq->queue->head != cq->queue->tail)
-               ret = 1;
-
-       spin_unlock_irqrestore(&cq->lock, flags);
-
-       return ret;
-}
-
-/**
- * ipath_resize_cq - change the size of the CQ
- * @ibcq: the completion queue
- *
- * Returns 0 for success.
- */
-int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
-{
-       struct ipath_cq *cq = to_icq(ibcq);
-       struct ipath_cq_wc *old_wc;
-       struct ipath_cq_wc *wc;
-       u32 head, tail, n;
-       int ret;
-       u32 sz;
-
-       if (cqe < 1 || cqe > ib_ipath_max_cqes) {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       /*
-        * Need to use vmalloc() if we want to support large #s of entries.
-        */
-       sz = sizeof(*wc);
-       if (udata && udata->outlen >= sizeof(__u64))
-               sz += sizeof(struct ib_uverbs_wc) * (cqe + 1);
-       else
-               sz += sizeof(struct ib_wc) * (cqe + 1);
-       wc = vmalloc_user(sz);
-       if (!wc) {
-               ret = -ENOMEM;
-               goto bail;
-       }
-
-       /* Check that we can write the offset to mmap. */
-       if (udata && udata->outlen >= sizeof(__u64)) {
-               __u64 offset = 0;
-
-               ret = ib_copy_to_udata(udata, &offset, sizeof(offset));
-               if (ret)
-                       goto bail_free;
-       }
-
-       spin_lock_irq(&cq->lock);
-       /*
-        * Make sure head and tail are sane since they
-        * might be user writable.
-        */
-       old_wc = cq->queue;
-       head = old_wc->head;
-       if (head > (u32) cq->ibcq.cqe)
-               head = (u32) cq->ibcq.cqe;
-       tail = old_wc->tail;
-       if (tail > (u32) cq->ibcq.cqe)
-               tail = (u32) cq->ibcq.cqe;
-       if (head < tail)
-               n = cq->ibcq.cqe + 1 + head - tail;
-       else
-               n = head - tail;
-       if (unlikely((u32)cqe < n)) {
-               ret = -EINVAL;
-               goto bail_unlock;
-       }
-       for (n = 0; tail != head; n++) {
-               if (cq->ip)
-                       wc->uqueue[n] = old_wc->uqueue[tail];
-               else
-                       wc->kqueue[n] = old_wc->kqueue[tail];
-               if (tail == (u32) cq->ibcq.cqe)
-                       tail = 0;
-               else
-                       tail++;
-       }
-       cq->ibcq.cqe = cqe;
-       wc->head = n;
-       wc->tail = 0;
-       cq->queue = wc;
-       spin_unlock_irq(&cq->lock);
-
-       vfree(old_wc);
-
-       if (cq->ip) {
-               struct ipath_ibdev *dev = to_idev(ibcq->device);
-               struct ipath_mmap_info *ip = cq->ip;
-
-               ipath_update_mmap_info(dev, ip, sz, wc);
-
-               /*
-                * Return the offset to mmap.
-                * See ipath_mmap() for details.
-                */
-               if (udata && udata->outlen >= sizeof(__u64)) {
-                       ret = ib_copy_to_udata(udata, &ip->offset,
-                                              sizeof(ip->offset));
-                       if (ret)
-                               goto bail;
-               }
-
-               spin_lock_irq(&dev->pending_lock);
-               if (list_empty(&ip->pending_mmaps))
-                       list_add(&ip->pending_mmaps, &dev->pending_mmaps);
-               spin_unlock_irq(&dev->pending_lock);
-       }
-
-       ret = 0;
-       goto bail;
-
-bail_unlock:
-       spin_unlock_irq(&cq->lock);
-bail_free:
-       vfree(wc);
-bail:
-       return ret;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_debug.h b/drivers/staging/rdma/ipath/ipath_debug.h
deleted file mode 100644 (file)
index 65926cd..0000000
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef _IPATH_DEBUG_H
-#define _IPATH_DEBUG_H
-
-#ifndef _IPATH_DEBUGGING       /* debugging enabled or not */
-#define _IPATH_DEBUGGING 1
-#endif
-
-#if _IPATH_DEBUGGING
-
-/*
- * Mask values for debugging.  The scheme allows us to compile out any
- * of the debug tracing stuff, and if compiled in, to enable or disable
- * dynamically.  This can be set at modprobe time also:
- *      modprobe infinipath.ko infinipath_debug=7
- */
-
-#define __IPATH_INFO        0x1        /* generic low verbosity stuff */
-#define __IPATH_DBG         0x2        /* generic debug */
-#define __IPATH_TRSAMPLE    0x8        /* generate trace buffer sample entries */
-/* leave some low verbosity spots open */
-#define __IPATH_VERBDBG     0x40       /* very verbose debug */
-#define __IPATH_PKTDBG      0x80       /* print packet data */
-/* print process startup (init)/exit messages */
-#define __IPATH_PROCDBG     0x100
-/* print mmap/fault stuff, not using VDBG any more */
-#define __IPATH_MMDBG       0x200
-#define __IPATH_ERRPKTDBG   0x400
-#define __IPATH_USER_SEND   0x1000     /* use user mode send */
-#define __IPATH_KERNEL_SEND 0x2000     /* use kernel mode send */
-#define __IPATH_EPKTDBG     0x4000     /* print ethernet packet data */
-#define __IPATH_IPATHDBG    0x10000    /* Ethernet (IPATH) gen debug */
-#define __IPATH_IPATHWARN   0x20000    /* Ethernet (IPATH) warnings */
-#define __IPATH_IPATHERR    0x40000    /* Ethernet (IPATH) errors */
-#define __IPATH_IPATHPD     0x80000    /* Ethernet (IPATH) packet dump */
-#define __IPATH_IPATHTABLE  0x100000   /* Ethernet (IPATH) table dump */
-#define __IPATH_LINKVERBDBG 0x200000   /* very verbose linkchange debug */
-
-#else                          /* _IPATH_DEBUGGING */
-
-/*
- * define all of these even with debugging off, for the few places that do
- * if(infinipath_debug & _IPATH_xyzzy), but in a way that will make the
- * compiler eliminate the code
- */
-
-#define __IPATH_INFO      0x0  /* generic low verbosity stuff */
-#define __IPATH_DBG       0x0  /* generic debug */
-#define __IPATH_TRSAMPLE  0x0  /* generate trace buffer sample entries */
-#define __IPATH_VERBDBG   0x0  /* very verbose debug */
-#define __IPATH_PKTDBG    0x0  /* print packet data */
-#define __IPATH_PROCDBG   0x0  /* process startup (init)/exit messages */
-/* print mmap/fault stuff, not using VDBG any more */
-#define __IPATH_MMDBG     0x0
-#define __IPATH_EPKTDBG   0x0  /* print ethernet packet data */
-#define __IPATH_IPATHDBG  0x0  /* Ethernet (IPATH) table dump on */
-#define __IPATH_IPATHWARN 0x0  /* Ethernet (IPATH) warnings on   */
-#define __IPATH_IPATHERR  0x0  /* Ethernet (IPATH) errors on   */
-#define __IPATH_IPATHPD   0x0  /* Ethernet (IPATH) packet dump on   */
-#define __IPATH_IPATHTABLE 0x0 /* Ethernet (IPATH) packet dump on   */
-#define __IPATH_LINKVERBDBG 0x0        /* very verbose linkchange debug */
-
-#endif                         /* _IPATH_DEBUGGING */
-
-#define __IPATH_VERBOSEDBG __IPATH_VERBDBG
-
-#endif                         /* _IPATH_DEBUG_H */
diff --git a/drivers/staging/rdma/ipath/ipath_diag.c b/drivers/staging/rdma/ipath/ipath_diag.c
deleted file mode 100644 (file)
index 45802e9..0000000
+++ /dev/null
@@ -1,551 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/*
- * This file contains support for diagnostic functions.  It is accessed by
- * opening the ipath_diag device, normally minor number 129.  Diagnostic use
- * of the InfiniPath chip may render the chip or board unusable until the
- * driver is unloaded, or in some cases, until the system is rebooted.
- *
- * Accesses to the chip through this interface are not similar to going
- * through the /sys/bus/pci resource mmap interface.
- */
-
-#include <linux/io.h>
-#include <linux/pci.h>
-#include <linux/vmalloc.h>
-#include <linux/fs.h>
-#include <linux/export.h>
-#include <asm/uaccess.h>
-
-#include "ipath_kernel.h"
-#include "ipath_common.h"
-
-int ipath_diag_inuse;
-static int diag_set_link;
-
-static int ipath_diag_open(struct inode *in, struct file *fp);
-static int ipath_diag_release(struct inode *in, struct file *fp);
-static ssize_t ipath_diag_read(struct file *fp, char __user *data,
-                              size_t count, loff_t *off);
-static ssize_t ipath_diag_write(struct file *fp, const char __user *data,
-                               size_t count, loff_t *off);
-
-static const struct file_operations diag_file_ops = {
-       .owner = THIS_MODULE,
-       .write = ipath_diag_write,
-       .read = ipath_diag_read,
-       .open = ipath_diag_open,
-       .release = ipath_diag_release,
-       .llseek = default_llseek,
-};
-
-static ssize_t ipath_diagpkt_write(struct file *fp,
-                                  const char __user *data,
-                                  size_t count, loff_t *off);
-
-static const struct file_operations diagpkt_file_ops = {
-       .owner = THIS_MODULE,
-       .write = ipath_diagpkt_write,
-       .llseek = noop_llseek,
-};
-
-static atomic_t diagpkt_count = ATOMIC_INIT(0);
-static struct cdev *diagpkt_cdev;
-static struct device *diagpkt_dev;
-
-int ipath_diag_add(struct ipath_devdata *dd)
-{
-       char name[16];
-       int ret = 0;
-
-       if (atomic_inc_return(&diagpkt_count) == 1) {
-               ret = ipath_cdev_init(IPATH_DIAGPKT_MINOR,
-                                     "ipath_diagpkt", &diagpkt_file_ops,
-                                     &diagpkt_cdev, &diagpkt_dev);
-
-               if (ret) {
-                       ipath_dev_err(dd, "Couldn't create ipath_diagpkt "
-                                     "device: %d", ret);
-                       goto done;
-               }
-       }
-
-       snprintf(name, sizeof(name), "ipath_diag%d", dd->ipath_unit);
-
-       ret = ipath_cdev_init(IPATH_DIAG_MINOR_BASE + dd->ipath_unit, name,
-                             &diag_file_ops, &dd->diag_cdev,
-                             &dd->diag_dev);
-       if (ret)
-               ipath_dev_err(dd, "Couldn't create %s device: %d",
-                             name, ret);
-
-done:
-       return ret;
-}
-
-void ipath_diag_remove(struct ipath_devdata *dd)
-{
-       if (atomic_dec_and_test(&diagpkt_count))
-               ipath_cdev_cleanup(&diagpkt_cdev, &diagpkt_dev);
-
-       ipath_cdev_cleanup(&dd->diag_cdev, &dd->diag_dev);
-}
-
-/**
- * ipath_read_umem64 - read a 64-bit quantity from the chip into user space
- * @dd: the infinipath device
- * @uaddr: the location to store the data in user memory
- * @caddr: the source chip address (full pointer, not offset)
- * @count: number of bytes to copy (multiple of 32 bits)
- *
- * This function also localizes all chip memory accesses.
- * The copy should be written such that we read full cacheline packets
- * from the chip.  This is usually used for a single qword
- *
- * NOTE:  This assumes the chip address is 64-bit aligned.
- */
-static int ipath_read_umem64(struct ipath_devdata *dd, void __user *uaddr,
-                            const void __iomem *caddr, size_t count)
-{
-       const u64 __iomem *reg_addr = caddr;
-       const u64 __iomem *reg_end = reg_addr + (count / sizeof(u64));
-       int ret;
-
-       /* not very efficient, but it works for now */
-       if (reg_addr < dd->ipath_kregbase || reg_end > dd->ipath_kregend) {
-               ret = -EINVAL;
-               goto bail;
-       }
-       while (reg_addr < reg_end) {
-               u64 data = readq(reg_addr);
-               if (copy_to_user(uaddr, &data, sizeof(u64))) {
-                       ret = -EFAULT;
-                       goto bail;
-               }
-               reg_addr++;
-               uaddr += sizeof(u64);
-       }
-       ret = 0;
-bail:
-       return ret;
-}
-
-/**
- * ipath_write_umem64 - write a 64-bit quantity to the chip from user space
- * @dd: the infinipath device
- * @caddr: the destination chip address (full pointer, not offset)
- * @uaddr: the source of the data in user memory
- * @count: the number of bytes to copy (multiple of 32 bits)
- *
- * This is usually used for a single qword
- * NOTE:  This assumes the chip address is 64-bit aligned.
- */
-
-static int ipath_write_umem64(struct ipath_devdata *dd, void __iomem *caddr,
-                             const void __user *uaddr, size_t count)
-{
-       u64 __iomem *reg_addr = caddr;
-       const u64 __iomem *reg_end = reg_addr + (count / sizeof(u64));
-       int ret;
-
-       /* not very efficient, but it works for now */
-       if (reg_addr < dd->ipath_kregbase || reg_end > dd->ipath_kregend) {
-               ret = -EINVAL;
-               goto bail;
-       }
-       while (reg_addr < reg_end) {
-               u64 data;
-               if (copy_from_user(&data, uaddr, sizeof(data))) {
-                       ret = -EFAULT;
-                       goto bail;
-               }
-               writeq(data, reg_addr);
-
-               reg_addr++;
-               uaddr += sizeof(u64);
-       }
-       ret = 0;
-bail:
-       return ret;
-}
-
-/**
- * ipath_read_umem32 - read a 32-bit quantity from the chip into user space
- * @dd: the infinipath device
- * @uaddr: the location to store the data in user memory
- * @caddr: the source chip address (full pointer, not offset)
- * @count: number of bytes to copy
- *
- * read 32 bit values, not 64 bit; for memories that only
- * support 32 bit reads; usually a single dword.
- */
-static int ipath_read_umem32(struct ipath_devdata *dd, void __user *uaddr,
-                            const void __iomem *caddr, size_t count)
-{
-       const u32 __iomem *reg_addr = caddr;
-       const u32 __iomem *reg_end = reg_addr + (count / sizeof(u32));
-       int ret;
-
-       if (reg_addr < (u32 __iomem *) dd->ipath_kregbase ||
-           reg_end > (u32 __iomem *) dd->ipath_kregend) {
-               ret = -EINVAL;
-               goto bail;
-       }
-       /* not very efficient, but it works for now */
-       while (reg_addr < reg_end) {
-               u32 data = readl(reg_addr);
-               if (copy_to_user(uaddr, &data, sizeof(data))) {
-                       ret = -EFAULT;
-                       goto bail;
-               }
-
-               reg_addr++;
-               uaddr += sizeof(u32);
-
-       }
-       ret = 0;
-bail:
-       return ret;
-}
-
-/**
- * ipath_write_umem32 - write a 32-bit quantity to the chip from user space
- * @dd: the infinipath device
- * @caddr: the destination chip address (full pointer, not offset)
- * @uaddr: the source of the data in user memory
- * @count: number of bytes to copy
- *
- * write 32 bit values, not 64 bit; for memories that only
- * support 32 bit write; usually a single dword.
- */
-
-static int ipath_write_umem32(struct ipath_devdata *dd, void __iomem *caddr,
-                             const void __user *uaddr, size_t count)
-{
-       u32 __iomem *reg_addr = caddr;
-       const u32 __iomem *reg_end = reg_addr + (count / sizeof(u32));
-       int ret;
-
-       if (reg_addr < (u32 __iomem *) dd->ipath_kregbase ||
-           reg_end > (u32 __iomem *) dd->ipath_kregend) {
-               ret = -EINVAL;
-               goto bail;
-       }
-       while (reg_addr < reg_end) {
-               u32 data;
-               if (copy_from_user(&data, uaddr, sizeof(data))) {
-                       ret = -EFAULT;
-                       goto bail;
-               }
-               writel(data, reg_addr);
-
-               reg_addr++;
-               uaddr += sizeof(u32);
-       }
-       ret = 0;
-bail:
-       return ret;
-}
-
-static int ipath_diag_open(struct inode *in, struct file *fp)
-{
-       int unit = iminor(in) - IPATH_DIAG_MINOR_BASE;
-       struct ipath_devdata *dd;
-       int ret;
-
-       mutex_lock(&ipath_mutex);
-
-       if (ipath_diag_inuse) {
-               ret = -EBUSY;
-               goto bail;
-       }
-
-       dd = ipath_lookup(unit);
-
-       if (dd == NULL || !(dd->ipath_flags & IPATH_PRESENT) ||
-           !dd->ipath_kregbase) {
-               ret = -ENODEV;
-               goto bail;
-       }
-
-       fp->private_data = dd;
-       ipath_diag_inuse = -2;
-       diag_set_link = 0;
-       ret = 0;
-
-       /* Only expose a way to reset the device if we
-          make it into diag mode. */
-       ipath_expose_reset(&dd->pcidev->dev);
-
-bail:
-       mutex_unlock(&ipath_mutex);
-
-       return ret;
-}
-
-/**
- * ipath_diagpkt_write - write an IB packet
- * @fp: the diag data device file pointer
- * @data: ipath_diag_pkt structure saying where to get the packet
- * @count: size of data to write
- * @off: unused by this code
- */
-static ssize_t ipath_diagpkt_write(struct file *fp,
-                                  const char __user *data,
-                                  size_t count, loff_t *off)
-{
-       u32 __iomem *piobuf;
-       u32 plen, pbufn, maxlen_reserve;
-       struct ipath_diag_pkt odp;
-       struct ipath_diag_xpkt dp;
-       u32 *tmpbuf = NULL;
-       struct ipath_devdata *dd;
-       ssize_t ret = 0;
-       u64 val;
-       u32 l_state, lt_state; /* LinkState, LinkTrainingState */
-
-
-       if (count == sizeof(dp)) {
-               if (copy_from_user(&dp, data, sizeof(dp))) {
-                       ret = -EFAULT;
-                       goto bail;
-               }
-       } else if (count == sizeof(odp)) {
-               if (copy_from_user(&odp, data, sizeof(odp))) {
-                       ret = -EFAULT;
-                       goto bail;
-               }
-               dp.len = odp.len;
-               dp.unit = odp.unit;
-               dp.data = odp.data;
-               dp.pbc_wd = 0;
-       } else {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       /* send count must be an exact number of dwords */
-       if (dp.len & 3) {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       plen = dp.len >> 2;
-
-       dd = ipath_lookup(dp.unit);
-       if (!dd || !(dd->ipath_flags & IPATH_PRESENT) ||
-           !dd->ipath_kregbase) {
-               ipath_cdbg(VERBOSE, "illegal unit %u for diag data send\n",
-                          dp.unit);
-               ret = -ENODEV;
-               goto bail;
-       }
-
-       if (ipath_diag_inuse && !diag_set_link &&
-           !(dd->ipath_flags & IPATH_LINKACTIVE)) {
-               diag_set_link = 1;
-               ipath_cdbg(VERBOSE, "Trying to set to set link active for "
-                          "diag pkt\n");
-               ipath_set_linkstate(dd, IPATH_IB_LINKARM);
-               ipath_set_linkstate(dd, IPATH_IB_LINKACTIVE);
-       }
-
-       if (!(dd->ipath_flags & IPATH_INITTED)) {
-               /* no hardware, freeze, etc. */
-               ipath_cdbg(VERBOSE, "unit %u not usable\n", dd->ipath_unit);
-               ret = -ENODEV;
-               goto bail;
-       }
-       /*
-        * Want to skip check for l_state if using custom PBC,
-        * because we might be trying to force an SM packet out.
-        * first-cut, skip _all_ state checking in that case.
-        */
-       val = ipath_ib_state(dd, dd->ipath_lastibcstat);
-       lt_state = ipath_ib_linktrstate(dd, dd->ipath_lastibcstat);
-       l_state = ipath_ib_linkstate(dd, dd->ipath_lastibcstat);
-       if (!dp.pbc_wd && (lt_state != INFINIPATH_IBCS_LT_STATE_LINKUP ||
-           (val != dd->ib_init && val != dd->ib_arm &&
-           val != dd->ib_active))) {
-               ipath_cdbg(VERBOSE, "unit %u not ready (state %llx)\n",
-                          dd->ipath_unit, (unsigned long long) val);
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       /*
-        * need total length before first word written, plus 2 Dwords. One Dword
-        * is for padding so we get the full user data when not aligned on
-        * a word boundary. The other Dword is to make sure we have room for the
-        * ICRC which gets tacked on later.
-        */
-       maxlen_reserve = 2 * sizeof(u32);
-       if (dp.len > dd->ipath_ibmaxlen - maxlen_reserve) {
-               ipath_dbg("Pkt len 0x%x > ibmaxlen %x\n",
-                         dp.len, dd->ipath_ibmaxlen);
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       plen = sizeof(u32) + dp.len;
-
-       tmpbuf = vmalloc(plen);
-       if (!tmpbuf) {
-               dev_info(&dd->pcidev->dev, "Unable to allocate tmp buffer, "
-                        "failing\n");
-               ret = -ENOMEM;
-               goto bail;
-       }
-
-       if (copy_from_user(tmpbuf,
-                          (const void __user *) (unsigned long) dp.data,
-                          dp.len)) {
-               ret = -EFAULT;
-               goto bail;
-       }
-
-       plen >>= 2;             /* in dwords */
-
-       piobuf = ipath_getpiobuf(dd, plen, &pbufn);
-       if (!piobuf) {
-               ipath_cdbg(VERBOSE, "No PIO buffers avail unit for %u\n",
-                          dd->ipath_unit);
-               ret = -EBUSY;
-               goto bail;
-       }
-       /* disarm it just to be extra sure */
-       ipath_disarm_piobufs(dd, pbufn, 1);
-
-       if (ipath_debug & __IPATH_PKTDBG)
-               ipath_cdbg(VERBOSE, "unit %u 0x%x+1w pio%d\n",
-                          dd->ipath_unit, plen - 1, pbufn);
-
-       if (dp.pbc_wd == 0)
-               dp.pbc_wd = plen;
-       writeq(dp.pbc_wd, piobuf);
-       /*
-        * Copy all by the trigger word, then flush, so it's written
-        * to chip before trigger word, then write trigger word, then
-        * flush again, so packet is sent.
-        */
-       if (dd->ipath_flags & IPATH_PIO_FLUSH_WC) {
-               ipath_flush_wc();
-               __iowrite32_copy(piobuf + 2, tmpbuf, plen - 1);
-               ipath_flush_wc();
-               __raw_writel(tmpbuf[plen - 1], piobuf + plen + 1);
-       } else
-               __iowrite32_copy(piobuf + 2, tmpbuf, plen);
-
-       ipath_flush_wc();
-
-       ret = sizeof(dp);
-
-bail:
-       vfree(tmpbuf);
-       return ret;
-}
-
-static int ipath_diag_release(struct inode *in, struct file *fp)
-{
-       mutex_lock(&ipath_mutex);
-       ipath_diag_inuse = 0;
-       fp->private_data = NULL;
-       mutex_unlock(&ipath_mutex);
-       return 0;
-}
-
-static ssize_t ipath_diag_read(struct file *fp, char __user *data,
-                              size_t count, loff_t *off)
-{
-       struct ipath_devdata *dd = fp->private_data;
-       void __iomem *kreg_base;
-       ssize_t ret;
-
-       kreg_base = dd->ipath_kregbase;
-
-       if (count == 0)
-               ret = 0;
-       else if ((count % 4) || (*off % 4))
-               /* address or length is not 32-bit aligned, hence invalid */
-               ret = -EINVAL;
-       else if (ipath_diag_inuse < 1 && (*off || count != 8))
-               ret = -EINVAL;  /* prevent cat /dev/ipath_diag* */
-       else if ((count % 8) || (*off % 8))
-               /* address or length not 64-bit aligned; do 32-bit reads */
-               ret = ipath_read_umem32(dd, data, kreg_base + *off, count);
-       else
-               ret = ipath_read_umem64(dd, data, kreg_base + *off, count);
-
-       if (ret >= 0) {
-               *off += count;
-               ret = count;
-               if (ipath_diag_inuse == -2)
-                       ipath_diag_inuse++;
-       }
-
-       return ret;
-}
-
-static ssize_t ipath_diag_write(struct file *fp, const char __user *data,
-                               size_t count, loff_t *off)
-{
-       struct ipath_devdata *dd = fp->private_data;
-       void __iomem *kreg_base;
-       ssize_t ret;
-
-       kreg_base = dd->ipath_kregbase;
-
-       if (count == 0)
-               ret = 0;
-       else if ((count % 4) || (*off % 4))
-               /* address or length is not 32-bit aligned, hence invalid */
-               ret = -EINVAL;
-       else if ((ipath_diag_inuse == -1 && (*off || count != 8)) ||
-                ipath_diag_inuse == -2)  /* read qw off 0, write qw off 0 */
-               ret = -EINVAL;  /* before any other write allowed */
-       else if ((count % 8) || (*off % 8))
-               /* address or length not 64-bit aligned; do 32-bit writes */
-               ret = ipath_write_umem32(dd, kreg_base + *off, data, count);
-       else
-               ret = ipath_write_umem64(dd, kreg_base + *off, data, count);
-
-       if (ret >= 0) {
-               *off += count;
-               ret = count;
-               if (ipath_diag_inuse == -1)
-                       ipath_diag_inuse = 1; /* all read/write OK now */
-       }
-
-       return ret;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_dma.c b/drivers/staging/rdma/ipath/ipath_dma.c
deleted file mode 100644 (file)
index 123a8c0..0000000
+++ /dev/null
@@ -1,179 +0,0 @@
-/*
- * Copyright (c) 2006 QLogic, Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/scatterlist.h>
-#include <linux/gfp.h>
-#include <rdma/ib_verbs.h>
-
-#include "ipath_verbs.h"
-
-#define BAD_DMA_ADDRESS ((u64) 0)
-
-/*
- * The following functions implement driver specific replacements
- * for the ib_dma_*() functions.
- *
- * These functions return kernel virtual addresses instead of
- * device bus addresses since the driver uses the CPU to copy
- * data instead of using hardware DMA.
- */
-
-static int ipath_mapping_error(struct ib_device *dev, u64 dma_addr)
-{
-       return dma_addr == BAD_DMA_ADDRESS;
-}
-
-static u64 ipath_dma_map_single(struct ib_device *dev,
-                               void *cpu_addr, size_t size,
-                               enum dma_data_direction direction)
-{
-       BUG_ON(!valid_dma_direction(direction));
-       return (u64) cpu_addr;
-}
-
-static void ipath_dma_unmap_single(struct ib_device *dev,
-                                  u64 addr, size_t size,
-                                  enum dma_data_direction direction)
-{
-       BUG_ON(!valid_dma_direction(direction));
-}
-
-static u64 ipath_dma_map_page(struct ib_device *dev,
-                             struct page *page,
-                             unsigned long offset,
-                             size_t size,
-                             enum dma_data_direction direction)
-{
-       u64 addr;
-
-       BUG_ON(!valid_dma_direction(direction));
-
-       if (offset + size > PAGE_SIZE) {
-               addr = BAD_DMA_ADDRESS;
-               goto done;
-       }
-
-       addr = (u64) page_address(page);
-       if (addr)
-               addr += offset;
-       /* TODO: handle highmem pages */
-
-done:
-       return addr;
-}
-
-static void ipath_dma_unmap_page(struct ib_device *dev,
-                                u64 addr, size_t size,
-                                enum dma_data_direction direction)
-{
-       BUG_ON(!valid_dma_direction(direction));
-}
-
-static int ipath_map_sg(struct ib_device *dev, struct scatterlist *sgl,
-                       int nents, enum dma_data_direction direction)
-{
-       struct scatterlist *sg;
-       u64 addr;
-       int i;
-       int ret = nents;
-
-       BUG_ON(!valid_dma_direction(direction));
-
-       for_each_sg(sgl, sg, nents, i) {
-               addr = (u64) page_address(sg_page(sg));
-               /* TODO: handle highmem pages */
-               if (!addr) {
-                       ret = 0;
-                       break;
-               }
-               sg->dma_address = addr + sg->offset;
-#ifdef CONFIG_NEED_SG_DMA_LENGTH
-               sg->dma_length = sg->length;
-#endif
-       }
-       return ret;
-}
-
-static void ipath_unmap_sg(struct ib_device *dev,
-                          struct scatterlist *sg, int nents,
-                          enum dma_data_direction direction)
-{
-       BUG_ON(!valid_dma_direction(direction));
-}
-
-static void ipath_sync_single_for_cpu(struct ib_device *dev,
-                                     u64 addr,
-                                     size_t size,
-                                     enum dma_data_direction dir)
-{
-}
-
-static void ipath_sync_single_for_device(struct ib_device *dev,
-                                        u64 addr,
-                                        size_t size,
-                                        enum dma_data_direction dir)
-{
-}
-
-static void *ipath_dma_alloc_coherent(struct ib_device *dev, size_t size,
-                                     u64 *dma_handle, gfp_t flag)
-{
-       struct page *p;
-       void *addr = NULL;
-
-       p = alloc_pages(flag, get_order(size));
-       if (p)
-               addr = page_address(p);
-       if (dma_handle)
-               *dma_handle = (u64) addr;
-       return addr;
-}
-
-static void ipath_dma_free_coherent(struct ib_device *dev, size_t size,
-                                   void *cpu_addr, u64 dma_handle)
-{
-       free_pages((unsigned long) cpu_addr, get_order(size));
-}
-
-struct ib_dma_mapping_ops ipath_dma_mapping_ops = {
-       .mapping_error = ipath_mapping_error,
-       .map_single = ipath_dma_map_single,
-       .unmap_single = ipath_dma_unmap_single,
-       .map_page = ipath_dma_map_page,
-       .unmap_page = ipath_dma_unmap_page,
-       .map_sg = ipath_map_sg,
-       .unmap_sg = ipath_unmap_sg,
-       .sync_single_for_cpu = ipath_sync_single_for_cpu,
-       .sync_single_for_device = ipath_sync_single_for_device,
-       .alloc_coherent = ipath_dma_alloc_coherent,
-       .free_coherent = ipath_dma_free_coherent
-};
diff --git a/drivers/staging/rdma/ipath/ipath_driver.c b/drivers/staging/rdma/ipath/ipath_driver.c
deleted file mode 100644 (file)
index 2ab22f9..0000000
+++ /dev/null
@@ -1,2784 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/spinlock.h>
-#include <linux/idr.h>
-#include <linux/pci.h>
-#include <linux/io.h>
-#include <linux/delay.h>
-#include <linux/netdevice.h>
-#include <linux/vmalloc.h>
-#include <linux/bitmap.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#ifdef CONFIG_X86_64
-#include <asm/pat.h>
-#endif
-
-#include "ipath_kernel.h"
-#include "ipath_verbs.h"
-
-static void ipath_update_pio_bufs(struct ipath_devdata *);
-
-const char *ipath_get_unit_name(int unit)
-{
-       static char iname[16];
-       snprintf(iname, sizeof iname, "infinipath%u", unit);
-       return iname;
-}
-
-#define DRIVER_LOAD_MSG "QLogic " IPATH_DRV_NAME " loaded: "
-#define PFX IPATH_DRV_NAME ": "
-
-/*
- * The size has to be longer than this string, so we can append
- * board/chip information to it in the init code.
- */
-const char ib_ipath_version[] = IPATH_IDSTR "\n";
-
-static struct idr unit_table;
-DEFINE_SPINLOCK(ipath_devs_lock);
-LIST_HEAD(ipath_dev_list);
-
-wait_queue_head_t ipath_state_wait;
-
-unsigned ipath_debug = __IPATH_INFO;
-
-module_param_named(debug, ipath_debug, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(debug, "mask for debug prints");
-EXPORT_SYMBOL_GPL(ipath_debug);
-
-unsigned ipath_mtu4096 = 1; /* max 4KB IB mtu by default, if supported */
-module_param_named(mtu4096, ipath_mtu4096, uint, S_IRUGO);
-MODULE_PARM_DESC(mtu4096, "enable MTU of 4096 bytes, if supported");
-
-static unsigned ipath_hol_timeout_ms = 13000;
-module_param_named(hol_timeout_ms, ipath_hol_timeout_ms, uint, S_IRUGO);
-MODULE_PARM_DESC(hol_timeout_ms,
-       "duration of user app suspension after link failure");
-
-unsigned ipath_linkrecovery = 1;
-module_param_named(linkrecovery, ipath_linkrecovery, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(linkrecovery, "enable workaround for link recovery issue");
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("QLogic <support@qlogic.com>");
-MODULE_DESCRIPTION("QLogic InfiniPath driver");
-
-/*
- * Table to translate the LINKTRAININGSTATE portion of
- * IBCStatus to a human-readable form.
- */
-const char *ipath_ibcstatus_str[] = {
-       "Disabled",
-       "LinkUp",
-       "PollActive",
-       "PollQuiet",
-       "SleepDelay",
-       "SleepQuiet",
-       "LState6",              /* unused */
-       "LState7",              /* unused */
-       "CfgDebounce",
-       "CfgRcvfCfg",
-       "CfgWaitRmt",
-       "CfgIdle",
-       "RecovRetrain",
-       "CfgTxRevLane",         /* unused before IBA7220 */
-       "RecovWaitRmt",
-       "RecovIdle",
-       /* below were added for IBA7220 */
-       "CfgEnhanced",
-       "CfgTest",
-       "CfgWaitRmtTest",
-       "CfgWaitCfgEnhanced",
-       "SendTS_T",
-       "SendTstIdles",
-       "RcvTS_T",
-       "SendTst_TS1s",
-       "LTState18", "LTState19", "LTState1A", "LTState1B",
-       "LTState1C", "LTState1D", "LTState1E", "LTState1F"
-};
-
-static void ipath_remove_one(struct pci_dev *);
-static int ipath_init_one(struct pci_dev *, const struct pci_device_id *);
-
-/* Only needed for registration, nothing else needs this info */
-#define PCI_VENDOR_ID_PATHSCALE 0x1fc1
-#define PCI_DEVICE_ID_INFINIPATH_HT 0xd
-
-/* Number of seconds before our card status check...  */
-#define STATUS_TIMEOUT 60
-
-static const struct pci_device_id ipath_pci_tbl[] = {
-       { PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_HT) },
-       { 0, }
-};
-
-MODULE_DEVICE_TABLE(pci, ipath_pci_tbl);
-
-static struct pci_driver ipath_driver = {
-       .name = IPATH_DRV_NAME,
-       .probe = ipath_init_one,
-       .remove = ipath_remove_one,
-       .id_table = ipath_pci_tbl,
-       .driver = {
-               .groups = ipath_driver_attr_groups,
-       },
-};
-
-static inline void read_bars(struct ipath_devdata *dd, struct pci_dev *dev,
-                            u32 *bar0, u32 *bar1)
-{
-       int ret;
-
-       ret = pci_read_config_dword(dev, PCI_BASE_ADDRESS_0, bar0);
-       if (ret)
-               ipath_dev_err(dd, "failed to read bar0 before enable: "
-                             "error %d\n", -ret);
-
-       ret = pci_read_config_dword(dev, PCI_BASE_ADDRESS_1, bar1);
-       if (ret)
-               ipath_dev_err(dd, "failed to read bar1 before enable: "
-                             "error %d\n", -ret);
-
-       ipath_dbg("Read bar0 %x bar1 %x\n", *bar0, *bar1);
-}
-
-static void ipath_free_devdata(struct pci_dev *pdev,
-                              struct ipath_devdata *dd)
-{
-       unsigned long flags;
-
-       pci_set_drvdata(pdev, NULL);
-
-       if (dd->ipath_unit != -1) {
-               spin_lock_irqsave(&ipath_devs_lock, flags);
-               idr_remove(&unit_table, dd->ipath_unit);
-               list_del(&dd->ipath_list);
-               spin_unlock_irqrestore(&ipath_devs_lock, flags);
-       }
-       vfree(dd);
-}
-
-static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev)
-{
-       unsigned long flags;
-       struct ipath_devdata *dd;
-       int ret;
-
-       dd = vzalloc(sizeof(*dd));
-       if (!dd) {
-               dd = ERR_PTR(-ENOMEM);
-               goto bail;
-       }
-       dd->ipath_unit = -1;
-
-       idr_preload(GFP_KERNEL);
-       spin_lock_irqsave(&ipath_devs_lock, flags);
-
-       ret = idr_alloc(&unit_table, dd, 0, 0, GFP_NOWAIT);
-       if (ret < 0) {
-               printk(KERN_ERR IPATH_DRV_NAME
-                      ": Could not allocate unit ID: error %d\n", -ret);
-               ipath_free_devdata(pdev, dd);
-               dd = ERR_PTR(ret);
-               goto bail_unlock;
-       }
-       dd->ipath_unit = ret;
-
-       dd->pcidev = pdev;
-       pci_set_drvdata(pdev, dd);
-
-       list_add(&dd->ipath_list, &ipath_dev_list);
-
-bail_unlock:
-       spin_unlock_irqrestore(&ipath_devs_lock, flags);
-       idr_preload_end();
-bail:
-       return dd;
-}
-
-static inline struct ipath_devdata *__ipath_lookup(int unit)
-{
-       return idr_find(&unit_table, unit);
-}
-
-struct ipath_devdata *ipath_lookup(int unit)
-{
-       struct ipath_devdata *dd;
-       unsigned long flags;
-
-       spin_lock_irqsave(&ipath_devs_lock, flags);
-       dd = __ipath_lookup(unit);
-       spin_unlock_irqrestore(&ipath_devs_lock, flags);
-
-       return dd;
-}
-
-int ipath_count_units(int *npresentp, int *nupp, int *maxportsp)
-{
-       int nunits, npresent, nup;
-       struct ipath_devdata *dd;
-       unsigned long flags;
-       int maxports;
-
-       nunits = npresent = nup = maxports = 0;
-
-       spin_lock_irqsave(&ipath_devs_lock, flags);
-
-       list_for_each_entry(dd, &ipath_dev_list, ipath_list) {
-               nunits++;
-               if ((dd->ipath_flags & IPATH_PRESENT) && dd->ipath_kregbase)
-                       npresent++;
-               if (dd->ipath_lid &&
-                   !(dd->ipath_flags & (IPATH_DISABLED | IPATH_LINKDOWN
-                                        | IPATH_LINKUNK)))
-                       nup++;
-               if (dd->ipath_cfgports > maxports)
-                       maxports = dd->ipath_cfgports;
-       }
-
-       spin_unlock_irqrestore(&ipath_devs_lock, flags);
-
-       if (npresentp)
-               *npresentp = npresent;
-       if (nupp)
-               *nupp = nup;
-       if (maxportsp)
-               *maxportsp = maxports;
-
-       return nunits;
-}
-
-/*
- * These next two routines are placeholders in case we don't have per-arch
- * code for controlling write combining.  If explicit control of write
- * combining is not available, performance will probably be awful.
- */
-
-int __attribute__((weak)) ipath_enable_wc(struct ipath_devdata *dd)
-{
-       return -EOPNOTSUPP;
-}
-
-void __attribute__((weak)) ipath_disable_wc(struct ipath_devdata *dd)
-{
-}
-
-/*
- * Perform a PIO buffer bandwidth write test, to verify proper system
- * configuration.  Even when all the setup calls work, occasionally
- * BIOS or other issues can prevent write combining from working, or
- * can cause other bandwidth problems to the chip.
- *
- * This test simply writes the same buffer over and over again, and
- * measures close to the peak bandwidth to the chip (not testing
- * data bandwidth to the wire).   On chips that use an address-based
- * trigger to send packets to the wire, this is easy.  On chips that
- * use a count to trigger, we want to make sure that the packet doesn't
- * go out on the wire, or trigger flow control checks.
- */
-static void ipath_verify_pioperf(struct ipath_devdata *dd)
-{
-       u32 pbnum, cnt, lcnt;
-       u32 __iomem *piobuf;
-       u32 *addr;
-       u64 msecs, emsecs;
-
-       piobuf = ipath_getpiobuf(dd, 0, &pbnum);
-       if (!piobuf) {
-               dev_info(&dd->pcidev->dev,
-                       "No PIObufs for checking perf, skipping\n");
-               return;
-       }
-
-       /*
-        * Enough to give us a reasonable test, less than piobuf size, and
-        * likely multiple of store buffer length.
-        */
-       cnt = 1024;
-
-       addr = vmalloc(cnt);
-       if (!addr) {
-               dev_info(&dd->pcidev->dev,
-                       "Couldn't get memory for checking PIO perf,"
-                       " skipping\n");
-               goto done;
-       }
-
-       preempt_disable();  /* we want reasonably accurate elapsed time */
-       msecs = 1 + jiffies_to_msecs(jiffies);
-       for (lcnt = 0; lcnt < 10000U; lcnt++) {
-               /* wait until we cross msec boundary */
-               if (jiffies_to_msecs(jiffies) >= msecs)
-                       break;
-               udelay(1);
-       }
-
-       ipath_disable_armlaunch(dd);
-
-       /*
-        * length 0, no dwords actually sent, and mark as VL15
-        * on chips where that may matter (due to IB flowcontrol)
-        */
-       if ((dd->ipath_flags & IPATH_HAS_PBC_CNT))
-               writeq(1UL << 63, piobuf);
-       else
-               writeq(0, piobuf);
-       ipath_flush_wc();
-
-       /*
-        * this is only roughly accurate, since even with preempt we
-        * still take interrupts that could take a while.   Running for
-        * >= 5 msec seems to get us "close enough" to accurate values
-        */
-       msecs = jiffies_to_msecs(jiffies);
-       for (emsecs = lcnt = 0; emsecs <= 5UL; lcnt++) {
-               __iowrite32_copy(piobuf + 64, addr, cnt >> 2);
-               emsecs = jiffies_to_msecs(jiffies) - msecs;
-       }
-
-       /* 1 GiB/sec, slightly over IB SDR line rate */
-       if (lcnt < (emsecs * 1024U))
-               ipath_dev_err(dd,
-                       "Performance problem: bandwidth to PIO buffers is "
-                       "only %u MiB/sec\n",
-                       lcnt / (u32) emsecs);
-       else
-               ipath_dbg("PIO buffer bandwidth %u MiB/sec is OK\n",
-                       lcnt / (u32) emsecs);
-
-       preempt_enable();
-
-       vfree(addr);
-
-done:
-       /* disarm piobuf, so it's available again */
-       ipath_disarm_piobufs(dd, pbnum, 1);
-       ipath_enable_armlaunch(dd);
-}
-
-static void cleanup_device(struct ipath_devdata *dd);
-
-static int ipath_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
-{
-       int ret, len, j;
-       struct ipath_devdata *dd;
-       unsigned long long addr;
-       u32 bar0 = 0, bar1 = 0;
-
-#ifdef CONFIG_X86_64
-       if (pat_enabled()) {
-               pr_warn("ipath needs PAT disabled, boot with nopat kernel parameter\n");
-               ret = -ENODEV;
-               goto bail;
-       }
-#endif
-
-       dd = ipath_alloc_devdata(pdev);
-       if (IS_ERR(dd)) {
-               ret = PTR_ERR(dd);
-               printk(KERN_ERR IPATH_DRV_NAME
-                      ": Could not allocate devdata: error %d\n", -ret);
-               goto bail;
-       }
-
-       ipath_cdbg(VERBOSE, "initializing unit #%u\n", dd->ipath_unit);
-
-       ret = pci_enable_device(pdev);
-       if (ret) {
-               /* This can happen iff:
-                *
-                * We did a chip reset, and then failed to reprogram the
-                * BAR, or the chip reset due to an internal error.  We then
-                * unloaded the driver and reloaded it.
-                *
-                * Both reset cases set the BAR back to initial state.  For
-                * the latter case, the AER sticky error bit at offset 0x718
-                * should be set, but the Linux kernel doesn't yet know
-                * about that, it appears.  If the original BAR was retained
-                * in the kernel data structures, this may be OK.
-                */
-               ipath_dev_err(dd, "enable unit %d failed: error %d\n",
-                             dd->ipath_unit, -ret);
-               goto bail_devdata;
-       }
-       addr = pci_resource_start(pdev, 0);
-       len = pci_resource_len(pdev, 0);
-       ipath_cdbg(VERBOSE, "regbase (0) %llx len %d irq %d, vend %x/%x "
-                  "driver_data %lx\n", addr, len, pdev->irq, ent->vendor,
-                  ent->device, ent->driver_data);
-
-       read_bars(dd, pdev, &bar0, &bar1);
-
-       if (!bar1 && !(bar0 & ~0xf)) {
-               if (addr) {
-                       dev_info(&pdev->dev, "BAR is 0 (probable RESET), "
-                                "rewriting as %llx\n", addr);
-                       ret = pci_write_config_dword(
-                               pdev, PCI_BASE_ADDRESS_0, addr);
-                       if (ret) {
-                               ipath_dev_err(dd, "rewrite of BAR0 "
-                                             "failed: err %d\n", -ret);
-                               goto bail_disable;
-                       }
-                       ret = pci_write_config_dword(
-                               pdev, PCI_BASE_ADDRESS_1, addr >> 32);
-                       if (ret) {
-                               ipath_dev_err(dd, "rewrite of BAR1 "
-                                             "failed: err %d\n", -ret);
-                               goto bail_disable;
-                       }
-               } else {
-                       ipath_dev_err(dd, "BAR is 0 (probable RESET), "
-                                     "not usable until reboot\n");
-                       ret = -ENODEV;
-                       goto bail_disable;
-               }
-       }
-
-       ret = pci_request_regions(pdev, IPATH_DRV_NAME);
-       if (ret) {
-               dev_info(&pdev->dev, "pci_request_regions unit %u fails: "
-                        "err %d\n", dd->ipath_unit, -ret);
-               goto bail_disable;
-       }
-
-       ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
-       if (ret) {
-               /*
-                * if the 64 bit setup fails, try 32 bit.  Some systems
-                * do not setup 64 bit maps on systems with 2GB or less
-                * memory installed.
-                */
-               ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
-               if (ret) {
-                       dev_info(&pdev->dev,
-                               "Unable to set DMA mask for unit %u: %d\n",
-                               dd->ipath_unit, ret);
-                       goto bail_regions;
-               } else {
-                       ipath_dbg("No 64bit DMA mask, used 32 bit mask\n");
-                       ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
-                       if (ret)
-                               dev_info(&pdev->dev,
-                                       "Unable to set DMA consistent mask "
-                                       "for unit %u: %d\n",
-                                       dd->ipath_unit, ret);
-
-               }
-       } else {
-               ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
-               if (ret)
-                       dev_info(&pdev->dev,
-                               "Unable to set DMA consistent mask "
-                               "for unit %u: %d\n",
-                               dd->ipath_unit, ret);
-       }
-
-       pci_set_master(pdev);
-
-       /*
-        * Save BARs to rewrite after device reset.  Save all 64 bits of
-        * BAR, just in case.
-        */
-       dd->ipath_pcibar0 = addr;
-       dd->ipath_pcibar1 = addr >> 32;
-       dd->ipath_deviceid = ent->device;       /* save for later use */
-       dd->ipath_vendorid = ent->vendor;
-
-       /* setup the chip-specific functions, as early as possible. */
-       switch (ent->device) {
-       case PCI_DEVICE_ID_INFINIPATH_HT:
-               ipath_init_iba6110_funcs(dd);
-               break;
-
-       default:
-               ipath_dev_err(dd, "Found unknown QLogic deviceid 0x%x, "
-                             "failing\n", ent->device);
-               return -ENODEV;
-       }
-
-       for (j = 0; j < 6; j++) {
-               if (!pdev->resource[j].start)
-                       continue;
-               ipath_cdbg(VERBOSE, "BAR %d %pR, len %llx\n",
-                          j, &pdev->resource[j],
-                          (unsigned long long)pci_resource_len(pdev, j));
-       }
-
-       if (!addr) {
-               ipath_dev_err(dd, "No valid address in BAR 0!\n");
-               ret = -ENODEV;
-               goto bail_regions;
-       }
-
-       dd->ipath_pcirev = pdev->revision;
-
-#if defined(__powerpc__)
-       /* There isn't a generic way to specify writethrough mappings */
-       dd->ipath_kregbase = __ioremap(addr, len,
-               (_PAGE_NO_CACHE|_PAGE_WRITETHRU));
-#else
-       /* XXX: split this properly to enable on PAT */
-       dd->ipath_kregbase = ioremap_nocache(addr, len);
-#endif
-
-       if (!dd->ipath_kregbase) {
-               ipath_dbg("Unable to map io addr %llx to kvirt, failing\n",
-                         addr);
-               ret = -ENOMEM;
-               goto bail_iounmap;
-       }
-       dd->ipath_kregend = (u64 __iomem *)
-               ((void __iomem *)dd->ipath_kregbase + len);
-       dd->ipath_physaddr = addr;      /* used for io_remap, etc. */
-       /* for user mmap */
-       ipath_cdbg(VERBOSE, "mapped io addr %llx to kregbase %p\n",
-                  addr, dd->ipath_kregbase);
-
-       if (dd->ipath_f_bus(dd, pdev))
-               ipath_dev_err(dd, "Failed to setup config space; "
-                             "continuing anyway\n");
-
-       /*
-        * set up our interrupt handler; IRQF_SHARED probably not needed,
-        * since MSI interrupts shouldn't be shared but won't  hurt for now.
-        * check 0 irq after we return from chip-specific bus setup, since
-        * that can affect this due to setup
-        */
-       if (!dd->ipath_irq)
-               ipath_dev_err(dd, "irq is 0, BIOS error?  Interrupts won't "
-                             "work\n");
-       else {
-               ret = request_irq(dd->ipath_irq, ipath_intr, IRQF_SHARED,
-                                 IPATH_DRV_NAME, dd);
-               if (ret) {
-                       ipath_dev_err(dd, "Couldn't setup irq handler, "
-                                     "irq=%d: %d\n", dd->ipath_irq, ret);
-                       goto bail_iounmap;
-               }
-       }
-
-       ret = ipath_init_chip(dd, 0);   /* do the chip-specific init */
-       if (ret)
-               goto bail_irqsetup;
-
-       ret = ipath_enable_wc(dd);
-
-       if (ret)
-               ret = 0;
-
-       ipath_verify_pioperf(dd);
-
-       ipath_device_create_group(&pdev->dev, dd);
-       ipathfs_add_device(dd);
-       ipath_user_add(dd);
-       ipath_diag_add(dd);
-       ipath_register_ib_device(dd);
-
-       goto bail;
-
-bail_irqsetup:
-       cleanup_device(dd);
-
-       if (dd->ipath_irq)
-               dd->ipath_f_free_irq(dd);
-
-       if (dd->ipath_f_cleanup)
-               dd->ipath_f_cleanup(dd);
-
-bail_iounmap:
-       iounmap((volatile void __iomem *) dd->ipath_kregbase);
-
-bail_regions:
-       pci_release_regions(pdev);
-
-bail_disable:
-       pci_disable_device(pdev);
-
-bail_devdata:
-       ipath_free_devdata(pdev, dd);
-
-bail:
-       return ret;
-}
-
-static void cleanup_device(struct ipath_devdata *dd)
-{
-       int port;
-       struct ipath_portdata **tmp;
-       unsigned long flags;
-
-       if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) {
-               /* can't do anything more with chip; needs re-init */
-               *dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT;
-               if (dd->ipath_kregbase) {
-                       /*
-                        * if we haven't already cleaned up before these are
-                        * to ensure any register reads/writes "fail" until
-                        * re-init
-                        */
-                       dd->ipath_kregbase = NULL;
-                       dd->ipath_uregbase = 0;
-                       dd->ipath_sregbase = 0;
-                       dd->ipath_cregbase = 0;
-                       dd->ipath_kregsize = 0;
-               }
-               ipath_disable_wc(dd);
-       }
-
-       if (dd->ipath_spectriggerhit)
-               dev_info(&dd->pcidev->dev, "%lu special trigger hits\n",
-                        dd->ipath_spectriggerhit);
-
-       if (dd->ipath_pioavailregs_dma) {
-               dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
-                                 (void *) dd->ipath_pioavailregs_dma,
-                                 dd->ipath_pioavailregs_phys);
-               dd->ipath_pioavailregs_dma = NULL;
-       }
-       if (dd->ipath_dummy_hdrq) {
-               dma_free_coherent(&dd->pcidev->dev,
-                       dd->ipath_pd[0]->port_rcvhdrq_size,
-                       dd->ipath_dummy_hdrq, dd->ipath_dummy_hdrq_phys);
-               dd->ipath_dummy_hdrq = NULL;
-       }
-
-       if (dd->ipath_pageshadow) {
-               struct page **tmpp = dd->ipath_pageshadow;
-               dma_addr_t *tmpd = dd->ipath_physshadow;
-               int i, cnt = 0;
-
-               ipath_cdbg(VERBOSE, "Unlocking any expTID pages still "
-                          "locked\n");
-               for (port = 0; port < dd->ipath_cfgports; port++) {
-                       int port_tidbase = port * dd->ipath_rcvtidcnt;
-                       int maxtid = port_tidbase + dd->ipath_rcvtidcnt;
-                       for (i = port_tidbase; i < maxtid; i++) {
-                               if (!tmpp[i])
-                                       continue;
-                               pci_unmap_page(dd->pcidev, tmpd[i],
-                                       PAGE_SIZE, PCI_DMA_FROMDEVICE);
-                               ipath_release_user_pages(&tmpp[i], 1);
-                               tmpp[i] = NULL;
-                               cnt++;
-                       }
-               }
-               if (cnt) {
-                       ipath_stats.sps_pageunlocks += cnt;
-                       ipath_cdbg(VERBOSE, "There were still %u expTID "
-                                  "entries locked\n", cnt);
-               }
-               if (ipath_stats.sps_pagelocks ||
-                   ipath_stats.sps_pageunlocks)
-                       ipath_cdbg(VERBOSE, "%llu pages locked, %llu "
-                                  "unlocked via ipath_m{un}lock\n",
-                                  (unsigned long long)
-                                  ipath_stats.sps_pagelocks,
-                                  (unsigned long long)
-                                  ipath_stats.sps_pageunlocks);
-
-               ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n",
-                          dd->ipath_pageshadow);
-               tmpp = dd->ipath_pageshadow;
-               dd->ipath_pageshadow = NULL;
-               vfree(tmpp);
-
-               dd->ipath_egrtidbase = NULL;
-       }
-
-       /*
-        * free any resources still in use (usually just kernel ports)
-        * at unload; we do for portcnt, because that's what we allocate.
-        * We acquire lock to be really paranoid that ipath_pd isn't being
-        * accessed from some interrupt-related code (that should not happen,
-        * but best to be sure).
-        */
-       spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
-       tmp = dd->ipath_pd;
-       dd->ipath_pd = NULL;
-       spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
-       for (port = 0; port < dd->ipath_portcnt; port++) {
-               struct ipath_portdata *pd = tmp[port];
-               tmp[port] = NULL; /* debugging paranoia */
-               ipath_free_pddata(dd, pd);
-       }
-       kfree(tmp);
-}
-
-static void ipath_remove_one(struct pci_dev *pdev)
-{
-       struct ipath_devdata *dd = pci_get_drvdata(pdev);
-
-       ipath_cdbg(VERBOSE, "removing, pdev=%p, dd=%p\n", pdev, dd);
-
-       /*
-        * disable the IB link early, to be sure no new packets arrive, which
-        * complicates the shutdown process
-        */
-       ipath_shutdown_device(dd);
-
-       flush_workqueue(ib_wq);
-
-       if (dd->verbs_dev)
-               ipath_unregister_ib_device(dd->verbs_dev);
-
-       ipath_diag_remove(dd);
-       ipath_user_remove(dd);
-       ipathfs_remove_device(dd);
-       ipath_device_remove_group(&pdev->dev, dd);
-
-       ipath_cdbg(VERBOSE, "Releasing pci memory regions, dd %p, "
-                  "unit %u\n", dd, (u32) dd->ipath_unit);
-
-       cleanup_device(dd);
-
-       /*
-        * turn off rcv, send, and interrupts for all ports, all drivers
-        * should also hard reset the chip here?
-        * free up port 0 (kernel) rcvhdr, egr bufs, and eventually tid bufs
-        * for all versions of the driver, if they were allocated
-        */
-       if (dd->ipath_irq) {
-               ipath_cdbg(VERBOSE, "unit %u free irq %d\n",
-                          dd->ipath_unit, dd->ipath_irq);
-               dd->ipath_f_free_irq(dd);
-       } else
-               ipath_dbg("irq is 0, not doing free_irq "
-                         "for unit %u\n", dd->ipath_unit);
-       /*
-        * we check for NULL here, because it's outside
-        * the kregbase check, and we need to call it
-        * after the free_irq.  Thus it's possible that
-        * the function pointers were never initialized.
-        */
-       if (dd->ipath_f_cleanup)
-               /* clean up chip-specific stuff */
-               dd->ipath_f_cleanup(dd);
-
-       ipath_cdbg(VERBOSE, "Unmapping kregbase %p\n", dd->ipath_kregbase);
-       iounmap((volatile void __iomem *) dd->ipath_kregbase);
-       pci_release_regions(pdev);
-       ipath_cdbg(VERBOSE, "calling pci_disable_device\n");
-       pci_disable_device(pdev);
-
-       ipath_free_devdata(pdev, dd);
-}
-
-/* general driver use */
-DEFINE_MUTEX(ipath_mutex);
-
-static DEFINE_SPINLOCK(ipath_pioavail_lock);
-
-/**
- * ipath_disarm_piobufs - cancel a range of PIO buffers
- * @dd: the infinipath device
- * @first: the first PIO buffer to cancel
- * @cnt: the number of PIO buffers to cancel
- *
- * cancel a range of PIO buffers, used when they might be armed, but
- * not triggered.  Used at init to ensure buffer state, and also user
- * process close, in case it died while writing to a PIO buffer
- * Also after errors.
- */
-void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first,
-                         unsigned cnt)
-{
-       unsigned i, last = first + cnt;
-       unsigned long flags;
-
-       ipath_cdbg(PKT, "disarm %u PIObufs first=%u\n", cnt, first);
-       for (i = first; i < last; i++) {
-               spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-               /*
-                * The disarm-related bits are write-only, so it
-                * is ok to OR them in with our copy of sendctrl
-                * while we hold the lock.
-                */
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-                       dd->ipath_sendctrl | INFINIPATH_S_DISARM |
-                       (i << INFINIPATH_S_DISARMPIOBUF_SHIFT));
-               /* can't disarm bufs back-to-back per iba7220 spec */
-               ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-               spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-       }
-       /* on some older chips, update may not happen after cancel */
-       ipath_force_pio_avail_update(dd);
-}
-
-/**
- * ipath_wait_linkstate - wait for an IB link state change to occur
- * @dd: the infinipath device
- * @state: the state to wait for
- * @msecs: the number of milliseconds to wait
- *
- * wait up to msecs milliseconds for IB link state change to occur for
- * now, take the easy polling route.  Currently used only by
- * ipath_set_linkstate.  Returns 0 if state reached, otherwise
- * -ETIMEDOUT state can have multiple states set, for any of several
- * transitions.
- */
-int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state, int msecs)
-{
-       dd->ipath_state_wanted = state;
-       wait_event_interruptible_timeout(ipath_state_wait,
-                                        (dd->ipath_flags & state),
-                                        msecs_to_jiffies(msecs));
-       dd->ipath_state_wanted = 0;
-
-       if (!(dd->ipath_flags & state)) {
-               u64 val;
-               ipath_cdbg(VERBOSE, "Didn't reach linkstate %s within %u"
-                          " ms\n",
-                          /* test INIT ahead of DOWN, both can be set */
-                          (state & IPATH_LINKINIT) ? "INIT" :
-                          ((state & IPATH_LINKDOWN) ? "DOWN" :
-                           ((state & IPATH_LINKARMED) ? "ARM" : "ACTIVE")),
-                          msecs);
-               val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
-               ipath_cdbg(VERBOSE, "ibcc=%llx ibcstatus=%llx (%s)\n",
-                          (unsigned long long) ipath_read_kreg64(
-                                  dd, dd->ipath_kregs->kr_ibcctrl),
-                          (unsigned long long) val,
-                          ipath_ibcstatus_str[val & dd->ibcs_lts_mask]);
-       }
-       return (dd->ipath_flags & state) ? 0 : -ETIMEDOUT;
-}
-
-static void decode_sdma_errs(struct ipath_devdata *dd, ipath_err_t err,
-       char *buf, size_t blen)
-{
-       static const struct {
-               ipath_err_t err;
-               const char *msg;
-       } errs[] = {
-               { INFINIPATH_E_SDMAGENMISMATCH, "SDmaGenMismatch" },
-               { INFINIPATH_E_SDMAOUTOFBOUND, "SDmaOutOfBound" },
-               { INFINIPATH_E_SDMATAILOUTOFBOUND, "SDmaTailOutOfBound" },
-               { INFINIPATH_E_SDMABASE, "SDmaBase" },
-               { INFINIPATH_E_SDMA1STDESC, "SDma1stDesc" },
-               { INFINIPATH_E_SDMARPYTAG, "SDmaRpyTag" },
-               { INFINIPATH_E_SDMADWEN, "SDmaDwEn" },
-               { INFINIPATH_E_SDMAMISSINGDW, "SDmaMissingDw" },
-               { INFINIPATH_E_SDMAUNEXPDATA, "SDmaUnexpData" },
-               { INFINIPATH_E_SDMADESCADDRMISALIGN, "SDmaDescAddrMisalign" },
-               { INFINIPATH_E_SENDBUFMISUSE, "SendBufMisuse" },
-               { INFINIPATH_E_SDMADISABLED, "SDmaDisabled" },
-       };
-       int i;
-       int expected;
-       size_t bidx = 0;
-
-       for (i = 0; i < ARRAY_SIZE(errs); i++) {
-               expected = (errs[i].err != INFINIPATH_E_SDMADISABLED) ? 0 :
-                       test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status);
-               if ((err & errs[i].err) && !expected)
-                       bidx += snprintf(buf + bidx, blen - bidx,
-                                        "%s ", errs[i].msg);
-       }
-}
-
-/*
- * Decode the error status into strings, deciding whether to always
- * print * it or not depending on "normal packet errors" vs everything
- * else.   Return 1 if "real" errors, otherwise 0 if only packet
- * errors, so caller can decide what to print with the string.
- */
-int ipath_decode_err(struct ipath_devdata *dd, char *buf, size_t blen,
-       ipath_err_t err)
-{
-       int iserr = 1;
-       *buf = '\0';
-       if (err & INFINIPATH_E_PKTERRS) {
-               if (!(err & ~INFINIPATH_E_PKTERRS))
-                       iserr = 0; // if only packet errors.
-               if (ipath_debug & __IPATH_ERRPKTDBG) {
-                       if (err & INFINIPATH_E_REBP)
-                               strlcat(buf, "EBP ", blen);
-                       if (err & INFINIPATH_E_RVCRC)
-                               strlcat(buf, "VCRC ", blen);
-                       if (err & INFINIPATH_E_RICRC) {
-                               strlcat(buf, "CRC ", blen);
-                               // clear for check below, so only once
-                               err &= INFINIPATH_E_RICRC;
-                       }
-                       if (err & INFINIPATH_E_RSHORTPKTLEN)
-                               strlcat(buf, "rshortpktlen ", blen);
-                       if (err & INFINIPATH_E_SDROPPEDDATAPKT)
-                               strlcat(buf, "sdroppeddatapkt ", blen);
-                       if (err & INFINIPATH_E_SPKTLEN)
-                               strlcat(buf, "spktlen ", blen);
-               }
-               if ((err & INFINIPATH_E_RICRC) &&
-                       !(err&(INFINIPATH_E_RVCRC|INFINIPATH_E_REBP)))
-                       strlcat(buf, "CRC ", blen);
-               if (!iserr)
-                       goto done;
-       }
-       if (err & INFINIPATH_E_RHDRLEN)
-               strlcat(buf, "rhdrlen ", blen);
-       if (err & INFINIPATH_E_RBADTID)
-               strlcat(buf, "rbadtid ", blen);
-       if (err & INFINIPATH_E_RBADVERSION)
-               strlcat(buf, "rbadversion ", blen);
-       if (err & INFINIPATH_E_RHDR)
-               strlcat(buf, "rhdr ", blen);
-       if (err & INFINIPATH_E_SENDSPECIALTRIGGER)
-               strlcat(buf, "sendspecialtrigger ", blen);
-       if (err & INFINIPATH_E_RLONGPKTLEN)
-               strlcat(buf, "rlongpktlen ", blen);
-       if (err & INFINIPATH_E_RMAXPKTLEN)
-               strlcat(buf, "rmaxpktlen ", blen);
-       if (err & INFINIPATH_E_RMINPKTLEN)
-               strlcat(buf, "rminpktlen ", blen);
-       if (err & INFINIPATH_E_SMINPKTLEN)
-               strlcat(buf, "sminpktlen ", blen);
-       if (err & INFINIPATH_E_RFORMATERR)
-               strlcat(buf, "rformaterr ", blen);
-       if (err & INFINIPATH_E_RUNSUPVL)
-               strlcat(buf, "runsupvl ", blen);
-       if (err & INFINIPATH_E_RUNEXPCHAR)
-               strlcat(buf, "runexpchar ", blen);
-       if (err & INFINIPATH_E_RIBFLOW)
-               strlcat(buf, "ribflow ", blen);
-       if (err & INFINIPATH_E_SUNDERRUN)
-               strlcat(buf, "sunderrun ", blen);
-       if (err & INFINIPATH_E_SPIOARMLAUNCH)
-               strlcat(buf, "spioarmlaunch ", blen);
-       if (err & INFINIPATH_E_SUNEXPERRPKTNUM)
-               strlcat(buf, "sunexperrpktnum ", blen);
-       if (err & INFINIPATH_E_SDROPPEDSMPPKT)
-               strlcat(buf, "sdroppedsmppkt ", blen);
-       if (err & INFINIPATH_E_SMAXPKTLEN)
-               strlcat(buf, "smaxpktlen ", blen);
-       if (err & INFINIPATH_E_SUNSUPVL)
-               strlcat(buf, "sunsupVL ", blen);
-       if (err & INFINIPATH_E_INVALIDADDR)
-               strlcat(buf, "invalidaddr ", blen);
-       if (err & INFINIPATH_E_RRCVEGRFULL)
-               strlcat(buf, "rcvegrfull ", blen);
-       if (err & INFINIPATH_E_RRCVHDRFULL)
-               strlcat(buf, "rcvhdrfull ", blen);
-       if (err & INFINIPATH_E_IBSTATUSCHANGED)
-               strlcat(buf, "ibcstatuschg ", blen);
-       if (err & INFINIPATH_E_RIBLOSTLINK)
-               strlcat(buf, "riblostlink ", blen);
-       if (err & INFINIPATH_E_HARDWARE)
-               strlcat(buf, "hardware ", blen);
-       if (err & INFINIPATH_E_RESET)
-               strlcat(buf, "reset ", blen);
-       if (err & INFINIPATH_E_SDMAERRS)
-               decode_sdma_errs(dd, err, buf, blen);
-       if (err & INFINIPATH_E_INVALIDEEPCMD)
-               strlcat(buf, "invalideepromcmd ", blen);
-done:
-       return iserr;
-}
-
-/**
- * get_rhf_errstring - decode RHF errors
- * @err: the err number
- * @msg: the output buffer
- * @len: the length of the output buffer
- *
- * only used one place now, may want more later
- */
-static void get_rhf_errstring(u32 err, char *msg, size_t len)
-{
-       /* if no errors, and so don't need to check what's first */
-       *msg = '\0';
-
-       if (err & INFINIPATH_RHF_H_ICRCERR)
-               strlcat(msg, "icrcerr ", len);
-       if (err & INFINIPATH_RHF_H_VCRCERR)
-               strlcat(msg, "vcrcerr ", len);
-       if (err & INFINIPATH_RHF_H_PARITYERR)
-               strlcat(msg, "parityerr ", len);
-       if (err & INFINIPATH_RHF_H_LENERR)
-               strlcat(msg, "lenerr ", len);
-       if (err & INFINIPATH_RHF_H_MTUERR)
-               strlcat(msg, "mtuerr ", len);
-       if (err & INFINIPATH_RHF_H_IHDRERR)
-               /* infinipath hdr checksum error */
-               strlcat(msg, "ipathhdrerr ", len);
-       if (err & INFINIPATH_RHF_H_TIDERR)
-               strlcat(msg, "tiderr ", len);
-       if (err & INFINIPATH_RHF_H_MKERR)
-               /* bad port, offset, etc. */
-               strlcat(msg, "invalid ipathhdr ", len);
-       if (err & INFINIPATH_RHF_H_IBERR)
-               strlcat(msg, "iberr ", len);
-       if (err & INFINIPATH_RHF_L_SWA)
-               strlcat(msg, "swA ", len);
-       if (err & INFINIPATH_RHF_L_SWB)
-               strlcat(msg, "swB ", len);
-}
-
-/**
- * ipath_get_egrbuf - get an eager buffer
- * @dd: the infinipath device
- * @bufnum: the eager buffer to get
- *
- * must only be called if ipath_pd[port] is known to be allocated
- */
-static inline void *ipath_get_egrbuf(struct ipath_devdata *dd, u32 bufnum)
-{
-       return dd->ipath_port0_skbinfo ?
-               (void *) dd->ipath_port0_skbinfo[bufnum].skb->data : NULL;
-}
-
-/**
- * ipath_alloc_skb - allocate an skb and buffer with possible constraints
- * @dd: the infinipath device
- * @gfp_mask: the sk_buff SFP mask
- */
-struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd,
-                               gfp_t gfp_mask)
-{
-       struct sk_buff *skb;
-       u32 len;
-
-       /*
-        * Only fully supported way to handle this is to allocate lots
-        * extra, align as needed, and then do skb_reserve().  That wastes
-        * a lot of memory...  I'll have to hack this into infinipath_copy
-        * also.
-        */
-
-       /*
-        * We need 2 extra bytes for ipath_ether data sent in the
-        * key header.  In order to keep everything dword aligned,
-        * we'll reserve 4 bytes.
-        */
-       len = dd->ipath_ibmaxlen + 4;
-
-       if (dd->ipath_flags & IPATH_4BYTE_TID) {
-               /* We need a 2KB multiple alignment, and there is no way
-                * to do it except to allocate extra and then skb_reserve
-                * enough to bring it up to the right alignment.
-                */
-               len += 2047;
-       }
-
-       skb = __dev_alloc_skb(len, gfp_mask);
-       if (!skb) {
-               ipath_dev_err(dd, "Failed to allocate skbuff, length %u\n",
-                             len);
-               goto bail;
-       }
-
-       skb_reserve(skb, 4);
-
-       if (dd->ipath_flags & IPATH_4BYTE_TID) {
-               u32 una = (unsigned long)skb->data & 2047;
-               if (una)
-                       skb_reserve(skb, 2048 - una);
-       }
-
-bail:
-       return skb;
-}
-
-static void ipath_rcv_hdrerr(struct ipath_devdata *dd,
-                            u32 eflags,
-                            u32 l,
-                            u32 etail,
-                            __le32 *rhf_addr,
-                            struct ipath_message_header *hdr)
-{
-       char emsg[128];
-
-       get_rhf_errstring(eflags, emsg, sizeof emsg);
-       ipath_cdbg(PKT, "RHFerrs %x hdrqtail=%x typ=%u "
-                  "tlen=%x opcode=%x egridx=%x: %s\n",
-                  eflags, l,
-                  ipath_hdrget_rcv_type(rhf_addr),
-                  ipath_hdrget_length_in_bytes(rhf_addr),
-                  be32_to_cpu(hdr->bth[0]) >> 24,
-                  etail, emsg);
-
-       /* Count local link integrity errors. */
-       if (eflags & (INFINIPATH_RHF_H_ICRCERR | INFINIPATH_RHF_H_VCRCERR)) {
-               u8 n = (dd->ipath_ibcctrl >>
-                       INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
-                       INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
-
-               if (++dd->ipath_lli_counter > n) {
-                       dd->ipath_lli_counter = 0;
-                       dd->ipath_lli_errors++;
-               }
-       }
-}
-
-/*
- * ipath_kreceive - receive a packet
- * @pd: the infinipath port
- *
- * called from interrupt handler for errors or receive interrupt
- */
-void ipath_kreceive(struct ipath_portdata *pd)
-{
-       struct ipath_devdata *dd = pd->port_dd;
-       __le32 *rhf_addr;
-       void *ebuf;
-       const u32 rsize = dd->ipath_rcvhdrentsize;      /* words */
-       const u32 maxcnt = dd->ipath_rcvhdrcnt * rsize; /* words */
-       u32 etail = -1, l, hdrqtail;
-       struct ipath_message_header *hdr;
-       u32 eflags, i, etype, tlen, pkttot = 0, updegr = 0, reloop = 0;
-       static u64 totcalls;    /* stats, may eventually remove */
-       int last;
-
-       l = pd->port_head;
-       rhf_addr = (__le32 *) pd->port_rcvhdrq + l + dd->ipath_rhf_offset;
-       if (dd->ipath_flags & IPATH_NODMA_RTAIL) {
-               u32 seq = ipath_hdrget_seq(rhf_addr);
-
-               if (seq != pd->port_seq_cnt)
-                       goto bail;
-               hdrqtail = 0;
-       } else {
-               hdrqtail = ipath_get_rcvhdrtail(pd);
-               if (l == hdrqtail)
-                       goto bail;
-               smp_rmb();
-       }
-
-reloop:
-       for (last = 0, i = 1; !last; i += !last) {
-               hdr = dd->ipath_f_get_msgheader(dd, rhf_addr);
-               eflags = ipath_hdrget_err_flags(rhf_addr);
-               etype = ipath_hdrget_rcv_type(rhf_addr);
-               /* total length */
-               tlen = ipath_hdrget_length_in_bytes(rhf_addr);
-               ebuf = NULL;
-               if ((dd->ipath_flags & IPATH_NODMA_RTAIL) ?
-                   ipath_hdrget_use_egr_buf(rhf_addr) :
-                   (etype != RCVHQ_RCV_TYPE_EXPECTED)) {
-                       /*
-                        * It turns out that the chip uses an eager buffer
-                        * for all non-expected packets, whether it "needs"
-                        * one or not.  So always get the index, but don't
-                        * set ebuf (so we try to copy data) unless the
-                        * length requires it.
-                        */
-                       etail = ipath_hdrget_index(rhf_addr);
-                       updegr = 1;
-                       if (tlen > sizeof(*hdr) ||
-                           etype == RCVHQ_RCV_TYPE_NON_KD)
-                               ebuf = ipath_get_egrbuf(dd, etail);
-               }
-
-               /*
-                * both tiderr and ipathhdrerr are set for all plain IB
-                * packets; only ipathhdrerr should be set.
-                */
-
-               if (etype != RCVHQ_RCV_TYPE_NON_KD &&
-                   etype != RCVHQ_RCV_TYPE_ERROR &&
-                   ipath_hdrget_ipath_ver(hdr->iph.ver_port_tid_offset) !=
-                   IPS_PROTO_VERSION)
-                       ipath_cdbg(PKT, "Bad InfiniPath protocol version "
-                                  "%x\n", etype);
-
-               if (unlikely(eflags))
-                       ipath_rcv_hdrerr(dd, eflags, l, etail, rhf_addr, hdr);
-               else if (etype == RCVHQ_RCV_TYPE_NON_KD) {
-                       ipath_ib_rcv(dd->verbs_dev, (u32 *)hdr, ebuf, tlen);
-                       if (dd->ipath_lli_counter)
-                               dd->ipath_lli_counter--;
-               } else if (etype == RCVHQ_RCV_TYPE_EAGER) {
-                       u8 opcode = be32_to_cpu(hdr->bth[0]) >> 24;
-                       u32 qp = be32_to_cpu(hdr->bth[1]) & 0xffffff;
-                       ipath_cdbg(PKT, "typ %x, opcode %x (eager, "
-                                  "qp=%x), len %x; ignored\n",
-                                  etype, opcode, qp, tlen);
-               } else if (etype == RCVHQ_RCV_TYPE_EXPECTED) {
-                       ipath_dbg("Bug: Expected TID, opcode %x; ignored\n",
-                                 be32_to_cpu(hdr->bth[0]) >> 24);
-               } else {
-                       /*
-                        * error packet, type of error unknown.
-                        * Probably type 3, but we don't know, so don't
-                        * even try to print the opcode, etc.
-                        * Usually caused by a "bad packet", that has no
-                        * BTH, when the LRH says it should.
-                        */
-                       ipath_cdbg(ERRPKT, "Error Pkt, but no eflags! egrbuf"
-                                 " %x, len %x hdrq+%x rhf: %Lx\n",
-                                 etail, tlen, l, (unsigned long long)
-                                 le64_to_cpu(*(__le64 *) rhf_addr));
-                       if (ipath_debug & __IPATH_ERRPKTDBG) {
-                               u32 j, *d, dw = rsize-2;
-                               if (rsize > (tlen>>2))
-                                       dw = tlen>>2;
-                               d = (u32 *)hdr;
-                               printk(KERN_DEBUG "EPkt rcvhdr(%x dw):\n",
-                                       dw);
-                               for (j = 0; j < dw; j++)
-                                       printk(KERN_DEBUG "%8x%s", d[j],
-                                               (j%8) == 7 ? "\n" : " ");
-                               printk(KERN_DEBUG ".\n");
-                       }
-               }
-               l += rsize;
-               if (l >= maxcnt)
-                       l = 0;
-               rhf_addr = (__le32 *) pd->port_rcvhdrq +
-                       l + dd->ipath_rhf_offset;
-               if (dd->ipath_flags & IPATH_NODMA_RTAIL) {
-                       u32 seq = ipath_hdrget_seq(rhf_addr);
-
-                       if (++pd->port_seq_cnt > 13)
-                               pd->port_seq_cnt = 1;
-                       if (seq != pd->port_seq_cnt)
-                               last = 1;
-               } else if (l == hdrqtail) {
-                       last = 1;
-               }
-               /*
-                * update head regs on last packet, and every 16 packets.
-                * Reduce bus traffic, while still trying to prevent
-                * rcvhdrq overflows, for when the queue is nearly full
-                */
-               if (last || !(i & 0xf)) {
-                       u64 lval = l;
-
-                       /* request IBA6120 and 7220 interrupt only on last */
-                       if (last)
-                               lval |= dd->ipath_rhdrhead_intr_off;
-                       ipath_write_ureg(dd, ur_rcvhdrhead, lval,
-                               pd->port_port);
-                       if (updegr) {
-                               ipath_write_ureg(dd, ur_rcvegrindexhead,
-                                                etail, pd->port_port);
-                               updegr = 0;
-                       }
-               }
-       }
-
-       if (!dd->ipath_rhdrhead_intr_off && !reloop &&
-           !(dd->ipath_flags & IPATH_NODMA_RTAIL)) {
-               /* IBA6110 workaround; we can have a race clearing chip
-                * interrupt with another interrupt about to be delivered,
-                * and can clear it before it is delivered on the GPIO
-                * workaround.  By doing the extra check here for the
-                * in-memory tail register updating while we were doing
-                * earlier packets, we "almost" guarantee we have covered
-                * that case.
-                */
-               u32 hqtail = ipath_get_rcvhdrtail(pd);
-               if (hqtail != hdrqtail) {
-                       hdrqtail = hqtail;
-                       reloop = 1; /* loop 1 extra time at most */
-                       goto reloop;
-               }
-       }
-
-       pkttot += i;
-
-       pd->port_head = l;
-
-       if (pkttot > ipath_stats.sps_maxpkts_call)
-               ipath_stats.sps_maxpkts_call = pkttot;
-       ipath_stats.sps_port0pkts += pkttot;
-       ipath_stats.sps_avgpkts_call =
-               ipath_stats.sps_port0pkts / ++totcalls;
-
-bail:;
-}
-
-/**
- * ipath_update_pio_bufs - update shadow copy of the PIO availability map
- * @dd: the infinipath device
- *
- * called whenever our local copy indicates we have run out of send buffers
- * NOTE: This can be called from interrupt context by some code
- * and from non-interrupt context by ipath_getpiobuf().
- */
-
-static void ipath_update_pio_bufs(struct ipath_devdata *dd)
-{
-       unsigned long flags;
-       int i;
-       const unsigned piobregs = (unsigned)dd->ipath_pioavregs;
-
-       /* If the generation (check) bits have changed, then we update the
-        * busy bit for the corresponding PIO buffer.  This algorithm will
-        * modify positions to the value they already have in some cases
-        * (i.e., no change), but it's faster than changing only the bits
-        * that have changed.
-        *
-        * We would like to do this atomicly, to avoid spinlocks in the
-        * critical send path, but that's not really possible, given the
-        * type of changes, and that this routine could be called on
-        * multiple cpu's simultaneously, so we lock in this routine only,
-        * to avoid conflicting updates; all we change is the shadow, and
-        * it's a single 64 bit memory location, so by definition the update
-        * is atomic in terms of what other cpu's can see in testing the
-        * bits.  The spin_lock overhead isn't too bad, since it only
-        * happens when all buffers are in use, so only cpu overhead, not
-        * latency or bandwidth is affected.
-        */
-       if (!dd->ipath_pioavailregs_dma) {
-               ipath_dbg("Update shadow pioavail, but regs_dma NULL!\n");
-               return;
-       }
-       if (ipath_debug & __IPATH_VERBDBG) {
-               /* only if packet debug and verbose */
-               volatile __le64 *dma = dd->ipath_pioavailregs_dma;
-               unsigned long *shadow = dd->ipath_pioavailshadow;
-
-               ipath_cdbg(PKT, "Refill avail, dma0=%llx shad0=%lx, "
-                          "d1=%llx s1=%lx, d2=%llx s2=%lx, d3=%llx "
-                          "s3=%lx\n",
-                          (unsigned long long) le64_to_cpu(dma[0]),
-                          shadow[0],
-                          (unsigned long long) le64_to_cpu(dma[1]),
-                          shadow[1],
-                          (unsigned long long) le64_to_cpu(dma[2]),
-                          shadow[2],
-                          (unsigned long long) le64_to_cpu(dma[3]),
-                          shadow[3]);
-               if (piobregs > 4)
-                       ipath_cdbg(
-                               PKT, "2nd group, dma4=%llx shad4=%lx, "
-                               "d5=%llx s5=%lx, d6=%llx s6=%lx, "
-                               "d7=%llx s7=%lx\n",
-                               (unsigned long long) le64_to_cpu(dma[4]),
-                               shadow[4],
-                               (unsigned long long) le64_to_cpu(dma[5]),
-                               shadow[5],
-                               (unsigned long long) le64_to_cpu(dma[6]),
-                               shadow[6],
-                               (unsigned long long) le64_to_cpu(dma[7]),
-                               shadow[7]);
-       }
-       spin_lock_irqsave(&ipath_pioavail_lock, flags);
-       for (i = 0; i < piobregs; i++) {
-               u64 pchbusy, pchg, piov, pnew;
-               /*
-                * Chip Errata: bug 6641; even and odd qwords>3 are swapped
-                */
-               if (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS))
-                       piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i ^ 1]);
-               else
-                       piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i]);
-               pchg = dd->ipath_pioavailkernel[i] &
-                       ~(dd->ipath_pioavailshadow[i] ^ piov);
-               pchbusy = pchg << INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT;
-               if (pchg && (pchbusy & dd->ipath_pioavailshadow[i])) {
-                       pnew = dd->ipath_pioavailshadow[i] & ~pchbusy;
-                       pnew |= piov & pchbusy;
-                       dd->ipath_pioavailshadow[i] = pnew;
-               }
-       }
-       spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
-}
-
-/*
- * used to force update of pioavailshadow if we can't get a pio buffer.
- * Needed primarily due to exitting freeze mode after recovering
- * from errors.  Done lazily, because it's safer (known to not
- * be writing pio buffers).
- */
-static void ipath_reset_availshadow(struct ipath_devdata *dd)
-{
-       int i, im;
-       unsigned long flags;
-
-       spin_lock_irqsave(&ipath_pioavail_lock, flags);
-       for (i = 0; i < dd->ipath_pioavregs; i++) {
-               u64 val, oldval;
-               /* deal with 6110 chip bug on high register #s */
-               im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ?
-                       i ^ 1 : i;
-               val = le64_to_cpu(dd->ipath_pioavailregs_dma[im]);
-               /*
-                * busy out the buffers not in the kernel avail list,
-                * without changing the generation bits.
-                */
-               oldval = dd->ipath_pioavailshadow[i];
-               dd->ipath_pioavailshadow[i] = val |
-                       ((~dd->ipath_pioavailkernel[i] <<
-                       INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT) &
-                       0xaaaaaaaaaaaaaaaaULL); /* All BUSY bits in qword */
-               if (oldval != dd->ipath_pioavailshadow[i])
-                       ipath_dbg("shadow[%d] was %Lx, now %lx\n",
-                               i, (unsigned long long) oldval,
-                               dd->ipath_pioavailshadow[i]);
-       }
-       spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
-}
-
-/**
- * ipath_setrcvhdrsize - set the receive header size
- * @dd: the infinipath device
- * @rhdrsize: the receive header size
- *
- * called from user init code, and also layered driver init
- */
-int ipath_setrcvhdrsize(struct ipath_devdata *dd, unsigned rhdrsize)
-{
-       int ret = 0;
-
-       if (dd->ipath_flags & IPATH_RCVHDRSZ_SET) {
-               if (dd->ipath_rcvhdrsize != rhdrsize) {
-                       dev_info(&dd->pcidev->dev,
-                                "Error: can't set protocol header "
-                                "size %u, already %u\n",
-                                rhdrsize, dd->ipath_rcvhdrsize);
-                       ret = -EAGAIN;
-               } else
-                       ipath_cdbg(VERBOSE, "Reuse same protocol header "
-                                  "size %u\n", dd->ipath_rcvhdrsize);
-       } else if (rhdrsize > (dd->ipath_rcvhdrentsize -
-                              (sizeof(u64) / sizeof(u32)))) {
-               ipath_dbg("Error: can't set protocol header size %u "
-                         "(> max %u)\n", rhdrsize,
-                         dd->ipath_rcvhdrentsize -
-                         (u32) (sizeof(u64) / sizeof(u32)));
-               ret = -EOVERFLOW;
-       } else {
-               dd->ipath_flags |= IPATH_RCVHDRSZ_SET;
-               dd->ipath_rcvhdrsize = rhdrsize;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrsize,
-                                dd->ipath_rcvhdrsize);
-               ipath_cdbg(VERBOSE, "Set protocol header size to %u\n",
-                          dd->ipath_rcvhdrsize);
-       }
-       return ret;
-}
-
-/*
- * debugging code and stats updates if no pio buffers available.
- */
-static noinline void no_pio_bufs(struct ipath_devdata *dd)
-{
-       unsigned long *shadow = dd->ipath_pioavailshadow;
-       __le64 *dma = (__le64 *)dd->ipath_pioavailregs_dma;
-
-       dd->ipath_upd_pio_shadow = 1;
-
-       /*
-        * not atomic, but if we lose a stat count in a while, that's OK
-        */
-       ipath_stats.sps_nopiobufs++;
-       if (!(++dd->ipath_consec_nopiobuf % 100000)) {
-               ipath_force_pio_avail_update(dd); /* at start */
-               ipath_dbg("%u tries no piobufavail ts%lx; dmacopy: "
-                       "%llx %llx %llx %llx\n"
-                       "ipath  shadow:  %lx %lx %lx %lx\n",
-                       dd->ipath_consec_nopiobuf,
-                       (unsigned long)get_cycles(),
-                       (unsigned long long) le64_to_cpu(dma[0]),
-                       (unsigned long long) le64_to_cpu(dma[1]),
-                       (unsigned long long) le64_to_cpu(dma[2]),
-                       (unsigned long long) le64_to_cpu(dma[3]),
-                       shadow[0], shadow[1], shadow[2], shadow[3]);
-               /*
-                * 4 buffers per byte, 4 registers above, cover rest
-                * below
-                */
-               if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) >
-                   (sizeof(shadow[0]) * 4 * 4))
-                       ipath_dbg("2nd group: dmacopy: "
-                                 "%llx %llx %llx %llx\n"
-                                 "ipath  shadow:  %lx %lx %lx %lx\n",
-                                 (unsigned long long)le64_to_cpu(dma[4]),
-                                 (unsigned long long)le64_to_cpu(dma[5]),
-                                 (unsigned long long)le64_to_cpu(dma[6]),
-                                 (unsigned long long)le64_to_cpu(dma[7]),
-                                 shadow[4], shadow[5], shadow[6], shadow[7]);
-
-               /* at end, so update likely happened */
-               ipath_reset_availshadow(dd);
-       }
-}
-
-/*
- * common code for normal driver pio buffer allocation, and reserved
- * allocation.
- *
- * do appropriate marking as busy, etc.
- * returns buffer number if one found (>=0), negative number is error.
- */
-static u32 __iomem *ipath_getpiobuf_range(struct ipath_devdata *dd,
-       u32 *pbufnum, u32 first, u32 last, u32 firsti)
-{
-       int i, j, updated = 0;
-       unsigned piobcnt;
-       unsigned long flags;
-       unsigned long *shadow = dd->ipath_pioavailshadow;
-       u32 __iomem *buf;
-
-       piobcnt = last - first;
-       if (dd->ipath_upd_pio_shadow) {
-               /*
-                * Minor optimization.  If we had no buffers on last call,
-                * start out by doing the update; continue and do scan even
-                * if no buffers were updated, to be paranoid
-                */
-               ipath_update_pio_bufs(dd);
-               updated++;
-               i = first;
-       } else
-               i = firsti;
-rescan:
-       /*
-        * while test_and_set_bit() is atomic, we do that and then the
-        * change_bit(), and the pair is not.  See if this is the cause
-        * of the remaining armlaunch errors.
-        */
-       spin_lock_irqsave(&ipath_pioavail_lock, flags);
-       for (j = 0; j < piobcnt; j++, i++) {
-               if (i >= last)
-                       i = first;
-               if (__test_and_set_bit((2 * i) + 1, shadow))
-                       continue;
-               /* flip generation bit */
-               __change_bit(2 * i, shadow);
-               break;
-       }
-       spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
-
-       if (j == piobcnt) {
-               if (!updated) {
-                       /*
-                        * first time through; shadow exhausted, but may be
-                        * buffers available, try an update and then rescan.
-                        */
-                       ipath_update_pio_bufs(dd);
-                       updated++;
-                       i = first;
-                       goto rescan;
-               } else if (updated == 1 && piobcnt <=
-                       ((dd->ipath_sendctrl
-                       >> INFINIPATH_S_UPDTHRESH_SHIFT) &
-                       INFINIPATH_S_UPDTHRESH_MASK)) {
-                       /*
-                        * for chips supporting and using the update
-                        * threshold we need to force an update of the
-                        * in-memory copy if the count is less than the
-                        * thershold, then check one more time.
-                        */
-                       ipath_force_pio_avail_update(dd);
-                       ipath_update_pio_bufs(dd);
-                       updated++;
-                       i = first;
-                       goto rescan;
-               }
-
-               no_pio_bufs(dd);
-               buf = NULL;
-       } else {
-               if (i < dd->ipath_piobcnt2k)
-                       buf = (u32 __iomem *) (dd->ipath_pio2kbase +
-                                              i * dd->ipath_palign);
-               else
-                       buf = (u32 __iomem *)
-                               (dd->ipath_pio4kbase +
-                                (i - dd->ipath_piobcnt2k) * dd->ipath_4kalign);
-               if (pbufnum)
-                       *pbufnum = i;
-       }
-
-       return buf;
-}
-
-/**
- * ipath_getpiobuf - find an available pio buffer
- * @dd: the infinipath device
- * @plen: the size of the PIO buffer needed in 32-bit words
- * @pbufnum: the buffer number is placed here
- */
-u32 __iomem *ipath_getpiobuf(struct ipath_devdata *dd, u32 plen, u32 *pbufnum)
-{
-       u32 __iomem *buf;
-       u32 pnum, nbufs;
-       u32 first, lasti;
-
-       if (plen + 1 >= IPATH_SMALLBUF_DWORDS) {
-               first = dd->ipath_piobcnt2k;
-               lasti = dd->ipath_lastpioindexl;
-       } else {
-               first = 0;
-               lasti = dd->ipath_lastpioindex;
-       }
-       nbufs = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
-       buf = ipath_getpiobuf_range(dd, &pnum, first, nbufs, lasti);
-
-       if (buf) {
-               /*
-                * Set next starting place.  It's just an optimization,
-                * it doesn't matter who wins on this, so no locking
-                */
-               if (plen + 1 >= IPATH_SMALLBUF_DWORDS)
-                       dd->ipath_lastpioindexl = pnum + 1;
-               else
-                       dd->ipath_lastpioindex = pnum + 1;
-               if (dd->ipath_upd_pio_shadow)
-                       dd->ipath_upd_pio_shadow = 0;
-               if (dd->ipath_consec_nopiobuf)
-                       dd->ipath_consec_nopiobuf = 0;
-               ipath_cdbg(VERBOSE, "Return piobuf%u %uk @ %p\n",
-                          pnum, (pnum < dd->ipath_piobcnt2k) ? 2 : 4, buf);
-               if (pbufnum)
-                       *pbufnum = pnum;
-
-       }
-       return buf;
-}
-
-/**
- * ipath_chg_pioavailkernel - change which send buffers are available for kernel
- * @dd: the infinipath device
- * @start: the starting send buffer number
- * @len: the number of send buffers
- * @avail: true if the buffers are available for kernel use, false otherwise
- */
-void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,
-                             unsigned len, int avail)
-{
-       unsigned long flags;
-       unsigned end, cnt = 0;
-
-       /* There are two bits per send buffer (busy and generation) */
-       start *= 2;
-       end = start + len * 2;
-
-       spin_lock_irqsave(&ipath_pioavail_lock, flags);
-       /* Set or clear the busy bit in the shadow. */
-       while (start < end) {
-               if (avail) {
-                       unsigned long dma;
-                       int i, im;
-                       /*
-                        * the BUSY bit will never be set, because we disarm
-                        * the user buffers before we hand them back to the
-                        * kernel.  We do have to make sure the generation
-                        * bit is set correctly in shadow, since it could
-                        * have changed many times while allocated to user.
-                        * We can't use the bitmap functions on the full
-                        * dma array because it is always little-endian, so
-                        * we have to flip to host-order first.
-                        * BITS_PER_LONG is slightly wrong, since it's
-                        * always 64 bits per register in chip...
-                        * We only work on 64 bit kernels, so that's OK.
-                        */
-                       /* deal with 6110 chip bug on high register #s */
-                       i = start / BITS_PER_LONG;
-                       im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ?
-                               i ^ 1 : i;
-                       __clear_bit(INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT
-                               + start, dd->ipath_pioavailshadow);
-                       dma = (unsigned long) le64_to_cpu(
-                               dd->ipath_pioavailregs_dma[im]);
-                       if (test_bit((INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
-                               + start) % BITS_PER_LONG, &dma))
-                               __set_bit(INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
-                                       + start, dd->ipath_pioavailshadow);
-                       else
-                               __clear_bit(INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
-                                       + start, dd->ipath_pioavailshadow);
-                       __set_bit(start, dd->ipath_pioavailkernel);
-               } else {
-                       __set_bit(start + INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT,
-                               dd->ipath_pioavailshadow);
-                       __clear_bit(start, dd->ipath_pioavailkernel);
-               }
-               start += 2;
-       }
-
-       if (dd->ipath_pioupd_thresh) {
-               end = 2 * (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k);
-               cnt = bitmap_weight(dd->ipath_pioavailkernel, end);
-       }
-       spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
-
-       /*
-        * When moving buffers from kernel to user, if number assigned to
-        * the user is less than the pio update threshold, and threshold
-        * is supported (cnt was computed > 0), drop the update threshold
-        * so we update at least once per allocated number of buffers.
-        * In any case, if the kernel buffers are less than the threshold,
-        * drop the threshold.  We don't bother increasing it, having once
-        * decreased it, since it would typically just cycle back and forth.
-        * If we don't decrease below buffers in use, we can wait a long
-        * time for an update, until some other context uses PIO buffers.
-        */
-       if (!avail && len < cnt)
-               cnt = len;
-       if (cnt < dd->ipath_pioupd_thresh) {
-               dd->ipath_pioupd_thresh = cnt;
-               ipath_dbg("Decreased pio update threshold to %u\n",
-                       dd->ipath_pioupd_thresh);
-               spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-               dd->ipath_sendctrl &= ~(INFINIPATH_S_UPDTHRESH_MASK
-                       << INFINIPATH_S_UPDTHRESH_SHIFT);
-               dd->ipath_sendctrl |= dd->ipath_pioupd_thresh
-                       << INFINIPATH_S_UPDTHRESH_SHIFT;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-                       dd->ipath_sendctrl);
-               spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-       }
-}
-
-/**
- * ipath_create_rcvhdrq - create a receive header queue
- * @dd: the infinipath device
- * @pd: the port data
- *
- * this must be contiguous memory (from an i/o perspective), and must be
- * DMA'able (which means for some systems, it will go through an IOMMU,
- * or be forced into a low address range).
- */
-int ipath_create_rcvhdrq(struct ipath_devdata *dd,
-                        struct ipath_portdata *pd)
-{
-       int ret = 0;
-
-       if (!pd->port_rcvhdrq) {
-               dma_addr_t phys_hdrqtail;
-               gfp_t gfp_flags = GFP_USER | __GFP_COMP;
-               int amt = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize *
-                               sizeof(u32), PAGE_SIZE);
-
-               pd->port_rcvhdrq = dma_alloc_coherent(
-                       &dd->pcidev->dev, amt, &pd->port_rcvhdrq_phys,
-                       gfp_flags);
-
-               if (!pd->port_rcvhdrq) {
-                       ipath_dev_err(dd, "attempt to allocate %d bytes "
-                                     "for port %u rcvhdrq failed\n",
-                                     amt, pd->port_port);
-                       ret = -ENOMEM;
-                       goto bail;
-               }
-
-               if (!(dd->ipath_flags & IPATH_NODMA_RTAIL)) {
-                       pd->port_rcvhdrtail_kvaddr = dma_alloc_coherent(
-                               &dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail,
-                               GFP_KERNEL);
-                       if (!pd->port_rcvhdrtail_kvaddr) {
-                               ipath_dev_err(dd, "attempt to allocate 1 page "
-                                       "for port %u rcvhdrqtailaddr "
-                                       "failed\n", pd->port_port);
-                               ret = -ENOMEM;
-                               dma_free_coherent(&dd->pcidev->dev, amt,
-                                       pd->port_rcvhdrq,
-                                       pd->port_rcvhdrq_phys);
-                               pd->port_rcvhdrq = NULL;
-                               goto bail;
-                       }
-                       pd->port_rcvhdrqtailaddr_phys = phys_hdrqtail;
-                       ipath_cdbg(VERBOSE, "port %d hdrtailaddr, %llx "
-                                  "physical\n", pd->port_port,
-                                  (unsigned long long) phys_hdrqtail);
-               }
-
-               pd->port_rcvhdrq_size = amt;
-
-               ipath_cdbg(VERBOSE, "%d pages at %p (phys %lx) size=%lu "
-                          "for port %u rcvhdr Q\n",
-                          amt >> PAGE_SHIFT, pd->port_rcvhdrq,
-                          (unsigned long) pd->port_rcvhdrq_phys,
-                          (unsigned long) pd->port_rcvhdrq_size,
-                          pd->port_port);
-       } else {
-               ipath_cdbg(VERBOSE, "reuse port %d rcvhdrq @%p %llx phys; "
-                          "hdrtailaddr@%p %llx physical\n",
-                          pd->port_port, pd->port_rcvhdrq,
-                          (unsigned long long) pd->port_rcvhdrq_phys,
-                          pd->port_rcvhdrtail_kvaddr, (unsigned long long)
-                          pd->port_rcvhdrqtailaddr_phys);
-       }
-       /* clear for security and sanity on each use */
-       memset(pd->port_rcvhdrq, 0, pd->port_rcvhdrq_size);
-       if (pd->port_rcvhdrtail_kvaddr)
-               memset(pd->port_rcvhdrtail_kvaddr, 0, PAGE_SIZE);
-
-       /*
-        * tell chip each time we init it, even if we are re-using previous
-        * memory (we zero the register at process close)
-        */
-       ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdrtailaddr,
-                             pd->port_port, pd->port_rcvhdrqtailaddr_phys);
-       ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr,
-                             pd->port_port, pd->port_rcvhdrq_phys);
-
-bail:
-       return ret;
-}
-
-
-/*
- * Flush all sends that might be in the ready to send state, as well as any
- * that are in the process of being sent.   Used whenever we need to be
- * sure the send side is idle.  Cleans up all buffer state by canceling
- * all pio buffers, and issuing an abort, which cleans up anything in the
- * launch fifo.  The cancel is superfluous on some chip versions, but
- * it's safer to always do it.
- * PIOAvail bits are updated by the chip as if normal send had happened.
- */
-void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl)
-{
-       unsigned long flags;
-
-       if (dd->ipath_flags & IPATH_IB_AUTONEG_INPROG) {
-               ipath_cdbg(VERBOSE, "Ignore while in autonegotiation\n");
-               goto bail;
-       }
-       /*
-        * If we have SDMA, and it's not disabled, we have to kick off the
-        * abort state machine, provided we aren't already aborting.
-        * If we are in the process of aborting SDMA (!DISABLED, but ABORTING),
-        * we skip the rest of this routine. It is already "in progress"
-        */
-       if (dd->ipath_flags & IPATH_HAS_SEND_DMA) {
-               int skip_cancel;
-               unsigned long *statp = &dd->ipath_sdma_status;
-
-               spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-               skip_cancel =
-                       test_and_set_bit(IPATH_SDMA_ABORTING, statp)
-                       && !test_bit(IPATH_SDMA_DISABLED, statp);
-               spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-               if (skip_cancel)
-                       goto bail;
-       }
-
-       ipath_dbg("Cancelling all in-progress send buffers\n");
-
-       /* skip armlaunch errs for a while */
-       dd->ipath_lastcancel = jiffies + HZ / 2;
-
-       /*
-        * The abort bit is auto-clearing.  We also don't want pioavail
-        * update happening during this, and we don't want any other
-        * sends going out, so turn those off for the duration.  We read
-        * the scratch register to be sure that cancels and the abort
-        * have taken effect in the chip.  Otherwise two parts are same
-        * as ipath_force_pio_avail_update()
-        */
-       spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-       dd->ipath_sendctrl &= ~(INFINIPATH_S_PIOBUFAVAILUPD
-               | INFINIPATH_S_PIOENABLE);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-               dd->ipath_sendctrl | INFINIPATH_S_ABORT);
-       ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-       spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-
-       /* disarm all send buffers */
-       ipath_disarm_piobufs(dd, 0,
-               dd->ipath_piobcnt2k + dd->ipath_piobcnt4k);
-
-       if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
-               set_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status);
-
-       if (restore_sendctrl) {
-               /* else done by caller later if needed */
-               spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-               dd->ipath_sendctrl |= INFINIPATH_S_PIOBUFAVAILUPD |
-                       INFINIPATH_S_PIOENABLE;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-                       dd->ipath_sendctrl);
-               /* and again, be sure all have hit the chip */
-               ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-               spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-       }
-
-       if ((dd->ipath_flags & IPATH_HAS_SEND_DMA) &&
-           !test_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status) &&
-           test_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status)) {
-               spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-               /* only wait so long for intr */
-               dd->ipath_sdma_abort_intr_timeout = jiffies + HZ;
-               dd->ipath_sdma_reset_wait = 200;
-               if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
-                       tasklet_hi_schedule(&dd->ipath_sdma_abort_task);
-               spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-       }
-bail:;
-}
-
-/*
- * Force an update of in-memory copy of the pioavail registers, when
- * needed for any of a variety of reasons.  We read the scratch register
- * to make it highly likely that the update will have happened by the
- * time we return.  If already off (as in cancel_sends above), this
- * routine is a nop, on the assumption that the caller will "do the
- * right thing".
- */
-void ipath_force_pio_avail_update(struct ipath_devdata *dd)
-{
-       unsigned long flags;
-
-       spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-       if (dd->ipath_sendctrl & INFINIPATH_S_PIOBUFAVAILUPD) {
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-                       dd->ipath_sendctrl & ~INFINIPATH_S_PIOBUFAVAILUPD);
-               ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-                       dd->ipath_sendctrl);
-               ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-       }
-       spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-}
-
-static void ipath_set_ib_lstate(struct ipath_devdata *dd, int linkcmd,
-                               int linitcmd)
-{
-       u64 mod_wd;
-       static const char *what[4] = {
-               [0] = "NOP",
-               [INFINIPATH_IBCC_LINKCMD_DOWN] = "DOWN",
-               [INFINIPATH_IBCC_LINKCMD_ARMED] = "ARMED",
-               [INFINIPATH_IBCC_LINKCMD_ACTIVE] = "ACTIVE"
-       };
-
-       if (linitcmd == INFINIPATH_IBCC_LINKINITCMD_DISABLE) {
-               /*
-                * If we are told to disable, note that so link-recovery
-                * code does not attempt to bring us back up.
-                */
-               preempt_disable();
-               dd->ipath_flags |= IPATH_IB_LINK_DISABLED;
-               preempt_enable();
-       } else if (linitcmd) {
-               /*
-                * Any other linkinitcmd will lead to LINKDOWN and then
-                * to INIT (if all is well), so clear flag to let
-                * link-recovery code attempt to bring us back up.
-                */
-               preempt_disable();
-               dd->ipath_flags &= ~IPATH_IB_LINK_DISABLED;
-               preempt_enable();
-       }
-
-       mod_wd = (linkcmd << dd->ibcc_lc_shift) |
-               (linitcmd << INFINIPATH_IBCC_LINKINITCMD_SHIFT);
-       ipath_cdbg(VERBOSE,
-               "Moving unit %u to %s (initcmd=0x%x), current ltstate is %s\n",
-               dd->ipath_unit, what[linkcmd], linitcmd,
-               ipath_ibcstatus_str[ipath_ib_linktrstate(dd,
-                       ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus))]);
-
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
-                        dd->ipath_ibcctrl | mod_wd);
-       /* read from chip so write is flushed */
-       (void) ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
-}
-
-int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate)
-{
-       u32 lstate;
-       int ret;
-
-       switch (newstate) {
-       case IPATH_IB_LINKDOWN_ONLY:
-               ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN, 0);
-               /* don't wait */
-               ret = 0;
-               goto bail;
-
-       case IPATH_IB_LINKDOWN:
-               ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN,
-                                       INFINIPATH_IBCC_LINKINITCMD_POLL);
-               /* don't wait */
-               ret = 0;
-               goto bail;
-
-       case IPATH_IB_LINKDOWN_SLEEP:
-               ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN,
-                                       INFINIPATH_IBCC_LINKINITCMD_SLEEP);
-               /* don't wait */
-               ret = 0;
-               goto bail;
-
-       case IPATH_IB_LINKDOWN_DISABLE:
-               ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN,
-                                       INFINIPATH_IBCC_LINKINITCMD_DISABLE);
-               /* don't wait */
-               ret = 0;
-               goto bail;
-
-       case IPATH_IB_LINKARM:
-               if (dd->ipath_flags & IPATH_LINKARMED) {
-                       ret = 0;
-                       goto bail;
-               }
-               if (!(dd->ipath_flags &
-                     (IPATH_LINKINIT | IPATH_LINKACTIVE))) {
-                       ret = -EINVAL;
-                       goto bail;
-               }
-               ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ARMED, 0);
-
-               /*
-                * Since the port can transition to ACTIVE by receiving
-                * a non VL 15 packet, wait for either state.
-                */
-               lstate = IPATH_LINKARMED | IPATH_LINKACTIVE;
-               break;
-
-       case IPATH_IB_LINKACTIVE:
-               if (dd->ipath_flags & IPATH_LINKACTIVE) {
-                       ret = 0;
-                       goto bail;
-               }
-               if (!(dd->ipath_flags & IPATH_LINKARMED)) {
-                       ret = -EINVAL;
-                       goto bail;
-               }
-               ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ACTIVE, 0);
-               lstate = IPATH_LINKACTIVE;
-               break;
-
-       case IPATH_IB_LINK_LOOPBACK:
-               dev_info(&dd->pcidev->dev, "Enabling IB local loopback\n");
-               dd->ipath_ibcctrl |= INFINIPATH_IBCC_LOOPBACK;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
-                                dd->ipath_ibcctrl);
-
-               /* turn heartbeat off, as it causes loopback to fail */
-               dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT,
-                                      IPATH_IB_HRTBT_OFF);
-               /* don't wait */
-               ret = 0;
-               goto bail;
-
-       case IPATH_IB_LINK_EXTERNAL:
-               dev_info(&dd->pcidev->dev,
-                       "Disabling IB local loopback (normal)\n");
-               dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT,
-                                      IPATH_IB_HRTBT_ON);
-               dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LOOPBACK;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
-                                dd->ipath_ibcctrl);
-               /* don't wait */
-               ret = 0;
-               goto bail;
-
-       /*
-        * Heartbeat can be explicitly enabled by the user via
-        * "hrtbt_enable" "file", and if disabled, trying to enable here
-        * will have no effect.  Implicit changes (heartbeat off when
-        * loopback on, and vice versa) are included to ease testing.
-        */
-       case IPATH_IB_LINK_HRTBT:
-               ret = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT,
-                       IPATH_IB_HRTBT_ON);
-               goto bail;
-
-       case IPATH_IB_LINK_NO_HRTBT:
-               ret = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT,
-                       IPATH_IB_HRTBT_OFF);
-               goto bail;
-
-       default:
-               ipath_dbg("Invalid linkstate 0x%x requested\n", newstate);
-               ret = -EINVAL;
-               goto bail;
-       }
-       ret = ipath_wait_linkstate(dd, lstate, 2000);
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_set_mtu - set the MTU
- * @dd: the infinipath device
- * @arg: the new MTU
- *
- * we can handle "any" incoming size, the issue here is whether we
- * need to restrict our outgoing size.   For now, we don't do any
- * sanity checking on this, and we don't deal with what happens to
- * programs that are already running when the size changes.
- * NOTE: changing the MTU will usually cause the IBC to go back to
- * link INIT state...
- */
-int ipath_set_mtu(struct ipath_devdata *dd, u16 arg)
-{
-       u32 piosize;
-       int changed = 0;
-       int ret;
-
-       /*
-        * mtu is IB data payload max.  It's the largest power of 2 less
-        * than piosize (or even larger, since it only really controls the
-        * largest we can receive; we can send the max of the mtu and
-        * piosize).  We check that it's one of the valid IB sizes.
-        */
-       if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 &&
-           (arg != 4096 || !ipath_mtu4096)) {
-               ipath_dbg("Trying to set invalid mtu %u, failing\n", arg);
-               ret = -EINVAL;
-               goto bail;
-       }
-       if (dd->ipath_ibmtu == arg) {
-               ret = 0;        /* same as current */
-               goto bail;
-       }
-
-       piosize = dd->ipath_ibmaxlen;
-       dd->ipath_ibmtu = arg;
-
-       if (arg >= (piosize - IPATH_PIO_MAXIBHDR)) {
-               /* Only if it's not the initial value (or reset to it) */
-               if (piosize != dd->ipath_init_ibmaxlen) {
-                       if (arg > piosize && arg <= dd->ipath_init_ibmaxlen)
-                               piosize = dd->ipath_init_ibmaxlen;
-                       dd->ipath_ibmaxlen = piosize;
-                       changed = 1;
-               }
-       } else if ((arg + IPATH_PIO_MAXIBHDR) != dd->ipath_ibmaxlen) {
-               piosize = arg + IPATH_PIO_MAXIBHDR;
-               ipath_cdbg(VERBOSE, "ibmaxlen was 0x%x, setting to 0x%x "
-                          "(mtu 0x%x)\n", dd->ipath_ibmaxlen, piosize,
-                          arg);
-               dd->ipath_ibmaxlen = piosize;
-               changed = 1;
-       }
-
-       if (changed) {
-               u64 ibc = dd->ipath_ibcctrl, ibdw;
-               /*
-                * update our housekeeping variables, and set IBC max
-                * size, same as init code; max IBC is max we allow in
-                * buffer, less the qword pbc, plus 1 for ICRC, in dwords
-                */
-               dd->ipath_ibmaxlen = piosize - 2 * sizeof(u32);
-               ibdw = (dd->ipath_ibmaxlen >> 2) + 1;
-               ibc &= ~(INFINIPATH_IBCC_MAXPKTLEN_MASK <<
-                        dd->ibcc_mpl_shift);
-               ibc |= ibdw << dd->ibcc_mpl_shift;
-               dd->ipath_ibcctrl = ibc;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
-                                dd->ipath_ibcctrl);
-               dd->ipath_f_tidtemplate(dd);
-       }
-
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-int ipath_set_lid(struct ipath_devdata *dd, u32 lid, u8 lmc)
-{
-       dd->ipath_lid = lid;
-       dd->ipath_lmc = lmc;
-
-       dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LIDLMC, lid |
-               (~((1U << lmc) - 1)) << 16);
-
-       dev_info(&dd->pcidev->dev, "We got a lid: 0x%x\n", lid);
-
-       return 0;
-}
-
-
-/**
- * ipath_write_kreg_port - write a device's per-port 64-bit kernel register
- * @dd: the infinipath device
- * @regno: the register number to write
- * @port: the port containing the register
- * @value: the value to write
- *
- * Registers that vary with the chip implementation constants (port)
- * use this routine.
- */
-void ipath_write_kreg_port(const struct ipath_devdata *dd, ipath_kreg regno,
-                         unsigned port, u64 value)
-{
-       u16 where;
-
-       if (port < dd->ipath_portcnt &&
-           (regno == dd->ipath_kregs->kr_rcvhdraddr ||
-            regno == dd->ipath_kregs->kr_rcvhdrtailaddr))
-               where = regno + port;
-       else
-               where = -1;
-
-       ipath_write_kreg(dd, where, value);
-}
-
-/*
- * Following deal with the "obviously simple" task of overriding the state
- * of the LEDS, which normally indicate link physical and logical status.
- * The complications arise in dealing with different hardware mappings
- * and the board-dependent routine being called from interrupts.
- * and then there's the requirement to _flash_ them.
- */
-#define LED_OVER_FREQ_SHIFT 8
-#define LED_OVER_FREQ_MASK (0xFF<<LED_OVER_FREQ_SHIFT)
-/* Below is "non-zero" to force override, but both actual LEDs are off */
-#define LED_OVER_BOTH_OFF (8)
-
-static void ipath_run_led_override(unsigned long opaque)
-{
-       struct ipath_devdata *dd = (struct ipath_devdata *)opaque;
-       int timeoff;
-       int pidx;
-       u64 lstate, ltstate, val;
-
-       if (!(dd->ipath_flags & IPATH_INITTED))
-               return;
-
-       pidx = dd->ipath_led_override_phase++ & 1;
-       dd->ipath_led_override = dd->ipath_led_override_vals[pidx];
-       timeoff = dd->ipath_led_override_timeoff;
-
-       /*
-        * below potentially restores the LED values per current status,
-        * should also possibly setup the traffic-blink register,
-        * but leave that to per-chip functions.
-        */
-       val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
-       ltstate = ipath_ib_linktrstate(dd, val);
-       lstate = ipath_ib_linkstate(dd, val);
-
-       dd->ipath_f_setextled(dd, lstate, ltstate);
-       mod_timer(&dd->ipath_led_override_timer, jiffies + timeoff);
-}
-
-void ipath_set_led_override(struct ipath_devdata *dd, unsigned int val)
-{
-       int timeoff, freq;
-
-       if (!(dd->ipath_flags & IPATH_INITTED))
-               return;
-
-       /* First check if we are blinking. If not, use 1HZ polling */
-       timeoff = HZ;
-       freq = (val & LED_OVER_FREQ_MASK) >> LED_OVER_FREQ_SHIFT;
-
-       if (freq) {
-               /* For blink, set each phase from one nybble of val */
-               dd->ipath_led_override_vals[0] = val & 0xF;
-               dd->ipath_led_override_vals[1] = (val >> 4) & 0xF;
-               timeoff = (HZ << 4)/freq;
-       } else {
-               /* Non-blink set both phases the same. */
-               dd->ipath_led_override_vals[0] = val & 0xF;
-               dd->ipath_led_override_vals[1] = val & 0xF;
-       }
-       dd->ipath_led_override_timeoff = timeoff;
-
-       /*
-        * If the timer has not already been started, do so. Use a "quick"
-        * timeout so the function will be called soon, to look at our request.
-        */
-       if (atomic_inc_return(&dd->ipath_led_override_timer_active) == 1) {
-               /* Need to start timer */
-               setup_timer(&dd->ipath_led_override_timer,
-                               ipath_run_led_override, (unsigned long)dd);
-
-               dd->ipath_led_override_timer.expires = jiffies + 1;
-               add_timer(&dd->ipath_led_override_timer);
-       } else
-               atomic_dec(&dd->ipath_led_override_timer_active);
-}
-
-/**
- * ipath_shutdown_device - shut down a device
- * @dd: the infinipath device
- *
- * This is called to make the device quiet when we are about to
- * unload the driver, and also when the device is administratively
- * disabled.   It does not free any data structures.
- * Everything it does has to be setup again by ipath_init_chip(dd,1)
- */
-void ipath_shutdown_device(struct ipath_devdata *dd)
-{
-       unsigned long flags;
-
-       ipath_dbg("Shutting down the device\n");
-
-       ipath_hol_up(dd); /* make sure user processes aren't suspended */
-
-       dd->ipath_flags |= IPATH_LINKUNK;
-       dd->ipath_flags &= ~(IPATH_INITTED | IPATH_LINKDOWN |
-                            IPATH_LINKINIT | IPATH_LINKARMED |
-                            IPATH_LINKACTIVE);
-       *dd->ipath_statusp &= ~(IPATH_STATUS_IB_CONF |
-                               IPATH_STATUS_IB_READY);
-
-       /* mask interrupts, but not errors */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
-
-       dd->ipath_rcvctrl = 0;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
-                        dd->ipath_rcvctrl);
-
-       if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
-               teardown_sdma(dd);
-
-       /*
-        * gracefully stop all sends allowing any in progress to trickle out
-        * first.
-        */
-       spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-       dd->ipath_sendctrl = 0;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
-       /* flush it */
-       ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-       spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-
-       /*
-        * enough for anything that's going to trickle out to have actually
-        * done so.
-        */
-       udelay(5);
-
-       dd->ipath_f_setextled(dd, 0, 0); /* make sure LEDs are off */
-
-       ipath_set_ib_lstate(dd, 0, INFINIPATH_IBCC_LINKINITCMD_DISABLE);
-       ipath_cancel_sends(dd, 0);
-
-       /*
-        * we are shutting down, so tell components that care.  We don't do
-        * this on just a link state change, much like ethernet, a cable
-        * unplug, etc. doesn't change driver state
-        */
-       signal_ib_event(dd, IB_EVENT_PORT_ERR);
-
-       /* disable IBC */
-       dd->ipath_control &= ~INFINIPATH_C_LINKENABLE;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
-                        dd->ipath_control | INFINIPATH_C_FREEZEMODE);
-
-       /*
-        * clear SerdesEnable and turn the leds off; do this here because
-        * we are unloading, so don't count on interrupts to move along
-        * Turn the LEDs off explicitly for the same reason.
-        */
-       dd->ipath_f_quiet_serdes(dd);
-
-       /* stop all the timers that might still be running */
-       del_timer_sync(&dd->ipath_hol_timer);
-       if (dd->ipath_stats_timer_active) {
-               del_timer_sync(&dd->ipath_stats_timer);
-               dd->ipath_stats_timer_active = 0;
-       }
-       if (dd->ipath_intrchk_timer.data) {
-               del_timer_sync(&dd->ipath_intrchk_timer);
-               dd->ipath_intrchk_timer.data = 0;
-       }
-       if (atomic_read(&dd->ipath_led_override_timer_active)) {
-               del_timer_sync(&dd->ipath_led_override_timer);
-               atomic_set(&dd->ipath_led_override_timer_active, 0);
-       }
-
-       /*
-        * clear all interrupts and errors, so that the next time the driver
-        * is loaded or device is enabled, we know that whatever is set
-        * happened while we were unloaded
-        */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
-                        ~0ULL & ~INFINIPATH_HWE_MEMBISTFAILED);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL);
-
-       ipath_cdbg(VERBOSE, "Flush time and errors to EEPROM\n");
-       ipath_update_eeprom_log(dd);
-}
-
-/**
- * ipath_free_pddata - free a port's allocated data
- * @dd: the infinipath device
- * @pd: the portdata structure
- *
- * free up any allocated data for a port
- * This should not touch anything that would affect a simultaneous
- * re-allocation of port data, because it is called after ipath_mutex
- * is released (and can be called from reinit as well).
- * It should never change any chip state, or global driver state.
- * (The only exception to global state is freeing the port0 port0_skbs.)
- */
-void ipath_free_pddata(struct ipath_devdata *dd, struct ipath_portdata *pd)
-{
-       if (!pd)
-               return;
-
-       if (pd->port_rcvhdrq) {
-               ipath_cdbg(VERBOSE, "free closed port %d rcvhdrq @ %p "
-                          "(size=%lu)\n", pd->port_port, pd->port_rcvhdrq,
-                          (unsigned long) pd->port_rcvhdrq_size);
-               dma_free_coherent(&dd->pcidev->dev, pd->port_rcvhdrq_size,
-                                 pd->port_rcvhdrq, pd->port_rcvhdrq_phys);
-               pd->port_rcvhdrq = NULL;
-               if (pd->port_rcvhdrtail_kvaddr) {
-                       dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
-                                        pd->port_rcvhdrtail_kvaddr,
-                                        pd->port_rcvhdrqtailaddr_phys);
-                       pd->port_rcvhdrtail_kvaddr = NULL;
-               }
-       }
-       if (pd->port_port && pd->port_rcvegrbuf) {
-               unsigned e;
-
-               for (e = 0; e < pd->port_rcvegrbuf_chunks; e++) {
-                       void *base = pd->port_rcvegrbuf[e];
-                       size_t size = pd->port_rcvegrbuf_size;
-
-                       ipath_cdbg(VERBOSE, "egrbuf free(%p, %lu), "
-                                  "chunk %u/%u\n", base,
-                                  (unsigned long) size,
-                                  e, pd->port_rcvegrbuf_chunks);
-                       dma_free_coherent(&dd->pcidev->dev, size,
-                               base, pd->port_rcvegrbuf_phys[e]);
-               }
-               kfree(pd->port_rcvegrbuf);
-               pd->port_rcvegrbuf = NULL;
-               kfree(pd->port_rcvegrbuf_phys);
-               pd->port_rcvegrbuf_phys = NULL;
-               pd->port_rcvegrbuf_chunks = 0;
-       } else if (pd->port_port == 0 && dd->ipath_port0_skbinfo) {
-               unsigned e;
-               struct ipath_skbinfo *skbinfo = dd->ipath_port0_skbinfo;
-
-               dd->ipath_port0_skbinfo = NULL;
-               ipath_cdbg(VERBOSE, "free closed port %d "
-                          "ipath_port0_skbinfo @ %p\n", pd->port_port,
-                          skbinfo);
-               for (e = 0; e < dd->ipath_p0_rcvegrcnt; e++)
-                       if (skbinfo[e].skb) {
-                               pci_unmap_single(dd->pcidev, skbinfo[e].phys,
-                                                dd->ipath_ibmaxlen,
-                                                PCI_DMA_FROMDEVICE);
-                               dev_kfree_skb(skbinfo[e].skb);
-                       }
-               vfree(skbinfo);
-       }
-       kfree(pd->port_tid_pg_list);
-       vfree(pd->subport_uregbase);
-       vfree(pd->subport_rcvegrbuf);
-       vfree(pd->subport_rcvhdr_base);
-       kfree(pd);
-}
-
-static int __init infinipath_init(void)
-{
-       int ret;
-
-       if (ipath_debug & __IPATH_DBG)
-               printk(KERN_INFO DRIVER_LOAD_MSG "%s", ib_ipath_version);
-
-       /*
-        * These must be called before the driver is registered with
-        * the PCI subsystem.
-        */
-       idr_init(&unit_table);
-
-       ret = pci_register_driver(&ipath_driver);
-       if (ret < 0) {
-               printk(KERN_ERR IPATH_DRV_NAME
-                      ": Unable to register driver: error %d\n", -ret);
-               goto bail_unit;
-       }
-
-       ret = ipath_init_ipathfs();
-       if (ret < 0) {
-               printk(KERN_ERR IPATH_DRV_NAME ": Unable to create "
-                      "ipathfs: error %d\n", -ret);
-               goto bail_pci;
-       }
-
-       goto bail;
-
-bail_pci:
-       pci_unregister_driver(&ipath_driver);
-
-bail_unit:
-       idr_destroy(&unit_table);
-
-bail:
-       return ret;
-}
-
-static void __exit infinipath_cleanup(void)
-{
-       ipath_exit_ipathfs();
-
-       ipath_cdbg(VERBOSE, "Unregistering pci driver\n");
-       pci_unregister_driver(&ipath_driver);
-
-       idr_destroy(&unit_table);
-}
-
-/**
- * ipath_reset_device - reset the chip if possible
- * @unit: the device to reset
- *
- * Whether or not reset is successful, we attempt to re-initialize the chip
- * (that is, much like a driver unload/reload).  We clear the INITTED flag
- * so that the various entry points will fail until we reinitialize.  For
- * now, we only allow this if no user ports are open that use chip resources
- */
-int ipath_reset_device(int unit)
-{
-       int ret, i;
-       struct ipath_devdata *dd = ipath_lookup(unit);
-       unsigned long flags;
-
-       if (!dd) {
-               ret = -ENODEV;
-               goto bail;
-       }
-
-       if (atomic_read(&dd->ipath_led_override_timer_active)) {
-               /* Need to stop LED timer, _then_ shut off LEDs */
-               del_timer_sync(&dd->ipath_led_override_timer);
-               atomic_set(&dd->ipath_led_override_timer_active, 0);
-       }
-
-       /* Shut off LEDs after we are sure timer is not running */
-       dd->ipath_led_override = LED_OVER_BOTH_OFF;
-       dd->ipath_f_setextled(dd, 0, 0);
-
-       dev_info(&dd->pcidev->dev, "Reset on unit %u requested\n", unit);
-
-       if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT)) {
-               dev_info(&dd->pcidev->dev, "Invalid unit number %u or "
-                        "not initialized or not present\n", unit);
-               ret = -ENXIO;
-               goto bail;
-       }
-
-       spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
-       if (dd->ipath_pd)
-               for (i = 1; i < dd->ipath_cfgports; i++) {
-                       if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt)
-                               continue;
-                       spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
-                       ipath_dbg("unit %u port %d is in use "
-                                 "(PID %u cmd %s), can't reset\n",
-                                 unit, i,
-                                 pid_nr(dd->ipath_pd[i]->port_pid),
-                                 dd->ipath_pd[i]->port_comm);
-                       ret = -EBUSY;
-                       goto bail;
-               }
-       spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
-
-       if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
-               teardown_sdma(dd);
-
-       dd->ipath_flags &= ~IPATH_INITTED;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
-       ret = dd->ipath_f_reset(dd);
-       if (ret == 1) {
-               ipath_dbg("Reinitializing unit %u after reset attempt\n",
-                         unit);
-               ret = ipath_init_chip(dd, 1);
-       } else
-               ret = -EAGAIN;
-       if (ret)
-               ipath_dev_err(dd, "Reinitialize unit %u after "
-                             "reset failed with %d\n", unit, ret);
-       else
-               dev_info(&dd->pcidev->dev, "Reinitialized unit %u after "
-                        "resetting\n", unit);
-
-bail:
-       return ret;
-}
-
-/*
- * send a signal to all the processes that have the driver open
- * through the normal interfaces (i.e., everything other than diags
- * interface).  Returns number of signalled processes.
- */
-static int ipath_signal_procs(struct ipath_devdata *dd, int sig)
-{
-       int i, sub, any = 0;
-       struct pid *pid;
-       unsigned long flags;
-
-       if (!dd->ipath_pd)
-               return 0;
-
-       spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
-       for (i = 1; i < dd->ipath_cfgports; i++) {
-               if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt)
-                       continue;
-               pid = dd->ipath_pd[i]->port_pid;
-               if (!pid)
-                       continue;
-
-               dev_info(&dd->pcidev->dev, "context %d in use "
-                         "(PID %u), sending signal %d\n",
-                         i, pid_nr(pid), sig);
-               kill_pid(pid, sig, 1);
-               any++;
-               for (sub = 0; sub < INFINIPATH_MAX_SUBPORT; sub++) {
-                       pid = dd->ipath_pd[i]->port_subpid[sub];
-                       if (!pid)
-                               continue;
-                       dev_info(&dd->pcidev->dev, "sub-context "
-                               "%d:%d in use (PID %u), sending "
-                               "signal %d\n", i, sub, pid_nr(pid), sig);
-                       kill_pid(pid, sig, 1);
-                       any++;
-               }
-       }
-       spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
-       return any;
-}
-
-static void ipath_hol_signal_down(struct ipath_devdata *dd)
-{
-       if (ipath_signal_procs(dd, SIGSTOP))
-               ipath_dbg("Stopped some processes\n");
-       ipath_cancel_sends(dd, 1);
-}
-
-
-static void ipath_hol_signal_up(struct ipath_devdata *dd)
-{
-       if (ipath_signal_procs(dd, SIGCONT))
-               ipath_dbg("Continued some processes\n");
-}
-
-/*
- * link is down, stop any users processes, and flush pending sends
- * to prevent HoL blocking, then start the HoL timer that
- * periodically continues, then stop procs, so they can detect
- * link down if they want, and do something about it.
- * Timer may already be running, so use mod_timer, not add_timer.
- */
-void ipath_hol_down(struct ipath_devdata *dd)
-{
-       dd->ipath_hol_state = IPATH_HOL_DOWN;
-       ipath_hol_signal_down(dd);
-       dd->ipath_hol_next = IPATH_HOL_DOWNCONT;
-       dd->ipath_hol_timer.expires = jiffies +
-               msecs_to_jiffies(ipath_hol_timeout_ms);
-       mod_timer(&dd->ipath_hol_timer, dd->ipath_hol_timer.expires);
-}
-
-/*
- * link is up, continue any user processes, and ensure timer
- * is a nop, if running.  Let timer keep running, if set; it
- * will nop when it sees the link is up
- */
-void ipath_hol_up(struct ipath_devdata *dd)
-{
-       ipath_hol_signal_up(dd);
-       dd->ipath_hol_state = IPATH_HOL_UP;
-}
-
-/*
- * toggle the running/not running state of user proceses
- * to prevent HoL blocking on chip resources, but still allow
- * user processes to do link down special case handling.
- * Should only be called via the timer
- */
-void ipath_hol_event(unsigned long opaque)
-{
-       struct ipath_devdata *dd = (struct ipath_devdata *)opaque;
-
-       if (dd->ipath_hol_next == IPATH_HOL_DOWNSTOP
-               && dd->ipath_hol_state != IPATH_HOL_UP) {
-               dd->ipath_hol_next = IPATH_HOL_DOWNCONT;
-               ipath_dbg("Stopping processes\n");
-               ipath_hol_signal_down(dd);
-       } else { /* may do "extra" if also in ipath_hol_up() */
-               dd->ipath_hol_next = IPATH_HOL_DOWNSTOP;
-               ipath_dbg("Continuing processes\n");
-               ipath_hol_signal_up(dd);
-       }
-       if (dd->ipath_hol_state == IPATH_HOL_UP)
-               ipath_dbg("link's up, don't resched timer\n");
-       else {
-               dd->ipath_hol_timer.expires = jiffies +
-                       msecs_to_jiffies(ipath_hol_timeout_ms);
-               mod_timer(&dd->ipath_hol_timer,
-                       dd->ipath_hol_timer.expires);
-       }
-}
-
-int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv)
-{
-       u64 val;
-
-       if (new_pol_inv > INFINIPATH_XGXS_RX_POL_MASK)
-               return -1;
-       if (dd->ipath_rx_pol_inv != new_pol_inv) {
-               dd->ipath_rx_pol_inv = new_pol_inv;
-               val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
-               val &= ~(INFINIPATH_XGXS_RX_POL_MASK <<
-                        INFINIPATH_XGXS_RX_POL_SHIFT);
-               val |= ((u64)dd->ipath_rx_pol_inv) <<
-                       INFINIPATH_XGXS_RX_POL_SHIFT;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
-       }
-       return 0;
-}
-
-/*
- * Disable and enable the armlaunch error.  Used for PIO bandwidth testing on
- * the 7220, which is count-based, rather than trigger-based.  Safe for the
- * driver check, since it's at init.   Not completely safe when used for
- * user-mode checking, since some error checking can be lost, but not
- * particularly risky, and only has problematic side-effects in the face of
- * very buggy user code.  There is no reference counting, but that's also
- * fine, given the intended use.
- */
-void ipath_enable_armlaunch(struct ipath_devdata *dd)
-{
-       dd->ipath_lasterror &= ~INFINIPATH_E_SPIOARMLAUNCH;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear,
-               INFINIPATH_E_SPIOARMLAUNCH);
-       dd->ipath_errormask |= INFINIPATH_E_SPIOARMLAUNCH;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
-               dd->ipath_errormask);
-}
-
-void ipath_disable_armlaunch(struct ipath_devdata *dd)
-{
-       /* so don't re-enable if already set */
-       dd->ipath_maskederrs &= ~INFINIPATH_E_SPIOARMLAUNCH;
-       dd->ipath_errormask &= ~INFINIPATH_E_SPIOARMLAUNCH;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
-               dd->ipath_errormask);
-}
-
-module_init(infinipath_init);
-module_exit(infinipath_cleanup);
diff --git a/drivers/staging/rdma/ipath/ipath_eeprom.c b/drivers/staging/rdma/ipath/ipath_eeprom.c
deleted file mode 100644 (file)
index ef84107..0000000
+++ /dev/null
@@ -1,1183 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/delay.h>
-#include <linux/pci.h>
-#include <linux/vmalloc.h>
-
-#include "ipath_kernel.h"
-
-/*
- * InfiniPath I2C driver for a serial eeprom.  This is not a generic
- * I2C interface.  For a start, the device we're using (Atmel AT24C11)
- * doesn't work like a regular I2C device.  It looks like one
- * electrically, but not logically.  Normal I2C devices have a single
- * 7-bit or 10-bit I2C address that they respond to.  Valid 7-bit
- * addresses range from 0x03 to 0x77.  Addresses 0x00 to 0x02 and 0x78
- * to 0x7F are special reserved addresses (e.g. 0x00 is the "general
- * call" address.)  The Atmel device, on the other hand, responds to ALL
- * 7-bit addresses.  It's designed to be the only device on a given I2C
- * bus.  A 7-bit address corresponds to the memory address within the
- * Atmel device itself.
- *
- * Also, the timing requirements mean more than simple software
- * bitbanging, with readbacks from chip to ensure timing (simple udelay
- * is not enough).
- *
- * This all means that accessing the device is specialized enough
- * that using the standard kernel I2C bitbanging interface would be
- * impossible.  For example, the core I2C eeprom driver expects to find
- * a device at one or more of a limited set of addresses only.  It doesn't
- * allow writing to an eeprom.  It also doesn't provide any means of
- * accessing eeprom contents from within the kernel, only via sysfs.
- */
-
-/* Added functionality for IBA7220-based cards */
-#define IPATH_EEPROM_DEV_V1 0xA0
-#define IPATH_EEPROM_DEV_V2 0xA2
-#define IPATH_TEMP_DEV 0x98
-#define IPATH_BAD_DEV (IPATH_EEPROM_DEV_V2+2)
-#define IPATH_NO_DEV (0xFF)
-
-/*
- * The number of I2C chains is proliferating. Table below brings
- * some order to the madness. The basic principle is that the
- * table is scanned from the top, and a "probe" is made to the
- * device probe_dev. If that succeeds, the chain is considered
- * to be of that type, and dd->i2c_chain_type is set to the index+1
- * of the entry.
- * The +1 is so static initialization can mean "unknown, do probe."
- */
-static struct i2c_chain_desc {
-       u8 probe_dev;   /* If seen at probe, chain is this type */
-       u8 eeprom_dev;  /* Dev addr (if any) for EEPROM */
-       u8 temp_dev;    /* Dev Addr (if any) for Temp-sense */
-} i2c_chains[] = {
-       { IPATH_BAD_DEV, IPATH_NO_DEV, IPATH_NO_DEV }, /* pre-iba7220 bds */
-       { IPATH_EEPROM_DEV_V1, IPATH_EEPROM_DEV_V1, IPATH_TEMP_DEV}, /* V1 */
-       { IPATH_EEPROM_DEV_V2, IPATH_EEPROM_DEV_V2, IPATH_TEMP_DEV}, /* V2 */
-       { IPATH_NO_DEV }
-};
-
-enum i2c_type {
-       i2c_line_scl = 0,
-       i2c_line_sda
-};
-
-enum i2c_state {
-       i2c_line_low = 0,
-       i2c_line_high
-};
-
-#define READ_CMD 1
-#define WRITE_CMD 0
-
-/**
- * i2c_gpio_set - set a GPIO line
- * @dd: the infinipath device
- * @line: the line to set
- * @new_line_state: the state to set
- *
- * Returns 0 if the line was set to the new state successfully, non-zero
- * on error.
- */
-static int i2c_gpio_set(struct ipath_devdata *dd,
-                       enum i2c_type line,
-                       enum i2c_state new_line_state)
-{
-       u64 out_mask, dir_mask, *gpioval;
-       unsigned long flags = 0;
-
-       gpioval = &dd->ipath_gpio_out;
-
-       if (line == i2c_line_scl) {
-               dir_mask = dd->ipath_gpio_scl;
-               out_mask = (1UL << dd->ipath_gpio_scl_num);
-       } else {
-               dir_mask = dd->ipath_gpio_sda;
-               out_mask = (1UL << dd->ipath_gpio_sda_num);
-       }
-
-       spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
-       if (new_line_state == i2c_line_high) {
-               /* tri-state the output rather than force high */
-               dd->ipath_extctrl &= ~dir_mask;
-       } else {
-               /* config line to be an output */
-               dd->ipath_extctrl |= dir_mask;
-       }
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, dd->ipath_extctrl);
-
-       /* set output as well (no real verify) */
-       if (new_line_state == i2c_line_high)
-               *gpioval |= out_mask;
-       else
-               *gpioval &= ~out_mask;
-
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_out, *gpioval);
-       spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
-
-       return 0;
-}
-
-/**
- * i2c_gpio_get - get a GPIO line state
- * @dd: the infinipath device
- * @line: the line to get
- * @curr_statep: where to put the line state
- *
- * Returns 0 if the line was set to the new state successfully, non-zero
- * on error.  curr_state is not set on error.
- */
-static int i2c_gpio_get(struct ipath_devdata *dd,
-                       enum i2c_type line,
-                       enum i2c_state *curr_statep)
-{
-       u64 read_val, mask;
-       int ret;
-       unsigned long flags = 0;
-
-       /* check args */
-       if (curr_statep == NULL) {
-               ret = 1;
-               goto bail;
-       }
-
-       /* config line to be an input */
-       if (line == i2c_line_scl)
-               mask = dd->ipath_gpio_scl;
-       else
-               mask = dd->ipath_gpio_sda;
-
-       spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
-       dd->ipath_extctrl &= ~mask;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, dd->ipath_extctrl);
-       /*
-        * Below is very unlikely to reflect true input state if Output
-        * Enable actually changed.
-        */
-       read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus);
-       spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
-
-       if (read_val & mask)
-               *curr_statep = i2c_line_high;
-       else
-               *curr_statep = i2c_line_low;
-
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-/**
- * i2c_wait_for_writes - wait for a write
- * @dd: the infinipath device
- *
- * We use this instead of udelay directly, so we can make sure
- * that previous register writes have been flushed all the way
- * to the chip.  Since we are delaying anyway, the cost doesn't
- * hurt, and makes the bit twiddling more regular
- */
-static void i2c_wait_for_writes(struct ipath_devdata *dd)
-{
-       (void)ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
-       rmb();
-}
-
-static void scl_out(struct ipath_devdata *dd, u8 bit)
-{
-       udelay(1);
-       i2c_gpio_set(dd, i2c_line_scl, bit ? i2c_line_high : i2c_line_low);
-
-       i2c_wait_for_writes(dd);
-}
-
-static void sda_out(struct ipath_devdata *dd, u8 bit)
-{
-       i2c_gpio_set(dd, i2c_line_sda, bit ? i2c_line_high : i2c_line_low);
-
-       i2c_wait_for_writes(dd);
-}
-
-static u8 sda_in(struct ipath_devdata *dd, int wait)
-{
-       enum i2c_state bit;
-
-       if (i2c_gpio_get(dd, i2c_line_sda, &bit))
-               ipath_dbg("get bit failed!\n");
-
-       if (wait)
-               i2c_wait_for_writes(dd);
-
-       return bit == i2c_line_high ? 1U : 0;
-}
-
-/**
- * i2c_ackrcv - see if ack following write is true
- * @dd: the infinipath device
- */
-static int i2c_ackrcv(struct ipath_devdata *dd)
-{
-       u8 ack_received;
-
-       /* AT ENTRY SCL = LOW */
-       /* change direction, ignore data */
-       ack_received = sda_in(dd, 1);
-       scl_out(dd, i2c_line_high);
-       ack_received = sda_in(dd, 1) == 0;
-       scl_out(dd, i2c_line_low);
-       return ack_received;
-}
-
-/**
- * rd_byte - read a byte, leaving ACK, STOP, etc up to caller
- * @dd: the infinipath device
- *
- * Returns byte shifted out of device
- */
-static int rd_byte(struct ipath_devdata *dd)
-{
-       int bit_cntr, data;
-
-       data = 0;
-
-       for (bit_cntr = 7; bit_cntr >= 0; --bit_cntr) {
-               data <<= 1;
-               scl_out(dd, i2c_line_high);
-               data |= sda_in(dd, 0);
-               scl_out(dd, i2c_line_low);
-       }
-       return data;
-}
-
-/**
- * wr_byte - write a byte, one bit at a time
- * @dd: the infinipath device
- * @data: the byte to write
- *
- * Returns 0 if we got the following ack, otherwise 1
- */
-static int wr_byte(struct ipath_devdata *dd, u8 data)
-{
-       int bit_cntr;
-       u8 bit;
-
-       for (bit_cntr = 7; bit_cntr >= 0; bit_cntr--) {
-               bit = (data >> bit_cntr) & 1;
-               sda_out(dd, bit);
-               scl_out(dd, i2c_line_high);
-               scl_out(dd, i2c_line_low);
-       }
-       return (!i2c_ackrcv(dd)) ? 1 : 0;
-}
-
-static void send_ack(struct ipath_devdata *dd)
-{
-       sda_out(dd, i2c_line_low);
-       scl_out(dd, i2c_line_high);
-       scl_out(dd, i2c_line_low);
-       sda_out(dd, i2c_line_high);
-}
-
-/**
- * i2c_startcmd - transmit the start condition, followed by address/cmd
- * @dd: the infinipath device
- * @offset_dir: direction byte
- *
- *      (both clock/data high, clock high, data low while clock is high)
- */
-static int i2c_startcmd(struct ipath_devdata *dd, u8 offset_dir)
-{
-       int res;
-
-       /* issue start sequence */
-       sda_out(dd, i2c_line_high);
-       scl_out(dd, i2c_line_high);
-       sda_out(dd, i2c_line_low);
-       scl_out(dd, i2c_line_low);
-
-       /* issue length and direction byte */
-       res = wr_byte(dd, offset_dir);
-
-       if (res)
-               ipath_cdbg(VERBOSE, "No ack to complete start\n");
-
-       return res;
-}
-
-/**
- * stop_cmd - transmit the stop condition
- * @dd: the infinipath device
- *
- * (both clock/data low, clock high, data high while clock is high)
- */
-static void stop_cmd(struct ipath_devdata *dd)
-{
-       scl_out(dd, i2c_line_low);
-       sda_out(dd, i2c_line_low);
-       scl_out(dd, i2c_line_high);
-       sda_out(dd, i2c_line_high);
-       udelay(2);
-}
-
-/**
- * eeprom_reset - reset I2C communication
- * @dd: the infinipath device
- */
-
-static int eeprom_reset(struct ipath_devdata *dd)
-{
-       int clock_cycles_left = 9;
-       u64 *gpioval = &dd->ipath_gpio_out;
-       int ret;
-       unsigned long flags;
-
-       spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
-       /* Make sure shadows are consistent */
-       dd->ipath_extctrl = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl);
-       *gpioval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_out);
-       spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
-
-       ipath_cdbg(VERBOSE, "Resetting i2c eeprom; initial gpioout reg "
-                  "is %llx\n", (unsigned long long) *gpioval);
-
-       /*
-        * This is to get the i2c into a known state, by first going low,
-        * then tristate sda (and then tristate scl as first thing
-        * in loop)
-        */
-       scl_out(dd, i2c_line_low);
-       sda_out(dd, i2c_line_high);
-
-       /* Clock up to 9 cycles looking for SDA hi, then issue START and STOP */
-       while (clock_cycles_left--) {
-               scl_out(dd, i2c_line_high);
-
-               /* SDA seen high, issue START by dropping it while SCL high */
-               if (sda_in(dd, 0)) {
-                       sda_out(dd, i2c_line_low);
-                       scl_out(dd, i2c_line_low);
-                       /* ATMEL spec says must be followed by STOP. */
-                       scl_out(dd, i2c_line_high);
-                       sda_out(dd, i2c_line_high);
-                       ret = 0;
-                       goto bail;
-               }
-
-               scl_out(dd, i2c_line_low);
-       }
-
-       ret = 1;
-
-bail:
-       return ret;
-}
-
-/*
- * Probe for I2C device at specified address. Returns 0 for "success"
- * to match rest of this file.
- * Leave bus in "reasonable" state for further commands.
- */
-static int i2c_probe(struct ipath_devdata *dd, int devaddr)
-{
-       int ret;
-
-       ret = eeprom_reset(dd);
-       if (ret) {
-               ipath_dev_err(dd, "Failed reset probing device 0x%02X\n",
-                             devaddr);
-               return ret;
-       }
-       /*
-        * Reset no longer leaves bus in start condition, so normal
-        * i2c_startcmd() will do.
-        */
-       ret = i2c_startcmd(dd, devaddr | READ_CMD);
-       if (ret)
-               ipath_cdbg(VERBOSE, "Failed startcmd for device 0x%02X\n",
-                          devaddr);
-       else {
-               /*
-                * Device did respond. Complete a single-byte read, because some
-                * devices apparently cannot handle STOP immediately after they
-                * ACK the start-cmd.
-                */
-               int data;
-               data = rd_byte(dd);
-               stop_cmd(dd);
-               ipath_cdbg(VERBOSE, "Response from device 0x%02X\n", devaddr);
-       }
-       return ret;
-}
-
-/*
- * Returns the "i2c type". This is a pointer to a struct that describes
- * the I2C chain on this board. To minimize impact on struct ipath_devdata,
- * the (small integer) index into the table is actually memoized, rather
- * then the pointer.
- * Memoization is because the type is determined on the first call per chip.
- * An alternative would be to move type determination to early
- * init code.
- */
-static struct i2c_chain_desc *ipath_i2c_type(struct ipath_devdata *dd)
-{
-       int idx;
-
-       /* Get memoized index, from previous successful probes */
-       idx = dd->ipath_i2c_chain_type - 1;
-       if (idx >= 0 && idx < (ARRAY_SIZE(i2c_chains) - 1))
-               goto done;
-
-       idx = 0;
-       while (i2c_chains[idx].probe_dev != IPATH_NO_DEV) {
-               /* if probe succeeds, this is type */
-               if (!i2c_probe(dd, i2c_chains[idx].probe_dev))
-                       break;
-               ++idx;
-       }
-
-       /*
-        * Old EEPROM (first entry) may require a reset after probe,
-        * rather than being able to "start" after "stop"
-        */
-       if (idx == 0)
-               eeprom_reset(dd);
-
-       if (i2c_chains[idx].probe_dev == IPATH_NO_DEV)
-               idx = -1;
-       else
-               dd->ipath_i2c_chain_type = idx + 1;
-done:
-       return (idx >= 0) ? i2c_chains + idx : NULL;
-}
-
-static int ipath_eeprom_internal_read(struct ipath_devdata *dd,
-                                       u8 eeprom_offset, void *buffer, int len)
-{
-       int ret;
-       struct i2c_chain_desc *icd;
-       u8 *bp = buffer;
-
-       ret = 1;
-       icd = ipath_i2c_type(dd);
-       if (!icd)
-               goto bail;
-
-       if (icd->eeprom_dev == IPATH_NO_DEV) {
-               /* legacy not-really-I2C */
-               ipath_cdbg(VERBOSE, "Start command only address\n");
-               eeprom_offset = (eeprom_offset << 1) | READ_CMD;
-               ret = i2c_startcmd(dd, eeprom_offset);
-       } else {
-               /* Actual I2C */
-               ipath_cdbg(VERBOSE, "Start command uses devaddr\n");
-               if (i2c_startcmd(dd, icd->eeprom_dev | WRITE_CMD)) {
-                       ipath_dbg("Failed EEPROM startcmd\n");
-                       stop_cmd(dd);
-                       ret = 1;
-                       goto bail;
-               }
-               ret = wr_byte(dd, eeprom_offset);
-               stop_cmd(dd);
-               if (ret) {
-                       ipath_dev_err(dd, "Failed to write EEPROM address\n");
-                       ret = 1;
-                       goto bail;
-               }
-               ret = i2c_startcmd(dd, icd->eeprom_dev | READ_CMD);
-       }
-       if (ret) {
-               ipath_dbg("Failed startcmd for dev %02X\n", icd->eeprom_dev);
-               stop_cmd(dd);
-               ret = 1;
-               goto bail;
-       }
-
-       /*
-        * eeprom keeps clocking data out as long as we ack, automatically
-        * incrementing the address.
-        */
-       while (len-- > 0) {
-               /* get and store data */
-               *bp++ = rd_byte(dd);
-               /* send ack if not the last byte */
-               if (len)
-                       send_ack(dd);
-       }
-
-       stop_cmd(dd);
-
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-static int ipath_eeprom_internal_write(struct ipath_devdata *dd, u8 eeprom_offset,
-                                      const void *buffer, int len)
-{
-       int sub_len;
-       const u8 *bp = buffer;
-       int max_wait_time, i;
-       int ret;
-       struct i2c_chain_desc *icd;
-
-       ret = 1;
-       icd = ipath_i2c_type(dd);
-       if (!icd)
-               goto bail;
-
-       while (len > 0) {
-               if (icd->eeprom_dev == IPATH_NO_DEV) {
-                       if (i2c_startcmd(dd,
-                                        (eeprom_offset << 1) | WRITE_CMD)) {
-                               ipath_dbg("Failed to start cmd offset %u\n",
-                                       eeprom_offset);
-                               goto failed_write;
-                       }
-               } else {
-                       /* Real I2C */
-                       if (i2c_startcmd(dd, icd->eeprom_dev | WRITE_CMD)) {
-                               ipath_dbg("Failed EEPROM startcmd\n");
-                               goto failed_write;
-                       }
-                       ret = wr_byte(dd, eeprom_offset);
-                       if (ret) {
-                               ipath_dev_err(dd, "Failed to write EEPROM "
-                                             "address\n");
-                               goto failed_write;
-                       }
-               }
-
-               sub_len = min(len, 4);
-               eeprom_offset += sub_len;
-               len -= sub_len;
-
-               for (i = 0; i < sub_len; i++) {
-                       if (wr_byte(dd, *bp++)) {
-                               ipath_dbg("no ack after byte %u/%u (%u "
-                                         "total remain)\n", i, sub_len,
-                                         len + sub_len - i);
-                               goto failed_write;
-                       }
-               }
-
-               stop_cmd(dd);
-
-               /*
-                * wait for write complete by waiting for a successful
-                * read (the chip replies with a zero after the write
-                * cmd completes, and before it writes to the eeprom.
-                * The startcmd for the read will fail the ack until
-                * the writes have completed.   We do this inline to avoid
-                * the debug prints that are in the real read routine
-                * if the startcmd fails.
-                * We also use the proper device address, so it doesn't matter
-                * whether we have real eeprom_dev. legacy likes any address.
-                */
-               max_wait_time = 100;
-               while (i2c_startcmd(dd, icd->eeprom_dev | READ_CMD)) {
-                       stop_cmd(dd);
-                       if (!--max_wait_time) {
-                               ipath_dbg("Did not get successful read to "
-                                         "complete write\n");
-                               goto failed_write;
-                       }
-               }
-               /* now read (and ignore) the resulting byte */
-               rd_byte(dd);
-               stop_cmd(dd);
-       }
-
-       ret = 0;
-       goto bail;
-
-failed_write:
-       stop_cmd(dd);
-       ret = 1;
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_eeprom_read - receives bytes from the eeprom via I2C
- * @dd: the infinipath device
- * @eeprom_offset: address to read from
- * @buffer: where to store result
- * @len: number of bytes to receive
- */
-int ipath_eeprom_read(struct ipath_devdata *dd, u8 eeprom_offset,
-                       void *buff, int len)
-{
-       int ret;
-
-       ret = mutex_lock_interruptible(&dd->ipath_eep_lock);
-       if (!ret) {
-               ret = ipath_eeprom_internal_read(dd, eeprom_offset, buff, len);
-               mutex_unlock(&dd->ipath_eep_lock);
-       }
-
-       return ret;
-}
-
-/**
- * ipath_eeprom_write - writes data to the eeprom via I2C
- * @dd: the infinipath device
- * @eeprom_offset: where to place data
- * @buffer: data to write
- * @len: number of bytes to write
- */
-int ipath_eeprom_write(struct ipath_devdata *dd, u8 eeprom_offset,
-                       const void *buff, int len)
-{
-       int ret;
-
-       ret = mutex_lock_interruptible(&dd->ipath_eep_lock);
-       if (!ret) {
-               ret = ipath_eeprom_internal_write(dd, eeprom_offset, buff, len);
-               mutex_unlock(&dd->ipath_eep_lock);
-       }
-
-       return ret;
-}
-
-static u8 flash_csum(struct ipath_flash *ifp, int adjust)
-{
-       u8 *ip = (u8 *) ifp;
-       u8 csum = 0, len;
-
-       /*
-        * Limit length checksummed to max length of actual data.
-        * Checksum of erased eeprom will still be bad, but we avoid
-        * reading past the end of the buffer we were passed.
-        */
-       len = ifp->if_length;
-       if (len > sizeof(struct ipath_flash))
-               len = sizeof(struct ipath_flash);
-       while (len--)
-               csum += *ip++;
-       csum -= ifp->if_csum;
-       csum = ~csum;
-       if (adjust)
-               ifp->if_csum = csum;
-
-       return csum;
-}
-
-/**
- * ipath_get_guid - get the GUID from the i2c device
- * @dd: the infinipath device
- *
- * We have the capability to use the ipath_nguid field, and get
- * the guid from the first chip's flash, to use for all of them.
- */
-void ipath_get_eeprom_info(struct ipath_devdata *dd)
-{
-       void *buf;
-       struct ipath_flash *ifp;
-       __be64 guid;
-       int len, eep_stat;
-       u8 csum, *bguid;
-       int t = dd->ipath_unit;
-       struct ipath_devdata *dd0 = ipath_lookup(0);
-
-       if (t && dd0->ipath_nguid > 1 && t <= dd0->ipath_nguid) {
-               u8 oguid;
-               dd->ipath_guid = dd0->ipath_guid;
-               bguid = (u8 *) & dd->ipath_guid;
-
-               oguid = bguid[7];
-               bguid[7] += t;
-               if (oguid > bguid[7]) {
-                       if (bguid[6] == 0xff) {
-                               if (bguid[5] == 0xff) {
-                                       ipath_dev_err(
-                                               dd,
-                                               "Can't set %s GUID from "
-                                               "base, wraps to OUI!\n",
-                                               ipath_get_unit_name(t));
-                                       dd->ipath_guid = 0;
-                                       goto bail;
-                               }
-                               bguid[5]++;
-                       }
-                       bguid[6]++;
-               }
-               dd->ipath_nguid = 1;
-
-               ipath_dbg("nguid %u, so adding %u to device 0 guid, "
-                         "for %llx\n",
-                         dd0->ipath_nguid, t,
-                         (unsigned long long) be64_to_cpu(dd->ipath_guid));
-               goto bail;
-       }
-
-       /*
-        * read full flash, not just currently used part, since it may have
-        * been written with a newer definition
-        * */
-       len = sizeof(struct ipath_flash);
-       buf = vmalloc(len);
-       if (!buf) {
-               ipath_dev_err(dd, "Couldn't allocate memory to read %u "
-                             "bytes from eeprom for GUID\n", len);
-               goto bail;
-       }
-
-       mutex_lock(&dd->ipath_eep_lock);
-       eep_stat = ipath_eeprom_internal_read(dd, 0, buf, len);
-       mutex_unlock(&dd->ipath_eep_lock);
-
-       if (eep_stat) {
-               ipath_dev_err(dd, "Failed reading GUID from eeprom\n");
-               goto done;
-       }
-       ifp = (struct ipath_flash *)buf;
-
-       csum = flash_csum(ifp, 0);
-       if (csum != ifp->if_csum) {
-               dev_info(&dd->pcidev->dev, "Bad I2C flash checksum: "
-                        "0x%x, not 0x%x\n", csum, ifp->if_csum);
-               goto done;
-       }
-       if (*(__be64 *) ifp->if_guid == cpu_to_be64(0) ||
-           *(__be64 *) ifp->if_guid == ~cpu_to_be64(0)) {
-               ipath_dev_err(dd, "Invalid GUID %llx from flash; "
-                             "ignoring\n",
-                             *(unsigned long long *) ifp->if_guid);
-               /* don't allow GUID if all 0 or all 1's */
-               goto done;
-       }
-
-       /* complain, but allow it */
-       if (*(u64 *) ifp->if_guid == 0x100007511000000ULL)
-               dev_info(&dd->pcidev->dev, "Warning, GUID %llx is "
-                        "default, probably not correct!\n",
-                        *(unsigned long long *) ifp->if_guid);
-
-       bguid = ifp->if_guid;
-       if (!bguid[0] && !bguid[1] && !bguid[2]) {
-               /* original incorrect GUID format in flash; fix in
-                * core copy, by shifting up 2 octets; don't need to
-                * change top octet, since both it and shifted are
-                * 0.. */
-               bguid[1] = bguid[3];
-               bguid[2] = bguid[4];
-               bguid[3] = bguid[4] = 0;
-               guid = *(__be64 *) ifp->if_guid;
-               ipath_cdbg(VERBOSE, "Old GUID format in flash, top 3 zero, "
-                          "shifting 2 octets\n");
-       } else
-               guid = *(__be64 *) ifp->if_guid;
-       dd->ipath_guid = guid;
-       dd->ipath_nguid = ifp->if_numguid;
-       /*
-        * Things are slightly complicated by the desire to transparently
-        * support both the Pathscale 10-digit serial number and the QLogic
-        * 13-character version.
-        */
-       if ((ifp->if_fversion > 1) && ifp->if_sprefix[0]
-               && ((u8 *)ifp->if_sprefix)[0] != 0xFF) {
-               /* This board has a Serial-prefix, which is stored
-                * elsewhere for backward-compatibility.
-                */
-               char *snp = dd->ipath_serial;
-               memcpy(snp, ifp->if_sprefix, sizeof ifp->if_sprefix);
-               snp[sizeof ifp->if_sprefix] = '\0';
-               len = strlen(snp);
-               snp += len;
-               len = (sizeof dd->ipath_serial) - len;
-               if (len > sizeof ifp->if_serial) {
-                       len = sizeof ifp->if_serial;
-               }
-               memcpy(snp, ifp->if_serial, len);
-       } else
-               memcpy(dd->ipath_serial, ifp->if_serial,
-                      sizeof ifp->if_serial);
-       if (!strstr(ifp->if_comment, "Tested successfully"))
-               ipath_dev_err(dd, "Board SN %s did not pass functional "
-                       "test: %s\n", dd->ipath_serial,
-                       ifp->if_comment);
-
-       ipath_cdbg(VERBOSE, "Initted GUID to %llx from eeprom\n",
-                  (unsigned long long) be64_to_cpu(dd->ipath_guid));
-
-       memcpy(&dd->ipath_eep_st_errs, &ifp->if_errcntp, IPATH_EEP_LOG_CNT);
-       /*
-        * Power-on (actually "active") hours are kept as little-endian value
-        * in EEPROM, but as seconds in a (possibly as small as 24-bit)
-        * atomic_t while running.
-        */
-       atomic_set(&dd->ipath_active_time, 0);
-       dd->ipath_eep_hrs = ifp->if_powerhour[0] | (ifp->if_powerhour[1] << 8);
-
-done:
-       vfree(buf);
-
-bail:;
-}
-
-/**
- * ipath_update_eeprom_log - copy active-time and error counters to eeprom
- * @dd: the infinipath device
- *
- * Although the time is kept as seconds in the ipath_devdata struct, it is
- * rounded to hours for re-write, as we have only 16 bits in EEPROM.
- * First-cut code reads whole (expected) struct ipath_flash, modifies,
- * re-writes. Future direction: read/write only what we need, assuming
- * that the EEPROM had to have been "good enough" for driver init, and
- * if not, we aren't making it worse.
- *
- */
-
-int ipath_update_eeprom_log(struct ipath_devdata *dd)
-{
-       void *buf;
-       struct ipath_flash *ifp;
-       int len, hi_water;
-       uint32_t new_time, new_hrs;
-       u8 csum;
-       int ret, idx;
-       unsigned long flags;
-
-       /* first, check if we actually need to do anything. */
-       ret = 0;
-       for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) {
-               if (dd->ipath_eep_st_new_errs[idx]) {
-                       ret = 1;
-                       break;
-               }
-       }
-       new_time = atomic_read(&dd->ipath_active_time);
-
-       if (ret == 0 && new_time < 3600)
-               return 0;
-
-       /*
-        * The quick-check above determined that there is something worthy
-        * of logging, so get current contents and do a more detailed idea.
-        * read full flash, not just currently used part, since it may have
-        * been written with a newer definition
-        */
-       len = sizeof(struct ipath_flash);
-       buf = vmalloc(len);
-       ret = 1;
-       if (!buf) {
-               ipath_dev_err(dd, "Couldn't allocate memory to read %u "
-                               "bytes from eeprom for logging\n", len);
-               goto bail;
-       }
-
-       /* Grab semaphore and read current EEPROM. If we get an
-        * error, let go, but if not, keep it until we finish write.
-        */
-       ret = mutex_lock_interruptible(&dd->ipath_eep_lock);
-       if (ret) {
-               ipath_dev_err(dd, "Unable to acquire EEPROM for logging\n");
-               goto free_bail;
-       }
-       ret = ipath_eeprom_internal_read(dd, 0, buf, len);
-       if (ret) {
-               mutex_unlock(&dd->ipath_eep_lock);
-               ipath_dev_err(dd, "Unable read EEPROM for logging\n");
-               goto free_bail;
-       }
-       ifp = (struct ipath_flash *)buf;
-
-       csum = flash_csum(ifp, 0);
-       if (csum != ifp->if_csum) {
-               mutex_unlock(&dd->ipath_eep_lock);
-               ipath_dev_err(dd, "EEPROM cks err (0x%02X, S/B 0x%02X)\n",
-                               csum, ifp->if_csum);
-               ret = 1;
-               goto free_bail;
-       }
-       hi_water = 0;
-       spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
-       for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) {
-               int new_val = dd->ipath_eep_st_new_errs[idx];
-               if (new_val) {
-                       /*
-                        * If we have seen any errors, add to EEPROM values
-                        * We need to saturate at 0xFF (255) and we also
-                        * would need to adjust the checksum if we were
-                        * trying to minimize EEPROM traffic
-                        * Note that we add to actual current count in EEPROM,
-                        * in case it was altered while we were running.
-                        */
-                       new_val += ifp->if_errcntp[idx];
-                       if (new_val > 0xFF)
-                               new_val = 0xFF;
-                       if (ifp->if_errcntp[idx] != new_val) {
-                               ifp->if_errcntp[idx] = new_val;
-                               hi_water = offsetof(struct ipath_flash,
-                                               if_errcntp) + idx;
-                       }
-                       /*
-                        * update our shadow (used to minimize EEPROM
-                        * traffic), to match what we are about to write.
-                        */
-                       dd->ipath_eep_st_errs[idx] = new_val;
-                       dd->ipath_eep_st_new_errs[idx] = 0;
-               }
-       }
-       /*
-        * now update active-time. We would like to round to the nearest hour
-        * but unless atomic_t are sure to be proper signed ints we cannot,
-        * because we need to account for what we "transfer" to EEPROM and
-        * if we log an hour at 31 minutes, then we would need to set
-        * active_time to -29 to accurately count the _next_ hour.
-        */
-       if (new_time >= 3600) {
-               new_hrs = new_time / 3600;
-               atomic_sub((new_hrs * 3600), &dd->ipath_active_time);
-               new_hrs += dd->ipath_eep_hrs;
-               if (new_hrs > 0xFFFF)
-                       new_hrs = 0xFFFF;
-               dd->ipath_eep_hrs = new_hrs;
-               if ((new_hrs & 0xFF) != ifp->if_powerhour[0]) {
-                       ifp->if_powerhour[0] = new_hrs & 0xFF;
-                       hi_water = offsetof(struct ipath_flash, if_powerhour);
-               }
-               if ((new_hrs >> 8) != ifp->if_powerhour[1]) {
-                       ifp->if_powerhour[1] = new_hrs >> 8;
-                       hi_water = offsetof(struct ipath_flash, if_powerhour)
-                                       + 1;
-               }
-       }
-       /*
-        * There is a tiny possibility that we could somehow fail to write
-        * the EEPROM after updating our shadows, but problems from holding
-        * the spinlock too long are a much bigger issue.
-        */
-       spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
-       if (hi_water) {
-               /* we made some change to the data, uopdate cksum and write */
-               csum = flash_csum(ifp, 1);
-               ret = ipath_eeprom_internal_write(dd, 0, buf, hi_water + 1);
-       }
-       mutex_unlock(&dd->ipath_eep_lock);
-       if (ret)
-               ipath_dev_err(dd, "Failed updating EEPROM\n");
-
-free_bail:
-       vfree(buf);
-bail:
-       return ret;
-
-}
-
-/**
- * ipath_inc_eeprom_err - increment one of the four error counters
- * that are logged to EEPROM.
- * @dd: the infinipath device
- * @eidx: 0..3, the counter to increment
- * @incr: how much to add
- *
- * Each counter is 8-bits, and saturates at 255 (0xFF). They
- * are copied to the EEPROM (aka flash) whenever ipath_update_eeprom_log()
- * is called, but it can only be called in a context that allows sleep.
- * This function can be called even at interrupt level.
- */
-
-void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr)
-{
-       uint new_val;
-       unsigned long flags;
-
-       spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
-       new_val = dd->ipath_eep_st_new_errs[eidx] + incr;
-       if (new_val > 255)
-               new_val = 255;
-       dd->ipath_eep_st_new_errs[eidx] = new_val;
-       spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
-       return;
-}
-
-static int ipath_tempsense_internal_read(struct ipath_devdata *dd, u8 regnum)
-{
-       int ret;
-       struct i2c_chain_desc *icd;
-
-       ret = -ENOENT;
-
-       icd = ipath_i2c_type(dd);
-       if (!icd)
-               goto bail;
-
-       if (icd->temp_dev == IPATH_NO_DEV) {
-               /* tempsense only exists on new, real-I2C boards */
-               ret = -ENXIO;
-               goto bail;
-       }
-
-       if (i2c_startcmd(dd, icd->temp_dev | WRITE_CMD)) {
-               ipath_dbg("Failed tempsense startcmd\n");
-               stop_cmd(dd);
-               ret = -ENXIO;
-               goto bail;
-       }
-       ret = wr_byte(dd, regnum);
-       stop_cmd(dd);
-       if (ret) {
-               ipath_dev_err(dd, "Failed tempsense WR command %02X\n",
-                             regnum);
-               ret = -ENXIO;
-               goto bail;
-       }
-       if (i2c_startcmd(dd, icd->temp_dev | READ_CMD)) {
-               ipath_dbg("Failed tempsense RD startcmd\n");
-               stop_cmd(dd);
-               ret = -ENXIO;
-               goto bail;
-       }
-       /*
-        * We can only clock out one byte per command, sensibly
-        */
-       ret = rd_byte(dd);
-       stop_cmd(dd);
-
-bail:
-       return ret;
-}
-
-#define VALID_TS_RD_REG_MASK 0xBF
-
-/**
- * ipath_tempsense_read - read register of temp sensor via I2C
- * @dd: the infinipath device
- * @regnum: register to read from
- *
- * returns reg contents (0..255) or < 0 for error
- */
-int ipath_tempsense_read(struct ipath_devdata *dd, u8 regnum)
-{
-       int ret;
-
-       if (regnum > 7)
-               return -EINVAL;
-
-       /* return a bogus value for (the one) register we do not have */
-       if (!((1 << regnum) & VALID_TS_RD_REG_MASK))
-               return 0;
-
-       ret = mutex_lock_interruptible(&dd->ipath_eep_lock);
-       if (!ret) {
-               ret = ipath_tempsense_internal_read(dd, regnum);
-               mutex_unlock(&dd->ipath_eep_lock);
-       }
-
-       /*
-        * There are three possibilities here:
-        * ret is actual value (0..255)
-        * ret is -ENXIO or -EINVAL from code in this file
-        * ret is -EINTR from mutex_lock_interruptible.
-        */
-       return ret;
-}
-
-static int ipath_tempsense_internal_write(struct ipath_devdata *dd,
-                                         u8 regnum, u8 data)
-{
-       int ret = -ENOENT;
-       struct i2c_chain_desc *icd;
-
-       icd = ipath_i2c_type(dd);
-       if (!icd)
-               goto bail;
-
-       if (icd->temp_dev == IPATH_NO_DEV) {
-               /* tempsense only exists on new, real-I2C boards */
-               ret = -ENXIO;
-               goto bail;
-       }
-       if (i2c_startcmd(dd, icd->temp_dev | WRITE_CMD)) {
-               ipath_dbg("Failed tempsense startcmd\n");
-               stop_cmd(dd);
-               ret = -ENXIO;
-               goto bail;
-       }
-       ret = wr_byte(dd, regnum);
-       if (ret) {
-               stop_cmd(dd);
-               ipath_dev_err(dd, "Failed to write tempsense command %02X\n",
-                             regnum);
-               ret = -ENXIO;
-               goto bail;
-       }
-       ret = wr_byte(dd, data);
-       stop_cmd(dd);
-       ret = i2c_startcmd(dd, icd->temp_dev | READ_CMD);
-       if (ret) {
-               ipath_dev_err(dd, "Failed tempsense data wrt to %02X\n",
-                             regnum);
-               ret = -ENXIO;
-       }
-
-bail:
-       return ret;
-}
-
-#define VALID_TS_WR_REG_MASK ((1 << 9) | (1 << 0xB) | (1 << 0xD))
-
-/**
- * ipath_tempsense_write - write register of temp sensor via I2C
- * @dd: the infinipath device
- * @regnum: register to write
- * @data: data to write
- *
- * returns 0 for success or < 0 for error
- */
-int ipath_tempsense_write(struct ipath_devdata *dd, u8 regnum, u8 data)
-{
-       int ret;
-
-       if (regnum > 15 || !((1 << regnum) & VALID_TS_WR_REG_MASK))
-               return -EINVAL;
-
-       ret = mutex_lock_interruptible(&dd->ipath_eep_lock);
-       if (!ret) {
-               ret = ipath_tempsense_internal_write(dd, regnum, data);
-               mutex_unlock(&dd->ipath_eep_lock);
-       }
-
-       /*
-        * There are three possibilities here:
-        * ret is 0 for success
-        * ret is -ENXIO or -EINVAL from code in this file
-        * ret is -EINTR from mutex_lock_interruptible.
-        */
-       return ret;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_file_ops.c b/drivers/staging/rdma/ipath/ipath_file_ops.c
deleted file mode 100644 (file)
index 6187b84..0000000
+++ /dev/null
@@ -1,2619 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/pci.h>
-#include <linux/poll.h>
-#include <linux/cdev.h>
-#include <linux/swap.h>
-#include <linux/export.h>
-#include <linux/vmalloc.h>
-#include <linux/slab.h>
-#include <linux/highmem.h>
-#include <linux/io.h>
-#include <linux/jiffies.h>
-#include <linux/cpu.h>
-#include <linux/uio.h>
-#include <asm/pgtable.h>
-
-#include "ipath_kernel.h"
-#include "ipath_common.h"
-#include "ipath_user_sdma.h"
-
-static int ipath_open(struct inode *, struct file *);
-static int ipath_close(struct inode *, struct file *);
-static ssize_t ipath_write(struct file *, const char __user *, size_t,
-                          loff_t *);
-static ssize_t ipath_write_iter(struct kiocb *, struct iov_iter *from);
-static unsigned int ipath_poll(struct file *, struct poll_table_struct *);
-static int ipath_mmap(struct file *, struct vm_area_struct *);
-
-/*
- * This is really, really weird shit - write() and writev() here
- * have completely unrelated semantics.  Sucky userland ABI,
- * film at 11.
- */
-static const struct file_operations ipath_file_ops = {
-       .owner = THIS_MODULE,
-       .write = ipath_write,
-       .write_iter = ipath_write_iter,
-       .open = ipath_open,
-       .release = ipath_close,
-       .poll = ipath_poll,
-       .mmap = ipath_mmap,
-       .llseek = noop_llseek,
-};
-
-/*
- * Convert kernel virtual addresses to physical addresses so they don't
- * potentially conflict with the chip addresses used as mmap offsets.
- * It doesn't really matter what mmap offset we use as long as we can
- * interpret it correctly.
- */
-static u64 cvt_kvaddr(void *p)
-{
-       struct page *page;
-       u64 paddr = 0;
-
-       page = vmalloc_to_page(p);
-       if (page)
-               paddr = page_to_pfn(page) << PAGE_SHIFT;
-
-       return paddr;
-}
-
-static int ipath_get_base_info(struct file *fp,
-                              void __user *ubase, size_t ubase_size)
-{
-       struct ipath_portdata *pd = port_fp(fp);
-       int ret = 0;
-       struct ipath_base_info *kinfo = NULL;
-       struct ipath_devdata *dd = pd->port_dd;
-       unsigned subport_cnt;
-       int shared, master;
-       size_t sz;
-
-       subport_cnt = pd->port_subport_cnt;
-       if (!subport_cnt) {
-               shared = 0;
-               master = 0;
-               subport_cnt = 1;
-       } else {
-               shared = 1;
-               master = !subport_fp(fp);
-       }
-
-       sz = sizeof(*kinfo);
-       /* If port sharing is not requested, allow the old size structure */
-       if (!shared)
-               sz -= 7 * sizeof(u64);
-       if (ubase_size < sz) {
-               ipath_cdbg(PROC,
-                          "Base size %zu, need %zu (version mismatch?)\n",
-                          ubase_size, sz);
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       kinfo = kzalloc(sizeof(*kinfo), GFP_KERNEL);
-       if (kinfo == NULL) {
-               ret = -ENOMEM;
-               goto bail;
-       }
-
-       ret = dd->ipath_f_get_base_info(pd, kinfo);
-       if (ret < 0)
-               goto bail;
-
-       kinfo->spi_rcvhdr_cnt = dd->ipath_rcvhdrcnt;
-       kinfo->spi_rcvhdrent_size = dd->ipath_rcvhdrentsize;
-       kinfo->spi_tidegrcnt = dd->ipath_rcvegrcnt;
-       kinfo->spi_rcv_egrbufsize = dd->ipath_rcvegrbufsize;
-       /*
-        * have to mmap whole thing
-        */
-       kinfo->spi_rcv_egrbuftotlen =
-               pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size;
-       kinfo->spi_rcv_egrperchunk = pd->port_rcvegrbufs_perchunk;
-       kinfo->spi_rcv_egrchunksize = kinfo->spi_rcv_egrbuftotlen /
-               pd->port_rcvegrbuf_chunks;
-       kinfo->spi_tidcnt = dd->ipath_rcvtidcnt / subport_cnt;
-       if (master)
-               kinfo->spi_tidcnt += dd->ipath_rcvtidcnt % subport_cnt;
-       /*
-        * for this use, may be ipath_cfgports summed over all chips that
-        * are are configured and present
-        */
-       kinfo->spi_nports = dd->ipath_cfgports;
-       /* unit (chip/board) our port is on */
-       kinfo->spi_unit = dd->ipath_unit;
-       /* for now, only a single page */
-       kinfo->spi_tid_maxsize = PAGE_SIZE;
-
-       /*
-        * Doing this per port, and based on the skip value, etc.  This has
-        * to be the actual buffer size, since the protocol code treats it
-        * as an array.
-        *
-        * These have to be set to user addresses in the user code via mmap.
-        * These values are used on return to user code for the mmap target
-        * addresses only.  For 32 bit, same 44 bit address problem, so use
-        * the physical address, not virtual.  Before 2.6.11, using the
-        * page_address() macro worked, but in 2.6.11, even that returns the
-        * full 64 bit address (upper bits all 1's).  So far, using the
-        * physical addresses (or chip offsets, for chip mapping) works, but
-        * no doubt some future kernel release will change that, and we'll be
-        * on to yet another method of dealing with this.
-        */
-       kinfo->spi_rcvhdr_base = (u64) pd->port_rcvhdrq_phys;
-       kinfo->spi_rcvhdr_tailaddr = (u64) pd->port_rcvhdrqtailaddr_phys;
-       kinfo->spi_rcv_egrbufs = (u64) pd->port_rcvegr_phys;
-       kinfo->spi_pioavailaddr = (u64) dd->ipath_pioavailregs_phys;
-       kinfo->spi_status = (u64) kinfo->spi_pioavailaddr +
-               (void *) dd->ipath_statusp -
-               (void *) dd->ipath_pioavailregs_dma;
-       if (!shared) {
-               kinfo->spi_piocnt = pd->port_piocnt;
-               kinfo->spi_piobufbase = (u64) pd->port_piobufs;
-               kinfo->__spi_uregbase = (u64) dd->ipath_uregbase +
-                       dd->ipath_ureg_align * pd->port_port;
-       } else if (master) {
-               kinfo->spi_piocnt = (pd->port_piocnt / subport_cnt) +
-                                   (pd->port_piocnt % subport_cnt);
-               /* Master's PIO buffers are after all the slave's */
-               kinfo->spi_piobufbase = (u64) pd->port_piobufs +
-                       dd->ipath_palign *
-                       (pd->port_piocnt - kinfo->spi_piocnt);
-       } else {
-               unsigned slave = subport_fp(fp) - 1;
-
-               kinfo->spi_piocnt = pd->port_piocnt / subport_cnt;
-               kinfo->spi_piobufbase = (u64) pd->port_piobufs +
-                       dd->ipath_palign * kinfo->spi_piocnt * slave;
-       }
-
-       if (shared) {
-               kinfo->spi_port_uregbase = (u64) dd->ipath_uregbase +
-                       dd->ipath_ureg_align * pd->port_port;
-               kinfo->spi_port_rcvegrbuf = kinfo->spi_rcv_egrbufs;
-               kinfo->spi_port_rcvhdr_base = kinfo->spi_rcvhdr_base;
-               kinfo->spi_port_rcvhdr_tailaddr = kinfo->spi_rcvhdr_tailaddr;
-
-               kinfo->__spi_uregbase = cvt_kvaddr(pd->subport_uregbase +
-                       PAGE_SIZE * subport_fp(fp));
-
-               kinfo->spi_rcvhdr_base = cvt_kvaddr(pd->subport_rcvhdr_base +
-                       pd->port_rcvhdrq_size * subport_fp(fp));
-               kinfo->spi_rcvhdr_tailaddr = 0;
-               kinfo->spi_rcv_egrbufs = cvt_kvaddr(pd->subport_rcvegrbuf +
-                       pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size *
-                       subport_fp(fp));
-
-               kinfo->spi_subport_uregbase =
-                       cvt_kvaddr(pd->subport_uregbase);
-               kinfo->spi_subport_rcvegrbuf =
-                       cvt_kvaddr(pd->subport_rcvegrbuf);
-               kinfo->spi_subport_rcvhdr_base =
-                       cvt_kvaddr(pd->subport_rcvhdr_base);
-               ipath_cdbg(PROC, "port %u flags %x %llx %llx %llx\n",
-                       kinfo->spi_port, kinfo->spi_runtime_flags,
-                       (unsigned long long) kinfo->spi_subport_uregbase,
-                       (unsigned long long) kinfo->spi_subport_rcvegrbuf,
-                       (unsigned long long) kinfo->spi_subport_rcvhdr_base);
-       }
-
-       /*
-        * All user buffers are 2KB buffers.  If we ever support
-        * giving 4KB buffers to user processes, this will need some
-        * work.
-        */
-       kinfo->spi_pioindex = (kinfo->spi_piobufbase -
-               (dd->ipath_piobufbase & 0xffffffff)) / dd->ipath_palign;
-       kinfo->spi_pioalign = dd->ipath_palign;
-
-       kinfo->spi_qpair = IPATH_KD_QP;
-       /*
-        * user mode PIO buffers are always 2KB, even when 4KB can
-        * be received, and sent via the kernel; this is ibmaxlen
-        * for 2K MTU.
-        */
-       kinfo->spi_piosize = dd->ipath_piosize2k - 2 * sizeof(u32);
-       kinfo->spi_mtu = dd->ipath_ibmaxlen;    /* maxlen, not ibmtu */
-       kinfo->spi_port = pd->port_port;
-       kinfo->spi_subport = subport_fp(fp);
-       kinfo->spi_sw_version = IPATH_KERN_SWVERSION;
-       kinfo->spi_hw_version = dd->ipath_revision;
-
-       if (master) {
-               kinfo->spi_runtime_flags |= IPATH_RUNTIME_MASTER;
-       }
-
-       sz = (ubase_size < sizeof(*kinfo)) ? ubase_size : sizeof(*kinfo);
-       if (copy_to_user(ubase, kinfo, sz))
-               ret = -EFAULT;
-
-bail:
-       kfree(kinfo);
-       return ret;
-}
-
-/**
- * ipath_tid_update - update a port TID
- * @pd: the port
- * @fp: the ipath device file
- * @ti: the TID information
- *
- * The new implementation as of Oct 2004 is that the driver assigns
- * the tid and returns it to the caller.   To make it easier to
- * catch bugs, and to reduce search time, we keep a cursor for
- * each port, walking the shadow tid array to find one that's not
- * in use.
- *
- * For now, if we can't allocate the full list, we fail, although
- * in the long run, we'll allocate as many as we can, and the
- * caller will deal with that by trying the remaining pages later.
- * That means that when we fail, we have to mark the tids as not in
- * use again, in our shadow copy.
- *
- * It's up to the caller to free the tids when they are done.
- * We'll unlock the pages as they free them.
- *
- * Also, right now we are locking one page at a time, but since
- * the intended use of this routine is for a single group of
- * virtually contiguous pages, that should change to improve
- * performance.
- */
-static int ipath_tid_update(struct ipath_portdata *pd, struct file *fp,
-                           const struct ipath_tid_info *ti)
-{
-       int ret = 0, ntids;
-       u32 tid, porttid, cnt, i, tidcnt, tidoff;
-       u16 *tidlist;
-       struct ipath_devdata *dd = pd->port_dd;
-       u64 physaddr;
-       unsigned long vaddr;
-       u64 __iomem *tidbase;
-       unsigned long tidmap[8];
-       struct page **pagep = NULL;
-       unsigned subport = subport_fp(fp);
-
-       if (!dd->ipath_pageshadow) {
-               ret = -ENOMEM;
-               goto done;
-       }
-
-       cnt = ti->tidcnt;
-       if (!cnt) {
-               ipath_dbg("After copyin, tidcnt 0, tidlist %llx\n",
-                         (unsigned long long) ti->tidlist);
-               /*
-                * Should we treat as success?  likely a bug
-                */
-               ret = -EFAULT;
-               goto done;
-       }
-       porttid = pd->port_port * dd->ipath_rcvtidcnt;
-       if (!pd->port_subport_cnt) {
-               tidcnt = dd->ipath_rcvtidcnt;
-               tid = pd->port_tidcursor;
-               tidoff = 0;
-       } else if (!subport) {
-               tidcnt = (dd->ipath_rcvtidcnt / pd->port_subport_cnt) +
-                        (dd->ipath_rcvtidcnt % pd->port_subport_cnt);
-               tidoff = dd->ipath_rcvtidcnt - tidcnt;
-               porttid += tidoff;
-               tid = tidcursor_fp(fp);
-       } else {
-               tidcnt = dd->ipath_rcvtidcnt / pd->port_subport_cnt;
-               tidoff = tidcnt * (subport - 1);
-               porttid += tidoff;
-               tid = tidcursor_fp(fp);
-       }
-       if (cnt > tidcnt) {
-               /* make sure it all fits in port_tid_pg_list */
-               dev_info(&dd->pcidev->dev, "Process tried to allocate %u "
-                        "TIDs, only trying max (%u)\n", cnt, tidcnt);
-               cnt = tidcnt;
-       }
-       pagep = &((struct page **) pd->port_tid_pg_list)[tidoff];
-       tidlist = &((u16 *) &pagep[dd->ipath_rcvtidcnt])[tidoff];
-
-       memset(tidmap, 0, sizeof(tidmap));
-       /* before decrement; chip actual # */
-       ntids = tidcnt;
-       tidbase = (u64 __iomem *) (((char __iomem *) dd->ipath_kregbase) +
-                                  dd->ipath_rcvtidbase +
-                                  porttid * sizeof(*tidbase));
-
-       ipath_cdbg(VERBOSE, "Port%u %u tids, cursor %u, tidbase %p\n",
-                  pd->port_port, cnt, tid, tidbase);
-
-       /* virtual address of first page in transfer */
-       vaddr = ti->tidvaddr;
-       if (!access_ok(VERIFY_WRITE, (void __user *) vaddr,
-                      cnt * PAGE_SIZE)) {
-               ipath_dbg("Fail vaddr %p, %u pages, !access_ok\n",
-                         (void *)vaddr, cnt);
-               ret = -EFAULT;
-               goto done;
-       }
-       ret = ipath_get_user_pages(vaddr, cnt, pagep);
-       if (ret) {
-               if (ret == -EBUSY) {
-                       ipath_dbg("Failed to lock addr %p, %u pages "
-                                 "(already locked)\n",
-                                 (void *) vaddr, cnt);
-                       /*
-                        * for now, continue, and see what happens but with
-                        * the new implementation, this should never happen,
-                        * unless perhaps the user has mpin'ed the pages
-                        * themselves (something we need to test)
-                        */
-                       ret = 0;
-               } else {
-                       dev_info(&dd->pcidev->dev,
-                                "Failed to lock addr %p, %u pages: "
-                                "errno %d\n", (void *) vaddr, cnt, -ret);
-                       goto done;
-               }
-       }
-       for (i = 0; i < cnt; i++, vaddr += PAGE_SIZE) {
-               for (; ntids--; tid++) {
-                       if (tid == tidcnt)
-                               tid = 0;
-                       if (!dd->ipath_pageshadow[porttid + tid])
-                               break;
-               }
-               if (ntids < 0) {
-                       /*
-                        * oops, wrapped all the way through their TIDs,
-                        * and didn't have enough free; see comments at
-                        * start of routine
-                        */
-                       ipath_dbg("Not enough free TIDs for %u pages "
-                                 "(index %d), failing\n", cnt, i);
-                       i--;    /* last tidlist[i] not filled in */
-                       ret = -ENOMEM;
-                       break;
-               }
-               tidlist[i] = tid + tidoff;
-               ipath_cdbg(VERBOSE, "Updating idx %u to TID %u, "
-                          "vaddr %lx\n", i, tid + tidoff, vaddr);
-               /* we "know" system pages and TID pages are same size */
-               dd->ipath_pageshadow[porttid + tid] = pagep[i];
-               dd->ipath_physshadow[porttid + tid] = ipath_map_page(
-                       dd->pcidev, pagep[i], 0, PAGE_SIZE,
-                       PCI_DMA_FROMDEVICE);
-               /*
-                * don't need atomic or it's overhead
-                */
-               __set_bit(tid, tidmap);
-               physaddr = dd->ipath_physshadow[porttid + tid];
-               ipath_stats.sps_pagelocks++;
-               ipath_cdbg(VERBOSE,
-                          "TID %u, vaddr %lx, physaddr %llx pgp %p\n",
-                          tid, vaddr, (unsigned long long) physaddr,
-                          pagep[i]);
-               dd->ipath_f_put_tid(dd, &tidbase[tid], RCVHQ_RCV_TYPE_EXPECTED,
-                                   physaddr);
-               /*
-                * don't check this tid in ipath_portshadow, since we
-                * just filled it in; start with the next one.
-                */
-               tid++;
-       }
-
-       if (ret) {
-               u32 limit;
-       cleanup:
-               /* jump here if copy out of updated info failed... */
-               ipath_dbg("After failure (ret=%d), undo %d of %d entries\n",
-                         -ret, i, cnt);
-               /* same code that's in ipath_free_tid() */
-               limit = sizeof(tidmap) * BITS_PER_BYTE;
-               if (limit > tidcnt)
-                       /* just in case size changes in future */
-                       limit = tidcnt;
-               tid = find_first_bit((const unsigned long *)tidmap, limit);
-               for (; tid < limit; tid++) {
-                       if (!test_bit(tid, tidmap))
-                               continue;
-                       if (dd->ipath_pageshadow[porttid + tid]) {
-                               ipath_cdbg(VERBOSE, "Freeing TID %u\n",
-                                          tid);
-                               dd->ipath_f_put_tid(dd, &tidbase[tid],
-                                                   RCVHQ_RCV_TYPE_EXPECTED,
-                                                   dd->ipath_tidinvalid);
-                               pci_unmap_page(dd->pcidev,
-                                       dd->ipath_physshadow[porttid + tid],
-                                       PAGE_SIZE, PCI_DMA_FROMDEVICE);
-                               dd->ipath_pageshadow[porttid + tid] = NULL;
-                               ipath_stats.sps_pageunlocks++;
-                       }
-               }
-               ipath_release_user_pages(pagep, cnt);
-       } else {
-               /*
-                * Copy the updated array, with ipath_tid's filled in, back
-                * to user.  Since we did the copy in already, this "should
-                * never fail" If it does, we have to clean up...
-                */
-               if (copy_to_user((void __user *)
-                                (unsigned long) ti->tidlist,
-                                tidlist, cnt * sizeof(*tidlist))) {
-                       ret = -EFAULT;
-                       goto cleanup;
-               }
-               if (copy_to_user((void __user *) (unsigned long) ti->tidmap,
-                                tidmap, sizeof tidmap)) {
-                       ret = -EFAULT;
-                       goto cleanup;
-               }
-               if (tid == tidcnt)
-                       tid = 0;
-               if (!pd->port_subport_cnt)
-                       pd->port_tidcursor = tid;
-               else
-                       tidcursor_fp(fp) = tid;
-       }
-
-done:
-       if (ret)
-               ipath_dbg("Failed to map %u TID pages, failing with %d\n",
-                         ti->tidcnt, -ret);
-       return ret;
-}
-
-/**
- * ipath_tid_free - free a port TID
- * @pd: the port
- * @subport: the subport
- * @ti: the TID info
- *
- * right now we are unlocking one page at a time, but since
- * the intended use of this routine is for a single group of
- * virtually contiguous pages, that should change to improve
- * performance.  We check that the TID is in range for this port
- * but otherwise don't check validity; if user has an error and
- * frees the wrong tid, it's only their own data that can thereby
- * be corrupted.  We do check that the TID was in use, for sanity
- * We always use our idea of the saved address, not the address that
- * they pass in to us.
- */
-
-static int ipath_tid_free(struct ipath_portdata *pd, unsigned subport,
-                         const struct ipath_tid_info *ti)
-{
-       int ret = 0;
-       u32 tid, porttid, cnt, limit, tidcnt;
-       struct ipath_devdata *dd = pd->port_dd;
-       u64 __iomem *tidbase;
-       unsigned long tidmap[8];
-
-       if (!dd->ipath_pageshadow) {
-               ret = -ENOMEM;
-               goto done;
-       }
-
-       if (copy_from_user(tidmap, (void __user *)(unsigned long)ti->tidmap,
-                          sizeof tidmap)) {
-               ret = -EFAULT;
-               goto done;
-       }
-
-       porttid = pd->port_port * dd->ipath_rcvtidcnt;
-       if (!pd->port_subport_cnt)
-               tidcnt = dd->ipath_rcvtidcnt;
-       else if (!subport) {
-               tidcnt = (dd->ipath_rcvtidcnt / pd->port_subport_cnt) +
-                        (dd->ipath_rcvtidcnt % pd->port_subport_cnt);
-               porttid += dd->ipath_rcvtidcnt - tidcnt;
-       } else {
-               tidcnt = dd->ipath_rcvtidcnt / pd->port_subport_cnt;
-               porttid += tidcnt * (subport - 1);
-       }
-       tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) +
-                                  dd->ipath_rcvtidbase +
-                                  porttid * sizeof(*tidbase));
-
-       limit = sizeof(tidmap) * BITS_PER_BYTE;
-       if (limit > tidcnt)
-               /* just in case size changes in future */
-               limit = tidcnt;
-       tid = find_first_bit(tidmap, limit);
-       ipath_cdbg(VERBOSE, "Port%u free %u tids; first bit (max=%d) "
-                  "set is %d, porttid %u\n", pd->port_port, ti->tidcnt,
-                  limit, tid, porttid);
-       for (cnt = 0; tid < limit; tid++) {
-               /*
-                * small optimization; if we detect a run of 3 or so without
-                * any set, use find_first_bit again.  That's mainly to
-                * accelerate the case where we wrapped, so we have some at
-                * the beginning, and some at the end, and a big gap
-                * in the middle.
-                */
-               if (!test_bit(tid, tidmap))
-                       continue;
-               cnt++;
-               if (dd->ipath_pageshadow[porttid + tid]) {
-                       struct page *p;
-                       p = dd->ipath_pageshadow[porttid + tid];
-                       dd->ipath_pageshadow[porttid + tid] = NULL;
-                       ipath_cdbg(VERBOSE, "PID %u freeing TID %u\n",
-                                  pid_nr(pd->port_pid), tid);
-                       dd->ipath_f_put_tid(dd, &tidbase[tid],
-                                           RCVHQ_RCV_TYPE_EXPECTED,
-                                           dd->ipath_tidinvalid);
-                       pci_unmap_page(dd->pcidev,
-                               dd->ipath_physshadow[porttid + tid],
-                               PAGE_SIZE, PCI_DMA_FROMDEVICE);
-                       ipath_release_user_pages(&p, 1);
-                       ipath_stats.sps_pageunlocks++;
-               } else
-                       ipath_dbg("Unused tid %u, ignoring\n", tid);
-       }
-       if (cnt != ti->tidcnt)
-               ipath_dbg("passed in tidcnt %d, only %d bits set in map\n",
-                         ti->tidcnt, cnt);
-done:
-       if (ret)
-               ipath_dbg("Failed to unmap %u TID pages, failing with %d\n",
-                         ti->tidcnt, -ret);
-       return ret;
-}
-
-/**
- * ipath_set_part_key - set a partition key
- * @pd: the port
- * @key: the key
- *
- * We can have up to 4 active at a time (other than the default, which is
- * always allowed).  This is somewhat tricky, since multiple ports may set
- * the same key, so we reference count them, and clean up at exit.  All 4
- * partition keys are packed into a single infinipath register.  It's an
- * error for a process to set the same pkey multiple times.  We provide no
- * mechanism to de-allocate a pkey at this time, we may eventually need to
- * do that.  I've used the atomic operations, and no locking, and only make
- * a single pass through what's available.  This should be more than
- * adequate for some time. I'll think about spinlocks or the like if and as
- * it's necessary.
- */
-static int ipath_set_part_key(struct ipath_portdata *pd, u16 key)
-{
-       struct ipath_devdata *dd = pd->port_dd;
-       int i, any = 0, pidx = -1;
-       u16 lkey = key & 0x7FFF;
-       int ret;
-
-       if (lkey == (IPATH_DEFAULT_P_KEY & 0x7FFF)) {
-               /* nothing to do; this key always valid */
-               ret = 0;
-               goto bail;
-       }
-
-       ipath_cdbg(VERBOSE, "p%u try to set pkey %hx, current keys "
-                  "%hx:%x %hx:%x %hx:%x %hx:%x\n",
-                  pd->port_port, key, dd->ipath_pkeys[0],
-                  atomic_read(&dd->ipath_pkeyrefs[0]), dd->ipath_pkeys[1],
-                  atomic_read(&dd->ipath_pkeyrefs[1]), dd->ipath_pkeys[2],
-                  atomic_read(&dd->ipath_pkeyrefs[2]), dd->ipath_pkeys[3],
-                  atomic_read(&dd->ipath_pkeyrefs[3]));
-
-       if (!lkey) {
-               ipath_cdbg(PROC, "p%u tries to set key 0, not allowed\n",
-                          pd->port_port);
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       /*
-        * Set the full membership bit, because it has to be
-        * set in the register or the packet, and it seems
-        * cleaner to set in the register than to force all
-        * callers to set it. (see bug 4331)
-        */
-       key |= 0x8000;
-
-       for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
-               if (!pd->port_pkeys[i] && pidx == -1)
-                       pidx = i;
-               if (pd->port_pkeys[i] == key) {
-                       ipath_cdbg(VERBOSE, "p%u tries to set same pkey "
-                                  "(%x) more than once\n",
-                                  pd->port_port, key);
-                       ret = -EEXIST;
-                       goto bail;
-               }
-       }
-       if (pidx == -1) {
-               ipath_dbg("All pkeys for port %u already in use, "
-                         "can't set %x\n", pd->port_port, key);
-               ret = -EBUSY;
-               goto bail;
-       }
-       for (any = i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
-               if (!dd->ipath_pkeys[i]) {
-                       any++;
-                       continue;
-               }
-               if (dd->ipath_pkeys[i] == key) {
-                       atomic_t *pkrefs = &dd->ipath_pkeyrefs[i];
-
-                       if (atomic_inc_return(pkrefs) > 1) {
-                               pd->port_pkeys[pidx] = key;
-                               ipath_cdbg(VERBOSE, "p%u set key %x "
-                                          "matches #%d, count now %d\n",
-                                          pd->port_port, key, i,
-                                          atomic_read(pkrefs));
-                               ret = 0;
-                               goto bail;
-                       } else {
-                               /*
-                                * lost race, decrement count, catch below
-                                */
-                               atomic_dec(pkrefs);
-                               ipath_cdbg(VERBOSE, "Lost race, count was "
-                                          "0, after dec, it's %d\n",
-                                          atomic_read(pkrefs));
-                               any++;
-                       }
-               }
-               if ((dd->ipath_pkeys[i] & 0x7FFF) == lkey) {
-                       /*
-                        * It makes no sense to have both the limited and
-                        * full membership PKEY set at the same time since
-                        * the unlimited one will disable the limited one.
-                        */
-                       ret = -EEXIST;
-                       goto bail;
-               }
-       }
-       if (!any) {
-               ipath_dbg("port %u, all pkeys already in use, "
-                         "can't set %x\n", pd->port_port, key);
-               ret = -EBUSY;
-               goto bail;
-       }
-       for (any = i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
-               if (!dd->ipath_pkeys[i] &&
-                   atomic_inc_return(&dd->ipath_pkeyrefs[i]) == 1) {
-                       u64 pkey;
-
-                       /* for ipathstats, etc. */
-                       ipath_stats.sps_pkeys[i] = lkey;
-                       pd->port_pkeys[pidx] = dd->ipath_pkeys[i] = key;
-                       pkey =
-                               (u64) dd->ipath_pkeys[0] |
-                               ((u64) dd->ipath_pkeys[1] << 16) |
-                               ((u64) dd->ipath_pkeys[2] << 32) |
-                               ((u64) dd->ipath_pkeys[3] << 48);
-                       ipath_cdbg(PROC, "p%u set key %x in #%d, "
-                                  "portidx %d, new pkey reg %llx\n",
-                                  pd->port_port, key, i, pidx,
-                                  (unsigned long long) pkey);
-                       ipath_write_kreg(
-                               dd, dd->ipath_kregs->kr_partitionkey, pkey);
-
-                       ret = 0;
-                       goto bail;
-               }
-       }
-       ipath_dbg("port %u, all pkeys already in use 2nd pass, "
-                 "can't set %x\n", pd->port_port, key);
-       ret = -EBUSY;
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_manage_rcvq - manage a port's receive queue
- * @pd: the port
- * @subport: the subport
- * @start_stop: action to carry out
- *
- * start_stop == 0 disables receive on the port, for use in queue
- * overflow conditions.  start_stop==1 re-enables, to be used to
- * re-init the software copy of the head register
- */
-static int ipath_manage_rcvq(struct ipath_portdata *pd, unsigned subport,
-                            int start_stop)
-{
-       struct ipath_devdata *dd = pd->port_dd;
-
-       ipath_cdbg(PROC, "%sabling rcv for unit %u port %u:%u\n",
-                  start_stop ? "en" : "dis", dd->ipath_unit,
-                  pd->port_port, subport);
-       if (subport)
-               goto bail;
-       /* atomically clear receive enable port. */
-       if (start_stop) {
-               /*
-                * On enable, force in-memory copy of the tail register to
-                * 0, so that protocol code doesn't have to worry about
-                * whether or not the chip has yet updated the in-memory
-                * copy or not on return from the system call. The chip
-                * always resets it's tail register back to 0 on a
-                * transition from disabled to enabled.  This could cause a
-                * problem if software was broken, and did the enable w/o
-                * the disable, but eventually the in-memory copy will be
-                * updated and correct itself, even in the face of software
-                * bugs.
-                */
-               if (pd->port_rcvhdrtail_kvaddr)
-                       ipath_clear_rcvhdrtail(pd);
-               set_bit(dd->ipath_r_portenable_shift + pd->port_port,
-                       &dd->ipath_rcvctrl);
-       } else
-               clear_bit(dd->ipath_r_portenable_shift + pd->port_port,
-                         &dd->ipath_rcvctrl);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
-                        dd->ipath_rcvctrl);
-       /* now be sure chip saw it before we return */
-       ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-       if (start_stop) {
-               /*
-                * And try to be sure that tail reg update has happened too.
-                * This should in theory interlock with the RXE changes to
-                * the tail register.  Don't assign it to the tail register
-                * in memory copy, since we could overwrite an update by the
-                * chip if we did.
-                */
-               ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port);
-       }
-       /* always; new head should be equal to new tail; see above */
-bail:
-       return 0;
-}
-
-static void ipath_clean_part_key(struct ipath_portdata *pd,
-                                struct ipath_devdata *dd)
-{
-       int i, j, pchanged = 0;
-       u64 oldpkey;
-
-       /* for debugging only */
-       oldpkey = (u64) dd->ipath_pkeys[0] |
-               ((u64) dd->ipath_pkeys[1] << 16) |
-               ((u64) dd->ipath_pkeys[2] << 32) |
-               ((u64) dd->ipath_pkeys[3] << 48);
-
-       for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
-               if (!pd->port_pkeys[i])
-                       continue;
-               ipath_cdbg(VERBOSE, "look for key[%d] %hx in pkeys\n", i,
-                          pd->port_pkeys[i]);
-               for (j = 0; j < ARRAY_SIZE(dd->ipath_pkeys); j++) {
-                       /* check for match independent of the global bit */
-                       if ((dd->ipath_pkeys[j] & 0x7fff) !=
-                           (pd->port_pkeys[i] & 0x7fff))
-                               continue;
-                       if (atomic_dec_and_test(&dd->ipath_pkeyrefs[j])) {
-                               ipath_cdbg(VERBOSE, "p%u clear key "
-                                          "%x matches #%d\n",
-                                          pd->port_port,
-                                          pd->port_pkeys[i], j);
-                               ipath_stats.sps_pkeys[j] =
-                                       dd->ipath_pkeys[j] = 0;
-                               pchanged++;
-                       } else {
-                               ipath_cdbg(VERBOSE, "p%u key %x matches #%d, "
-                                          "but ref still %d\n", pd->port_port,
-                                          pd->port_pkeys[i], j,
-                                          atomic_read(&dd->ipath_pkeyrefs[j]));
-                               break;
-                       }
-               }
-               pd->port_pkeys[i] = 0;
-       }
-       if (pchanged) {
-               u64 pkey = (u64) dd->ipath_pkeys[0] |
-                       ((u64) dd->ipath_pkeys[1] << 16) |
-                       ((u64) dd->ipath_pkeys[2] << 32) |
-                       ((u64) dd->ipath_pkeys[3] << 48);
-               ipath_cdbg(VERBOSE, "p%u old pkey reg %llx, "
-                          "new pkey reg %llx\n", pd->port_port,
-                          (unsigned long long) oldpkey,
-                          (unsigned long long) pkey);
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey,
-                                pkey);
-       }
-}
-
-/*
- * Initialize the port data with the receive buffer sizes
- * so this can be done while the master port is locked.
- * Otherwise, there is a race with a slave opening the port
- * and seeing these fields uninitialized.
- */
-static void init_user_egr_sizes(struct ipath_portdata *pd)
-{
-       struct ipath_devdata *dd = pd->port_dd;
-       unsigned egrperchunk, egrcnt, size;
-
-       /*
-        * to avoid wasting a lot of memory, we allocate 32KB chunks of
-        * physically contiguous memory, advance through it until used up
-        * and then allocate more.  Of course, we need memory to store those
-        * extra pointers, now.  Started out with 256KB, but under heavy
-        * memory pressure (creating large files and then copying them over
-        * NFS while doing lots of MPI jobs), we hit some allocation
-        * failures, even though we can sleep...  (2.6.10) Still get
-        * failures at 64K.  32K is the lowest we can go without wasting
-        * additional memory.
-        */
-       size = 0x8000;
-       egrperchunk = size / dd->ipath_rcvegrbufsize;
-       egrcnt = dd->ipath_rcvegrcnt;
-       pd->port_rcvegrbuf_chunks = (egrcnt + egrperchunk - 1) / egrperchunk;
-       pd->port_rcvegrbufs_perchunk = egrperchunk;
-       pd->port_rcvegrbuf_size = size;
-}
-
-/**
- * ipath_create_user_egr - allocate eager TID buffers
- * @pd: the port to allocate TID buffers for
- *
- * This routine is now quite different for user and kernel, because
- * the kernel uses skb's, for the accelerated network performance
- * This is the user port version
- *
- * Allocate the eager TID buffers and program them into infinipath
- * They are no longer completely contiguous, we do multiple allocation
- * calls.
- */
-static int ipath_create_user_egr(struct ipath_portdata *pd)
-{
-       struct ipath_devdata *dd = pd->port_dd;
-       unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff;
-       size_t size;
-       int ret;
-       gfp_t gfp_flags;
-
-       /*
-        * GFP_USER, but without GFP_FS, so buffer cache can be
-        * coalesced (we hope); otherwise, even at order 4,
-        * heavy filesystem activity makes these fail, and we can
-        * use compound pages.
-        */
-       gfp_flags = __GFP_RECLAIM | __GFP_IO | __GFP_COMP;
-
-       egrcnt = dd->ipath_rcvegrcnt;
-       /* TID number offset for this port */
-       egroff = (pd->port_port - 1) * egrcnt + dd->ipath_p0_rcvegrcnt;
-       egrsize = dd->ipath_rcvegrbufsize;
-       ipath_cdbg(VERBOSE, "Allocating %d egr buffers, at egrtid "
-                  "offset %x, egrsize %u\n", egrcnt, egroff, egrsize);
-
-       chunk = pd->port_rcvegrbuf_chunks;
-       egrperchunk = pd->port_rcvegrbufs_perchunk;
-       size = pd->port_rcvegrbuf_size;
-       pd->port_rcvegrbuf = kmalloc_array(chunk, sizeof(pd->port_rcvegrbuf[0]),
-                                          GFP_KERNEL);
-       if (!pd->port_rcvegrbuf) {
-               ret = -ENOMEM;
-               goto bail;
-       }
-       pd->port_rcvegrbuf_phys =
-               kmalloc_array(chunk, sizeof(pd->port_rcvegrbuf_phys[0]),
-                             GFP_KERNEL);
-       if (!pd->port_rcvegrbuf_phys) {
-               ret = -ENOMEM;
-               goto bail_rcvegrbuf;
-       }
-       for (e = 0; e < pd->port_rcvegrbuf_chunks; e++) {
-
-               pd->port_rcvegrbuf[e] = dma_alloc_coherent(
-                       &dd->pcidev->dev, size, &pd->port_rcvegrbuf_phys[e],
-                       gfp_flags);
-
-               if (!pd->port_rcvegrbuf[e]) {
-                       ret = -ENOMEM;
-                       goto bail_rcvegrbuf_phys;
-               }
-       }
-
-       pd->port_rcvegr_phys = pd->port_rcvegrbuf_phys[0];
-
-       for (e = chunk = 0; chunk < pd->port_rcvegrbuf_chunks; chunk++) {
-               dma_addr_t pa = pd->port_rcvegrbuf_phys[chunk];
-               unsigned i;
-
-               for (i = 0; e < egrcnt && i < egrperchunk; e++, i++) {
-                       dd->ipath_f_put_tid(dd, e + egroff +
-                                           (u64 __iomem *)
-                                           ((char __iomem *)
-                                            dd->ipath_kregbase +
-                                            dd->ipath_rcvegrbase),
-                                           RCVHQ_RCV_TYPE_EAGER, pa);
-                       pa += egrsize;
-               }
-               cond_resched(); /* don't hog the cpu */
-       }
-
-       ret = 0;
-       goto bail;
-
-bail_rcvegrbuf_phys:
-       for (e = 0; e < pd->port_rcvegrbuf_chunks &&
-               pd->port_rcvegrbuf[e]; e++) {
-               dma_free_coherent(&dd->pcidev->dev, size,
-                                 pd->port_rcvegrbuf[e],
-                                 pd->port_rcvegrbuf_phys[e]);
-
-       }
-       kfree(pd->port_rcvegrbuf_phys);
-       pd->port_rcvegrbuf_phys = NULL;
-bail_rcvegrbuf:
-       kfree(pd->port_rcvegrbuf);
-       pd->port_rcvegrbuf = NULL;
-bail:
-       return ret;
-}
-
-
-/* common code for the mappings on dma_alloc_coherent mem */
-static int ipath_mmap_mem(struct vm_area_struct *vma,
-       struct ipath_portdata *pd, unsigned len, int write_ok,
-       void *kvaddr, char *what)
-{
-       struct ipath_devdata *dd = pd->port_dd;
-       unsigned long pfn;
-       int ret;
-
-       if ((vma->vm_end - vma->vm_start) > len) {
-               dev_info(&dd->pcidev->dev,
-                        "FAIL on %s: len %lx > %x\n", what,
-                        vma->vm_end - vma->vm_start, len);
-               ret = -EFAULT;
-               goto bail;
-       }
-
-       if (!write_ok) {
-               if (vma->vm_flags & VM_WRITE) {
-                       dev_info(&dd->pcidev->dev,
-                                "%s must be mapped readonly\n", what);
-                       ret = -EPERM;
-                       goto bail;
-               }
-
-               /* don't allow them to later change with mprotect */
-               vma->vm_flags &= ~VM_MAYWRITE;
-       }
-
-       pfn = virt_to_phys(kvaddr) >> PAGE_SHIFT;
-       ret = remap_pfn_range(vma, vma->vm_start, pfn,
-                             len, vma->vm_page_prot);
-       if (ret)
-               dev_info(&dd->pcidev->dev, "%s port%u mmap of %lx, %x "
-                        "bytes r%c failed: %d\n", what, pd->port_port,
-                        pfn, len, write_ok?'w':'o', ret);
-       else
-               ipath_cdbg(VERBOSE, "%s port%u mmaped %lx, %x bytes "
-                          "r%c\n", what, pd->port_port, pfn, len,
-                          write_ok?'w':'o');
-bail:
-       return ret;
-}
-
-static int mmap_ureg(struct vm_area_struct *vma, struct ipath_devdata *dd,
-                    u64 ureg)
-{
-       unsigned long phys;
-       int ret;
-
-       /*
-        * This is real hardware, so use io_remap.  This is the mechanism
-        * for the user process to update the head registers for their port
-        * in the chip.
-        */
-       if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) {
-               dev_info(&dd->pcidev->dev, "FAIL mmap userreg: reqlen "
-                        "%lx > PAGE\n", vma->vm_end - vma->vm_start);
-               ret = -EFAULT;
-       } else {
-               phys = dd->ipath_physaddr + ureg;
-               vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-
-               vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
-               ret = io_remap_pfn_range(vma, vma->vm_start,
-                                        phys >> PAGE_SHIFT,
-                                        vma->vm_end - vma->vm_start,
-                                        vma->vm_page_prot);
-       }
-       return ret;
-}
-
-static int mmap_piobufs(struct vm_area_struct *vma,
-                       struct ipath_devdata *dd,
-                       struct ipath_portdata *pd,
-                       unsigned piobufs, unsigned piocnt)
-{
-       unsigned long phys;
-       int ret;
-
-       /*
-        * When we map the PIO buffers in the chip, we want to map them as
-        * writeonly, no read possible.   This prevents access to previous
-        * process data, and catches users who might try to read the i/o
-        * space due to a bug.
-        */
-       if ((vma->vm_end - vma->vm_start) > (piocnt * dd->ipath_palign)) {
-               dev_info(&dd->pcidev->dev, "FAIL mmap piobufs: "
-                        "reqlen %lx > PAGE\n",
-                        vma->vm_end - vma->vm_start);
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       phys = dd->ipath_physaddr + piobufs;
-
-#if defined(__powerpc__)
-       /* There isn't a generic way to specify writethrough mappings */
-       pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE;
-       pgprot_val(vma->vm_page_prot) |= _PAGE_WRITETHRU;
-       pgprot_val(vma->vm_page_prot) &= ~_PAGE_GUARDED;
-#endif
-
-       /*
-        * don't allow them to later change to readable with mprotect (for when
-        * not initially mapped readable, as is normally the case)
-        */
-       vma->vm_flags &= ~VM_MAYREAD;
-       vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
-
-       ret = io_remap_pfn_range(vma, vma->vm_start, phys >> PAGE_SHIFT,
-                                vma->vm_end - vma->vm_start,
-                                vma->vm_page_prot);
-bail:
-       return ret;
-}
-
-static int mmap_rcvegrbufs(struct vm_area_struct *vma,
-                          struct ipath_portdata *pd)
-{
-       struct ipath_devdata *dd = pd->port_dd;
-       unsigned long start, size;
-       size_t total_size, i;
-       unsigned long pfn;
-       int ret;
-
-       size = pd->port_rcvegrbuf_size;
-       total_size = pd->port_rcvegrbuf_chunks * size;
-       if ((vma->vm_end - vma->vm_start) > total_size) {
-               dev_info(&dd->pcidev->dev, "FAIL on egr bufs: "
-                        "reqlen %lx > actual %lx\n",
-                        vma->vm_end - vma->vm_start,
-                        (unsigned long) total_size);
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       if (vma->vm_flags & VM_WRITE) {
-               dev_info(&dd->pcidev->dev, "Can't map eager buffers as "
-                        "writable (flags=%lx)\n", vma->vm_flags);
-               ret = -EPERM;
-               goto bail;
-       }
-       /* don't allow them to later change to writeable with mprotect */
-       vma->vm_flags &= ~VM_MAYWRITE;
-
-       start = vma->vm_start;
-
-       for (i = 0; i < pd->port_rcvegrbuf_chunks; i++, start += size) {
-               pfn = virt_to_phys(pd->port_rcvegrbuf[i]) >> PAGE_SHIFT;
-               ret = remap_pfn_range(vma, start, pfn, size,
-                                     vma->vm_page_prot);
-               if (ret < 0)
-                       goto bail;
-       }
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-/*
- * ipath_file_vma_fault - handle a VMA page fault.
- */
-static int ipath_file_vma_fault(struct vm_area_struct *vma,
-                                       struct vm_fault *vmf)
-{
-       struct page *page;
-
-       page = vmalloc_to_page((void *)(vmf->pgoff << PAGE_SHIFT));
-       if (!page)
-               return VM_FAULT_SIGBUS;
-       get_page(page);
-       vmf->page = page;
-
-       return 0;
-}
-
-static const struct vm_operations_struct ipath_file_vm_ops = {
-       .fault = ipath_file_vma_fault,
-};
-
-static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr,
-                      struct ipath_portdata *pd, unsigned subport)
-{
-       unsigned long len;
-       struct ipath_devdata *dd;
-       void *addr;
-       size_t size;
-       int ret = 0;
-
-       /* If the port is not shared, all addresses should be physical */
-       if (!pd->port_subport_cnt)
-               goto bail;
-
-       dd = pd->port_dd;
-       size = pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size;
-
-       /*
-        * Each process has all the subport uregbase, rcvhdrq, and
-        * rcvegrbufs mmapped - as an array for all the processes,
-        * and also separately for this process.
-        */
-       if (pgaddr == cvt_kvaddr(pd->subport_uregbase)) {
-               addr = pd->subport_uregbase;
-               size = PAGE_SIZE * pd->port_subport_cnt;
-       } else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base)) {
-               addr = pd->subport_rcvhdr_base;
-               size = pd->port_rcvhdrq_size * pd->port_subport_cnt;
-       } else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf)) {
-               addr = pd->subport_rcvegrbuf;
-               size *= pd->port_subport_cnt;
-        } else if (pgaddr == cvt_kvaddr(pd->subport_uregbase +
-                                        PAGE_SIZE * subport)) {
-                addr = pd->subport_uregbase + PAGE_SIZE * subport;
-                size = PAGE_SIZE;
-        } else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base +
-                                pd->port_rcvhdrq_size * subport)) {
-                addr = pd->subport_rcvhdr_base +
-                        pd->port_rcvhdrq_size * subport;
-                size = pd->port_rcvhdrq_size;
-        } else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf +
-                               size * subport)) {
-                addr = pd->subport_rcvegrbuf + size * subport;
-                /* rcvegrbufs are read-only on the slave */
-                if (vma->vm_flags & VM_WRITE) {
-                        dev_info(&dd->pcidev->dev,
-                                 "Can't map eager buffers as "
-                                 "writable (flags=%lx)\n", vma->vm_flags);
-                        ret = -EPERM;
-                        goto bail;
-                }
-                /*
-                 * Don't allow permission to later change to writeable
-                 * with mprotect.
-                 */
-                vma->vm_flags &= ~VM_MAYWRITE;
-       } else {
-               goto bail;
-       }
-       len = vma->vm_end - vma->vm_start;
-       if (len > size) {
-               ipath_cdbg(MM, "FAIL: reqlen %lx > %zx\n", len, size);
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT;
-       vma->vm_ops = &ipath_file_vm_ops;
-       vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
-       ret = 1;
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_mmap - mmap various structures into user space
- * @fp: the file pointer
- * @vma: the VM area
- *
- * We use this to have a shared buffer between the kernel and the user code
- * for the rcvhdr queue, egr buffers, and the per-port user regs and pio
- * buffers in the chip.  We have the open and close entries so we can bump
- * the ref count and keep the driver from being unloaded while still mapped.
- */
-static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
-{
-       struct ipath_portdata *pd;
-       struct ipath_devdata *dd;
-       u64 pgaddr, ureg;
-       unsigned piobufs, piocnt;
-       int ret;
-
-       pd = port_fp(fp);
-       if (!pd) {
-               ret = -EINVAL;
-               goto bail;
-       }
-       dd = pd->port_dd;
-
-       /*
-        * This is the ipath_do_user_init() code, mapping the shared buffers
-        * into the user process. The address referred to by vm_pgoff is the
-        * file offset passed via mmap().  For shared ports, this is the
-        * kernel vmalloc() address of the pages to share with the master.
-        * For non-shared or master ports, this is a physical address.
-        * We only do one mmap for each space mapped.
-        */
-       pgaddr = vma->vm_pgoff << PAGE_SHIFT;
-
-       /*
-        * Check for 0 in case one of the allocations failed, but user
-        * called mmap anyway.
-        */
-       if (!pgaddr)  {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       ipath_cdbg(MM, "pgaddr %llx vm_start=%lx len %lx port %u:%u:%u\n",
-                  (unsigned long long) pgaddr, vma->vm_start,
-                  vma->vm_end - vma->vm_start, dd->ipath_unit,
-                  pd->port_port, subport_fp(fp));
-
-       /*
-        * Physical addresses must fit in 40 bits for our hardware.
-        * Check for kernel virtual addresses first, anything else must
-        * match a HW or memory address.
-        */
-       ret = mmap_kvaddr(vma, pgaddr, pd, subport_fp(fp));
-       if (ret) {
-               if (ret > 0)
-                       ret = 0;
-               goto bail;
-       }
-
-       ureg = dd->ipath_uregbase + dd->ipath_ureg_align * pd->port_port;
-       if (!pd->port_subport_cnt) {
-               /* port is not shared */
-               piocnt = pd->port_piocnt;
-               piobufs = pd->port_piobufs;
-       } else if (!subport_fp(fp)) {
-               /* caller is the master */
-               piocnt = (pd->port_piocnt / pd->port_subport_cnt) +
-                        (pd->port_piocnt % pd->port_subport_cnt);
-               piobufs = pd->port_piobufs +
-                       dd->ipath_palign * (pd->port_piocnt - piocnt);
-       } else {
-               unsigned slave = subport_fp(fp) - 1;
-
-               /* caller is a slave */
-               piocnt = pd->port_piocnt / pd->port_subport_cnt;
-               piobufs = pd->port_piobufs + dd->ipath_palign * piocnt * slave;
-       }
-
-       if (pgaddr == ureg)
-               ret = mmap_ureg(vma, dd, ureg);
-       else if (pgaddr == piobufs)
-               ret = mmap_piobufs(vma, dd, pd, piobufs, piocnt);
-       else if (pgaddr == dd->ipath_pioavailregs_phys)
-               /* in-memory copy of pioavail registers */
-               ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0,
-                                    (void *) dd->ipath_pioavailregs_dma,
-                                    "pioavail registers");
-       else if (pgaddr == pd->port_rcvegr_phys)
-               ret = mmap_rcvegrbufs(vma, pd);
-       else if (pgaddr == (u64) pd->port_rcvhdrq_phys)
-               /*
-                * The rcvhdrq itself; readonly except on HT (so have
-                * to allow writable mapping), multiple pages, contiguous
-                * from an i/o perspective.
-                */
-               ret = ipath_mmap_mem(vma, pd, pd->port_rcvhdrq_size, 1,
-                                    pd->port_rcvhdrq,
-                                    "rcvhdrq");
-       else if (pgaddr == (u64) pd->port_rcvhdrqtailaddr_phys)
-               /* in-memory copy of rcvhdrq tail register */
-               ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0,
-                                    pd->port_rcvhdrtail_kvaddr,
-                                    "rcvhdrq tail");
-       else
-               ret = -EINVAL;
-
-       vma->vm_private_data = NULL;
-
-       if (ret < 0)
-               dev_info(&dd->pcidev->dev,
-                        "Failure %d on off %llx len %lx\n",
-                        -ret, (unsigned long long)pgaddr,
-                        vma->vm_end - vma->vm_start);
-bail:
-       return ret;
-}
-
-static unsigned ipath_poll_hdrqfull(struct ipath_portdata *pd)
-{
-       unsigned pollflag = 0;
-
-       if ((pd->poll_type & IPATH_POLL_TYPE_OVERFLOW) &&
-           pd->port_hdrqfull != pd->port_hdrqfull_poll) {
-               pollflag |= POLLIN | POLLRDNORM;
-               pd->port_hdrqfull_poll = pd->port_hdrqfull;
-       }
-
-       return pollflag;
-}
-
-static unsigned int ipath_poll_urgent(struct ipath_portdata *pd,
-                                     struct file *fp,
-                                     struct poll_table_struct *pt)
-{
-       unsigned pollflag = 0;
-       struct ipath_devdata *dd;
-
-       dd = pd->port_dd;
-
-       /* variable access in ipath_poll_hdrqfull() needs this */
-       rmb();
-       pollflag = ipath_poll_hdrqfull(pd);
-
-       if (pd->port_urgent != pd->port_urgent_poll) {
-               pollflag |= POLLIN | POLLRDNORM;
-               pd->port_urgent_poll = pd->port_urgent;
-       }
-
-       if (!pollflag) {
-               /* this saves a spin_lock/unlock in interrupt handler... */
-               set_bit(IPATH_PORT_WAITING_URG, &pd->port_flag);
-               /* flush waiting flag so don't miss an event... */
-               wmb();
-               poll_wait(fp, &pd->port_wait, pt);
-       }
-
-       return pollflag;
-}
-
-static unsigned int ipath_poll_next(struct ipath_portdata *pd,
-                                   struct file *fp,
-                                   struct poll_table_struct *pt)
-{
-       u32 head;
-       u32 tail;
-       unsigned pollflag = 0;
-       struct ipath_devdata *dd;
-
-       dd = pd->port_dd;
-
-       /* variable access in ipath_poll_hdrqfull() needs this */
-       rmb();
-       pollflag = ipath_poll_hdrqfull(pd);
-
-       head = ipath_read_ureg32(dd, ur_rcvhdrhead, pd->port_port);
-       if (pd->port_rcvhdrtail_kvaddr)
-               tail = ipath_get_rcvhdrtail(pd);
-       else
-               tail = ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port);
-
-       if (head != tail)
-               pollflag |= POLLIN | POLLRDNORM;
-       else {
-               /* this saves a spin_lock/unlock in interrupt handler */
-               set_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag);
-               /* flush waiting flag so we don't miss an event */
-               wmb();
-
-               set_bit(pd->port_port + dd->ipath_r_intravail_shift,
-                       &dd->ipath_rcvctrl);
-
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
-                                dd->ipath_rcvctrl);
-
-               if (dd->ipath_rhdrhead_intr_off) /* arm rcv interrupt */
-                       ipath_write_ureg(dd, ur_rcvhdrhead,
-                                        dd->ipath_rhdrhead_intr_off | head,
-                                        pd->port_port);
-
-               poll_wait(fp, &pd->port_wait, pt);
-       }
-
-       return pollflag;
-}
-
-static unsigned int ipath_poll(struct file *fp,
-                              struct poll_table_struct *pt)
-{
-       struct ipath_portdata *pd;
-       unsigned pollflag;
-
-       pd = port_fp(fp);
-       if (!pd)
-               pollflag = 0;
-       else if (pd->poll_type & IPATH_POLL_TYPE_URGENT)
-               pollflag = ipath_poll_urgent(pd, fp, pt);
-       else
-               pollflag = ipath_poll_next(pd, fp, pt);
-
-       return pollflag;
-}
-
-static int ipath_supports_subports(int user_swmajor, int user_swminor)
-{
-       /* no subport implementation prior to software version 1.3 */
-       return (user_swmajor > 1) || (user_swminor >= 3);
-}
-
-static int ipath_compatible_subports(int user_swmajor, int user_swminor)
-{
-       /* this code is written long-hand for clarity */
-       if (IPATH_USER_SWMAJOR != user_swmajor) {
-               /* no promise of compatibility if major mismatch */
-               return 0;
-       }
-       if (IPATH_USER_SWMAJOR == 1) {
-               switch (IPATH_USER_SWMINOR) {
-               case 0:
-               case 1:
-               case 2:
-                       /* no subport implementation so cannot be compatible */
-                       return 0;
-               case 3:
-                       /* 3 is only compatible with itself */
-                       return user_swminor == 3;
-               default:
-                       /* >= 4 are compatible (or are expected to be) */
-                       return user_swminor >= 4;
-               }
-       }
-       /* make no promises yet for future major versions */
-       return 0;
-}
-
-static int init_subports(struct ipath_devdata *dd,
-                        struct ipath_portdata *pd,
-                        const struct ipath_user_info *uinfo)
-{
-       int ret = 0;
-       unsigned num_subports;
-       size_t size;
-
-       /*
-        * If the user is requesting zero subports,
-        * skip the subport allocation.
-        */
-       if (uinfo->spu_subport_cnt <= 0)
-               goto bail;
-
-       /* Self-consistency check for ipath_compatible_subports() */
-       if (ipath_supports_subports(IPATH_USER_SWMAJOR, IPATH_USER_SWMINOR) &&
-           !ipath_compatible_subports(IPATH_USER_SWMAJOR,
-                                      IPATH_USER_SWMINOR)) {
-               dev_info(&dd->pcidev->dev,
-                        "Inconsistent ipath_compatible_subports()\n");
-               goto bail;
-       }
-
-       /* Check for subport compatibility */
-       if (!ipath_compatible_subports(uinfo->spu_userversion >> 16,
-                                      uinfo->spu_userversion & 0xffff)) {
-               dev_info(&dd->pcidev->dev,
-                        "Mismatched user version (%d.%d) and driver "
-                        "version (%d.%d) while port sharing. Ensure "
-                         "that driver and library are from the same "
-                         "release.\n",
-                        (int) (uinfo->spu_userversion >> 16),
-                         (int) (uinfo->spu_userversion & 0xffff),
-                        IPATH_USER_SWMAJOR,
-                        IPATH_USER_SWMINOR);
-               goto bail;
-       }
-       if (uinfo->spu_subport_cnt > INFINIPATH_MAX_SUBPORT) {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       num_subports = uinfo->spu_subport_cnt;
-       pd->subport_uregbase = vzalloc(PAGE_SIZE * num_subports);
-       if (!pd->subport_uregbase) {
-               ret = -ENOMEM;
-               goto bail;
-       }
-       /* Note: pd->port_rcvhdrq_size isn't initialized yet. */
-       size = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize *
-                    sizeof(u32), PAGE_SIZE) * num_subports;
-       pd->subport_rcvhdr_base = vzalloc(size);
-       if (!pd->subport_rcvhdr_base) {
-               ret = -ENOMEM;
-               goto bail_ureg;
-       }
-
-       pd->subport_rcvegrbuf = vzalloc(pd->port_rcvegrbuf_chunks *
-                                       pd->port_rcvegrbuf_size *
-                                       num_subports);
-       if (!pd->subport_rcvegrbuf) {
-               ret = -ENOMEM;
-               goto bail_rhdr;
-       }
-
-       pd->port_subport_cnt = uinfo->spu_subport_cnt;
-       pd->port_subport_id = uinfo->spu_subport_id;
-       pd->active_slaves = 1;
-       set_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag);
-       goto bail;
-
-bail_rhdr:
-       vfree(pd->subport_rcvhdr_base);
-bail_ureg:
-       vfree(pd->subport_uregbase);
-       pd->subport_uregbase = NULL;
-bail:
-       return ret;
-}
-
-static int try_alloc_port(struct ipath_devdata *dd, int port,
-                         struct file *fp,
-                         const struct ipath_user_info *uinfo)
-{
-       struct ipath_portdata *pd;
-       int ret;
-
-       if (!(pd = dd->ipath_pd[port])) {
-               void *ptmp;
-
-               pd = kzalloc(sizeof(struct ipath_portdata), GFP_KERNEL);
-
-               /*
-                * Allocate memory for use in ipath_tid_update() just once
-                * at open, not per call.  Reduces cost of expected send
-                * setup.
-                */
-               ptmp = kmalloc(dd->ipath_rcvtidcnt * sizeof(u16) +
-                              dd->ipath_rcvtidcnt * sizeof(struct page **),
-                              GFP_KERNEL);
-               if (!pd || !ptmp) {
-                       ipath_dev_err(dd, "Unable to allocate portdata "
-                                     "memory, failing open\n");
-                       ret = -ENOMEM;
-                       kfree(pd);
-                       kfree(ptmp);
-                       goto bail;
-               }
-               dd->ipath_pd[port] = pd;
-               dd->ipath_pd[port]->port_port = port;
-               dd->ipath_pd[port]->port_dd = dd;
-               dd->ipath_pd[port]->port_tid_pg_list = ptmp;
-               init_waitqueue_head(&dd->ipath_pd[port]->port_wait);
-       }
-       if (!pd->port_cnt) {
-               pd->userversion = uinfo->spu_userversion;
-               init_user_egr_sizes(pd);
-               if ((ret = init_subports(dd, pd, uinfo)) != 0)
-                       goto bail;
-               ipath_cdbg(PROC, "%s[%u] opened unit:port %u:%u\n",
-                          current->comm, current->pid, dd->ipath_unit,
-                          port);
-               pd->port_cnt = 1;
-               port_fp(fp) = pd;
-               pd->port_pid = get_pid(task_pid(current));
-               strlcpy(pd->port_comm, current->comm, sizeof(pd->port_comm));
-               ipath_stats.sps_ports++;
-               ret = 0;
-       } else
-               ret = -EBUSY;
-
-bail:
-       return ret;
-}
-
-static inline int usable(struct ipath_devdata *dd)
-{
-       return dd &&
-               (dd->ipath_flags & IPATH_PRESENT) &&
-               dd->ipath_kregbase &&
-               dd->ipath_lid &&
-               !(dd->ipath_flags & (IPATH_LINKDOWN | IPATH_DISABLED
-                                    | IPATH_LINKUNK));
-}
-
-static int find_free_port(int unit, struct file *fp,
-                         const struct ipath_user_info *uinfo)
-{
-       struct ipath_devdata *dd = ipath_lookup(unit);
-       int ret, i;
-
-       if (!dd) {
-               ret = -ENODEV;
-               goto bail;
-       }
-
-       if (!usable(dd)) {
-               ret = -ENETDOWN;
-               goto bail;
-       }
-
-       for (i = 1; i < dd->ipath_cfgports; i++) {
-               ret = try_alloc_port(dd, i, fp, uinfo);
-               if (ret != -EBUSY)
-                       goto bail;
-       }
-       ret = -EBUSY;
-
-bail:
-       return ret;
-}
-
-static int find_best_unit(struct file *fp,
-                         const struct ipath_user_info *uinfo)
-{
-       int ret = 0, i, prefunit = -1, devmax;
-       int maxofallports, npresent, nup;
-       int ndev;
-
-       devmax = ipath_count_units(&npresent, &nup, &maxofallports);
-
-       /*
-        * This code is present to allow a knowledgeable person to
-        * specify the layout of processes to processors before opening
-        * this driver, and then we'll assign the process to the "closest"
-        * InfiniPath chip to that processor (we assume reasonable connectivity,
-        * for now).  This code assumes that if affinity has been set
-        * before this point, that at most one cpu is set; for now this
-        * is reasonable.  I check for both cpumask_empty() and cpumask_full(),
-        * in case some kernel variant sets none of the bits when no
-        * affinity is set.  2.6.11 and 12 kernels have all present
-        * cpus set.  Some day we'll have to fix it up further to handle
-        * a cpu subset.  This algorithm fails for two HT chips connected
-        * in tunnel fashion.  Eventually this needs real topology
-        * information.  There may be some issues with dual core numbering
-        * as well.  This needs more work prior to release.
-        */
-       if (!cpumask_empty(tsk_cpus_allowed(current)) &&
-           !cpumask_full(tsk_cpus_allowed(current))) {
-               int ncpus = num_online_cpus(), curcpu = -1, nset = 0;
-               get_online_cpus();
-               for_each_online_cpu(i)
-                       if (cpumask_test_cpu(i, tsk_cpus_allowed(current))) {
-                               ipath_cdbg(PROC, "%s[%u] affinity set for "
-                                          "cpu %d/%d\n", current->comm,
-                                          current->pid, i, ncpus);
-                               curcpu = i;
-                               nset++;
-                       }
-               put_online_cpus();
-               if (curcpu != -1 && nset != ncpus) {
-                       if (npresent) {
-                               prefunit = curcpu / (ncpus / npresent);
-                               ipath_cdbg(PROC,"%s[%u] %d chips, %d cpus, "
-                                         "%d cpus/chip, select unit %d\n",
-                                         current->comm, current->pid,
-                                         npresent, ncpus, ncpus / npresent,
-                                         prefunit);
-                       }
-               }
-       }
-
-       /*
-        * user ports start at 1, kernel port is 0
-        * For now, we do round-robin access across all chips
-        */
-
-       if (prefunit != -1)
-               devmax = prefunit + 1;
-recheck:
-       for (i = 1; i < maxofallports; i++) {
-               for (ndev = prefunit != -1 ? prefunit : 0; ndev < devmax;
-                    ndev++) {
-                       struct ipath_devdata *dd = ipath_lookup(ndev);
-
-                       if (!usable(dd))
-                               continue; /* can't use this unit */
-                       if (i >= dd->ipath_cfgports)
-                               /*
-                                * Maxed out on users of this unit. Try
-                                * next.
-                                */
-                               continue;
-                       ret = try_alloc_port(dd, i, fp, uinfo);
-                       if (!ret)
-                               goto done;
-               }
-       }
-
-       if (npresent) {
-               if (nup == 0) {
-                       ret = -ENETDOWN;
-                       ipath_dbg("No ports available (none initialized "
-                                 "and ready)\n");
-               } else {
-                       if (prefunit > 0) {
-                               /* if started above 0, retry from 0 */
-                               ipath_cdbg(PROC,
-                                          "%s[%u] no ports on prefunit "
-                                          "%d, clear and re-check\n",
-                                          current->comm, current->pid,
-                                          prefunit);
-                               devmax = ipath_count_units(NULL, NULL,
-                                                          NULL);
-                               prefunit = -1;
-                               goto recheck;
-                       }
-                       ret = -EBUSY;
-                       ipath_dbg("No ports available\n");
-               }
-       } else {
-               ret = -ENXIO;
-               ipath_dbg("No boards found\n");
-       }
-
-done:
-       return ret;
-}
-
-static int find_shared_port(struct file *fp,
-                           const struct ipath_user_info *uinfo)
-{
-       int devmax, ndev, i;
-       int ret = 0;
-
-       devmax = ipath_count_units(NULL, NULL, NULL);
-
-       for (ndev = 0; ndev < devmax; ndev++) {
-               struct ipath_devdata *dd = ipath_lookup(ndev);
-
-               if (!usable(dd))
-                       continue;
-               for (i = 1; i < dd->ipath_cfgports; i++) {
-                       struct ipath_portdata *pd = dd->ipath_pd[i];
-
-                       /* Skip ports which are not yet open */
-                       if (!pd || !pd->port_cnt)
-                               continue;
-                       /* Skip port if it doesn't match the requested one */
-                       if (pd->port_subport_id != uinfo->spu_subport_id)
-                               continue;
-                       /* Verify the sharing process matches the master */
-                       if (pd->port_subport_cnt != uinfo->spu_subport_cnt ||
-                           pd->userversion != uinfo->spu_userversion ||
-                           pd->port_cnt >= pd->port_subport_cnt) {
-                               ret = -EINVAL;
-                               goto done;
-                       }
-                       port_fp(fp) = pd;
-                       subport_fp(fp) = pd->port_cnt++;
-                       pd->port_subpid[subport_fp(fp)] =
-                               get_pid(task_pid(current));
-                       tidcursor_fp(fp) = 0;
-                       pd->active_slaves |= 1 << subport_fp(fp);
-                       ipath_cdbg(PROC,
-                                  "%s[%u] %u sharing %s[%u] unit:port %u:%u\n",
-                                  current->comm, current->pid,
-                                  subport_fp(fp),
-                                  pd->port_comm, pid_nr(pd->port_pid),
-                                  dd->ipath_unit, pd->port_port);
-                       ret = 1;
-                       goto done;
-               }
-       }
-
-done:
-       return ret;
-}
-
-static int ipath_open(struct inode *in, struct file *fp)
-{
-       /* The real work is performed later in ipath_assign_port() */
-       fp->private_data = kzalloc(sizeof(struct ipath_filedata), GFP_KERNEL);
-       return fp->private_data ? 0 : -ENOMEM;
-}
-
-/* Get port early, so can set affinity prior to memory allocation */
-static int ipath_assign_port(struct file *fp,
-                             const struct ipath_user_info *uinfo)
-{
-       int ret;
-       int i_minor;
-       unsigned swmajor, swminor;
-
-       /* Check to be sure we haven't already initialized this file */
-       if (port_fp(fp)) {
-               ret = -EINVAL;
-               goto done;
-       }
-
-       /* for now, if major version is different, bail */
-       swmajor = uinfo->spu_userversion >> 16;
-       if (swmajor != IPATH_USER_SWMAJOR) {
-               ipath_dbg("User major version %d not same as driver "
-                         "major %d\n", uinfo->spu_userversion >> 16,
-                         IPATH_USER_SWMAJOR);
-               ret = -ENODEV;
-               goto done;
-       }
-
-       swminor = uinfo->spu_userversion & 0xffff;
-       if (swminor != IPATH_USER_SWMINOR)
-               ipath_dbg("User minor version %d not same as driver "
-                         "minor %d\n", swminor, IPATH_USER_SWMINOR);
-
-       mutex_lock(&ipath_mutex);
-
-       if (ipath_compatible_subports(swmajor, swminor) &&
-           uinfo->spu_subport_cnt &&
-           (ret = find_shared_port(fp, uinfo))) {
-               if (ret > 0)
-                       ret = 0;
-               goto done_chk_sdma;
-       }
-
-       i_minor = iminor(file_inode(fp)) - IPATH_USER_MINOR_BASE;
-       ipath_cdbg(VERBOSE, "open on dev %lx (minor %d)\n",
-                  (long)file_inode(fp)->i_rdev, i_minor);
-
-       if (i_minor)
-               ret = find_free_port(i_minor - 1, fp, uinfo);
-       else
-               ret = find_best_unit(fp, uinfo);
-
-done_chk_sdma:
-       if (!ret) {
-               struct ipath_filedata *fd = fp->private_data;
-               const struct ipath_portdata *pd = fd->pd;
-               const struct ipath_devdata *dd = pd->port_dd;
-
-               fd->pq = ipath_user_sdma_queue_create(&dd->pcidev->dev,
-                                                     dd->ipath_unit,
-                                                     pd->port_port,
-                                                     fd->subport);
-
-               if (!fd->pq)
-                       ret = -ENOMEM;
-       }
-
-       mutex_unlock(&ipath_mutex);
-
-done:
-       return ret;
-}
-
-
-static int ipath_do_user_init(struct file *fp,
-                             const struct ipath_user_info *uinfo)
-{
-       int ret;
-       struct ipath_portdata *pd = port_fp(fp);
-       struct ipath_devdata *dd;
-       u32 head32;
-
-       /* Subports don't need to initialize anything since master did it. */
-       if (subport_fp(fp)) {
-               ret = wait_event_interruptible(pd->port_wait,
-                       !test_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag));
-               goto done;
-       }
-
-       dd = pd->port_dd;
-
-       if (uinfo->spu_rcvhdrsize) {
-               ret = ipath_setrcvhdrsize(dd, uinfo->spu_rcvhdrsize);
-               if (ret)
-                       goto done;
-       }
-
-       /* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */
-
-       /* some ports may get extra buffers, calculate that here */
-       if (pd->port_port <= dd->ipath_ports_extrabuf)
-               pd->port_piocnt = dd->ipath_pbufsport + 1;
-       else
-               pd->port_piocnt = dd->ipath_pbufsport;
-
-       /* for right now, kernel piobufs are at end, so port 1 is at 0 */
-       if (pd->port_port <= dd->ipath_ports_extrabuf)
-               pd->port_pio_base = (dd->ipath_pbufsport + 1)
-                       * (pd->port_port - 1);
-       else
-               pd->port_pio_base = dd->ipath_ports_extrabuf +
-                       dd->ipath_pbufsport * (pd->port_port - 1);
-       pd->port_piobufs = dd->ipath_piobufbase +
-               pd->port_pio_base * dd->ipath_palign;
-       ipath_cdbg(VERBOSE, "piobuf base for port %u is 0x%x, piocnt %u,"
-               " first pio %u\n", pd->port_port, pd->port_piobufs,
-               pd->port_piocnt, pd->port_pio_base);
-       ipath_chg_pioavailkernel(dd, pd->port_pio_base, pd->port_piocnt, 0);
-
-       /*
-        * Now allocate the rcvhdr Q and eager TIDs; skip the TID
-        * array for time being.  If pd->port_port > chip-supported,
-        * we need to do extra stuff here to handle by handling overflow
-        * through port 0, someday
-        */
-       ret = ipath_create_rcvhdrq(dd, pd);
-       if (!ret)
-               ret = ipath_create_user_egr(pd);
-       if (ret)
-               goto done;
-
-       /*
-        * set the eager head register for this port to the current values
-        * of the tail pointers, since we don't know if they were
-        * updated on last use of the port.
-        */
-       head32 = ipath_read_ureg32(dd, ur_rcvegrindextail, pd->port_port);
-       ipath_write_ureg(dd, ur_rcvegrindexhead, head32, pd->port_port);
-       pd->port_lastrcvhdrqtail = -1;
-       ipath_cdbg(VERBOSE, "Wrote port%d egrhead %x from tail regs\n",
-               pd->port_port, head32);
-       pd->port_tidcursor = 0; /* start at beginning after open */
-
-       /* initialize poll variables... */
-       pd->port_urgent = 0;
-       pd->port_urgent_poll = 0;
-       pd->port_hdrqfull_poll = pd->port_hdrqfull;
-
-       /*
-        * Now enable the port for receive.
-        * For chips that are set to DMA the tail register to memory
-        * when they change (and when the update bit transitions from
-        * 0 to 1.  So for those chips, we turn it off and then back on.
-        * This will (very briefly) affect any other open ports, but the
-        * duration is very short, and therefore isn't an issue.  We
-        * explicitly set the in-memory tail copy to 0 beforehand, so we
-        * don't have to wait to be sure the DMA update has happened
-        * (chip resets head/tail to 0 on transition to enable).
-        */
-       set_bit(dd->ipath_r_portenable_shift + pd->port_port,
-               &dd->ipath_rcvctrl);
-       if (!(dd->ipath_flags & IPATH_NODMA_RTAIL)) {
-               if (pd->port_rcvhdrtail_kvaddr)
-                       ipath_clear_rcvhdrtail(pd);
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
-                       dd->ipath_rcvctrl &
-                       ~(1ULL << dd->ipath_r_tailupd_shift));
-       }
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
-                        dd->ipath_rcvctrl);
-       /* Notify any waiting slaves */
-       if (pd->port_subport_cnt) {
-               clear_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag);
-               wake_up(&pd->port_wait);
-       }
-done:
-       return ret;
-}
-
-/**
- * unlock_exptid - unlock any expected TID entries port still had in use
- * @pd: port
- *
- * We don't actually update the chip here, because we do a bulk update
- * below, using ipath_f_clear_tids.
- */
-static void unlock_expected_tids(struct ipath_portdata *pd)
-{
-       struct ipath_devdata *dd = pd->port_dd;
-       int port_tidbase = pd->port_port * dd->ipath_rcvtidcnt;
-       int i, cnt = 0, maxtid = port_tidbase + dd->ipath_rcvtidcnt;
-
-       ipath_cdbg(VERBOSE, "Port %u unlocking any locked expTID pages\n",
-                  pd->port_port);
-       for (i = port_tidbase; i < maxtid; i++) {
-               struct page *ps = dd->ipath_pageshadow[i];
-
-               if (!ps)
-                       continue;
-
-               dd->ipath_pageshadow[i] = NULL;
-               pci_unmap_page(dd->pcidev, dd->ipath_physshadow[i],
-                       PAGE_SIZE, PCI_DMA_FROMDEVICE);
-               ipath_release_user_pages_on_close(&ps, 1);
-               cnt++;
-               ipath_stats.sps_pageunlocks++;
-       }
-       if (cnt)
-               ipath_cdbg(VERBOSE, "Port %u locked %u expTID entries\n",
-                          pd->port_port, cnt);
-
-       if (ipath_stats.sps_pagelocks || ipath_stats.sps_pageunlocks)
-               ipath_cdbg(VERBOSE, "%llu pages locked, %llu unlocked\n",
-                          (unsigned long long) ipath_stats.sps_pagelocks,
-                          (unsigned long long)
-                          ipath_stats.sps_pageunlocks);
-}
-
-static int ipath_close(struct inode *in, struct file *fp)
-{
-       struct ipath_filedata *fd;
-       struct ipath_portdata *pd;
-       struct ipath_devdata *dd;
-       unsigned long flags;
-       unsigned port;
-       struct pid *pid;
-
-       ipath_cdbg(VERBOSE, "close on dev %lx, private data %p\n",
-                  (long)in->i_rdev, fp->private_data);
-
-       mutex_lock(&ipath_mutex);
-
-       fd = fp->private_data;
-       fp->private_data = NULL;
-       pd = fd->pd;
-       if (!pd) {
-               mutex_unlock(&ipath_mutex);
-               goto bail;
-       }
-
-       dd = pd->port_dd;
-
-       /* drain user sdma queue */
-       ipath_user_sdma_queue_drain(dd, fd->pq);
-       ipath_user_sdma_queue_destroy(fd->pq);
-
-       if (--pd->port_cnt) {
-               /*
-                * XXX If the master closes the port before the slave(s),
-                * revoke the mmap for the eager receive queue so
-                * the slave(s) don't wait for receive data forever.
-                */
-               pd->active_slaves &= ~(1 << fd->subport);
-               put_pid(pd->port_subpid[fd->subport]);
-               pd->port_subpid[fd->subport] = NULL;
-               mutex_unlock(&ipath_mutex);
-               goto bail;
-       }
-       /* early; no interrupt users after this */
-       spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
-       port = pd->port_port;
-       dd->ipath_pd[port] = NULL;
-       pid = pd->port_pid;
-       pd->port_pid = NULL;
-       spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
-
-       if (pd->port_rcvwait_to || pd->port_piowait_to
-           || pd->port_rcvnowait || pd->port_pionowait) {
-               ipath_cdbg(VERBOSE, "port%u, %u rcv, %u pio wait timeo; "
-                          "%u rcv %u, pio already\n",
-                          pd->port_port, pd->port_rcvwait_to,
-                          pd->port_piowait_to, pd->port_rcvnowait,
-                          pd->port_pionowait);
-               pd->port_rcvwait_to = pd->port_piowait_to =
-                       pd->port_rcvnowait = pd->port_pionowait = 0;
-       }
-       if (pd->port_flag) {
-               ipath_cdbg(PROC, "port %u port_flag set: 0x%lx\n",
-                         pd->port_port, pd->port_flag);
-               pd->port_flag = 0;
-       }
-
-       if (dd->ipath_kregbase) {
-               /* atomically clear receive enable port and intr avail. */
-               clear_bit(dd->ipath_r_portenable_shift + port,
-                         &dd->ipath_rcvctrl);
-               clear_bit(pd->port_port + dd->ipath_r_intravail_shift,
-                         &dd->ipath_rcvctrl);
-               ipath_write_kreg( dd, dd->ipath_kregs->kr_rcvctrl,
-                       dd->ipath_rcvctrl);
-               /* and read back from chip to be sure that nothing
-                * else is in flight when we do the rest */
-               (void)ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-
-               /* clean up the pkeys for this port user */
-               ipath_clean_part_key(pd, dd);
-               /*
-                * be paranoid, and never write 0's to these, just use an
-                * unused part of the port 0 tail page.  Of course,
-                * rcvhdraddr points to a large chunk of memory, so this
-                * could still trash things, but at least it won't trash
-                * page 0, and by disabling the port, it should stop "soon",
-                * even if a packet or two is in already in flight after we
-                * disabled the port.
-                */
-               ipath_write_kreg_port(dd,
-                       dd->ipath_kregs->kr_rcvhdrtailaddr, port,
-                       dd->ipath_dummy_hdrq_phys);
-               ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr,
-                       pd->port_port, dd->ipath_dummy_hdrq_phys);
-
-               ipath_disarm_piobufs(dd, pd->port_pio_base, pd->port_piocnt);
-               ipath_chg_pioavailkernel(dd, pd->port_pio_base,
-                       pd->port_piocnt, 1);
-
-               dd->ipath_f_clear_tids(dd, pd->port_port);
-
-               if (dd->ipath_pageshadow)
-                       unlock_expected_tids(pd);
-               ipath_stats.sps_ports--;
-               ipath_cdbg(PROC, "%s[%u] closed port %u:%u\n",
-                          pd->port_comm, pid_nr(pid),
-                          dd->ipath_unit, port);
-       }
-
-       put_pid(pid);
-       mutex_unlock(&ipath_mutex);
-       ipath_free_pddata(dd, pd); /* after releasing the mutex */
-
-bail:
-       kfree(fd);
-       return 0;
-}
-
-static int ipath_port_info(struct ipath_portdata *pd, u16 subport,
-                          struct ipath_port_info __user *uinfo)
-{
-       struct ipath_port_info info;
-       int nup;
-       int ret;
-       size_t sz;
-
-       (void) ipath_count_units(NULL, &nup, NULL);
-       info.num_active = nup;
-       info.unit = pd->port_dd->ipath_unit;
-       info.port = pd->port_port;
-       info.subport = subport;
-       /* Don't return new fields if old library opened the port. */
-       if (ipath_supports_subports(pd->userversion >> 16,
-                                   pd->userversion & 0xffff)) {
-               /* Number of user ports available for this device. */
-               info.num_ports = pd->port_dd->ipath_cfgports - 1;
-               info.num_subports = pd->port_subport_cnt;
-               sz = sizeof(info);
-       } else
-               sz = sizeof(info) - 2 * sizeof(u16);
-
-       if (copy_to_user(uinfo, &info, sz)) {
-               ret = -EFAULT;
-               goto bail;
-       }
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-static int ipath_get_slave_info(struct ipath_portdata *pd,
-                               void __user *slave_mask_addr)
-{
-       int ret = 0;
-
-       if (copy_to_user(slave_mask_addr, &pd->active_slaves, sizeof(u32)))
-               ret = -EFAULT;
-       return ret;
-}
-
-static int ipath_sdma_get_inflight(struct ipath_user_sdma_queue *pq,
-                                  u32 __user *inflightp)
-{
-       const u32 val = ipath_user_sdma_inflight_counter(pq);
-
-       if (put_user(val, inflightp))
-               return -EFAULT;
-
-       return 0;
-}
-
-static int ipath_sdma_get_complete(struct ipath_devdata *dd,
-                                  struct ipath_user_sdma_queue *pq,
-                                  u32 __user *completep)
-{
-       u32 val;
-       int err;
-
-       err = ipath_user_sdma_make_progress(dd, pq);
-       if (err < 0)
-               return err;
-
-       val = ipath_user_sdma_complete_counter(pq);
-       if (put_user(val, completep))
-               return -EFAULT;
-
-       return 0;
-}
-
-static ssize_t ipath_write(struct file *fp, const char __user *data,
-                          size_t count, loff_t *off)
-{
-       const struct ipath_cmd __user *ucmd;
-       struct ipath_portdata *pd;
-       const void __user *src;
-       size_t consumed, copy;
-       struct ipath_cmd cmd;
-       ssize_t ret = 0;
-       void *dest;
-
-       if (count < sizeof(cmd.type)) {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       ucmd = (const struct ipath_cmd __user *) data;
-
-       if (copy_from_user(&cmd.type, &ucmd->type, sizeof(cmd.type))) {
-               ret = -EFAULT;
-               goto bail;
-       }
-
-       consumed = sizeof(cmd.type);
-
-       switch (cmd.type) {
-       case IPATH_CMD_ASSIGN_PORT:
-       case __IPATH_CMD_USER_INIT:
-       case IPATH_CMD_USER_INIT:
-               copy = sizeof(cmd.cmd.user_info);
-               dest = &cmd.cmd.user_info;
-               src = &ucmd->cmd.user_info;
-               break;
-       case IPATH_CMD_RECV_CTRL:
-               copy = sizeof(cmd.cmd.recv_ctrl);
-               dest = &cmd.cmd.recv_ctrl;
-               src = &ucmd->cmd.recv_ctrl;
-               break;
-       case IPATH_CMD_PORT_INFO:
-               copy = sizeof(cmd.cmd.port_info);
-               dest = &cmd.cmd.port_info;
-               src = &ucmd->cmd.port_info;
-               break;
-       case IPATH_CMD_TID_UPDATE:
-       case IPATH_CMD_TID_FREE:
-               copy = sizeof(cmd.cmd.tid_info);
-               dest = &cmd.cmd.tid_info;
-               src = &ucmd->cmd.tid_info;
-               break;
-       case IPATH_CMD_SET_PART_KEY:
-               copy = sizeof(cmd.cmd.part_key);
-               dest = &cmd.cmd.part_key;
-               src = &ucmd->cmd.part_key;
-               break;
-       case __IPATH_CMD_SLAVE_INFO:
-               copy = sizeof(cmd.cmd.slave_mask_addr);
-               dest = &cmd.cmd.slave_mask_addr;
-               src = &ucmd->cmd.slave_mask_addr;
-               break;
-       case IPATH_CMD_PIOAVAILUPD:     // force an update of PIOAvail reg
-               copy = 0;
-               src = NULL;
-               dest = NULL;
-               break;
-       case IPATH_CMD_POLL_TYPE:
-               copy = sizeof(cmd.cmd.poll_type);
-               dest = &cmd.cmd.poll_type;
-               src = &ucmd->cmd.poll_type;
-               break;
-       case IPATH_CMD_ARMLAUNCH_CTRL:
-               copy = sizeof(cmd.cmd.armlaunch_ctrl);
-               dest = &cmd.cmd.armlaunch_ctrl;
-               src = &ucmd->cmd.armlaunch_ctrl;
-               break;
-       case IPATH_CMD_SDMA_INFLIGHT:
-               copy = sizeof(cmd.cmd.sdma_inflight);
-               dest = &cmd.cmd.sdma_inflight;
-               src = &ucmd->cmd.sdma_inflight;
-               break;
-       case IPATH_CMD_SDMA_COMPLETE:
-               copy = sizeof(cmd.cmd.sdma_complete);
-               dest = &cmd.cmd.sdma_complete;
-               src = &ucmd->cmd.sdma_complete;
-               break;
-       default:
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       if (copy) {
-               if ((count - consumed) < copy) {
-                       ret = -EINVAL;
-                       goto bail;
-               }
-
-               if (copy_from_user(dest, src, copy)) {
-                       ret = -EFAULT;
-                       goto bail;
-               }
-
-               consumed += copy;
-       }
-
-       pd = port_fp(fp);
-       if (!pd && cmd.type != __IPATH_CMD_USER_INIT &&
-               cmd.type != IPATH_CMD_ASSIGN_PORT) {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       switch (cmd.type) {
-       case IPATH_CMD_ASSIGN_PORT:
-               ret = ipath_assign_port(fp, &cmd.cmd.user_info);
-               if (ret)
-                       goto bail;
-               break;
-       case __IPATH_CMD_USER_INIT:
-               /* backwards compatibility, get port first */
-               ret = ipath_assign_port(fp, &cmd.cmd.user_info);
-               if (ret)
-                       goto bail;
-               /* and fall through to current version. */
-       case IPATH_CMD_USER_INIT:
-               ret = ipath_do_user_init(fp, &cmd.cmd.user_info);
-               if (ret)
-                       goto bail;
-               ret = ipath_get_base_info(
-                       fp, (void __user *) (unsigned long)
-                       cmd.cmd.user_info.spu_base_info,
-                       cmd.cmd.user_info.spu_base_info_size);
-               break;
-       case IPATH_CMD_RECV_CTRL:
-               ret = ipath_manage_rcvq(pd, subport_fp(fp), cmd.cmd.recv_ctrl);
-               break;
-       case IPATH_CMD_PORT_INFO:
-               ret = ipath_port_info(pd, subport_fp(fp),
-                                     (struct ipath_port_info __user *)
-                                     (unsigned long) cmd.cmd.port_info);
-               break;
-       case IPATH_CMD_TID_UPDATE:
-               ret = ipath_tid_update(pd, fp, &cmd.cmd.tid_info);
-               break;
-       case IPATH_CMD_TID_FREE:
-               ret = ipath_tid_free(pd, subport_fp(fp), &cmd.cmd.tid_info);
-               break;
-       case IPATH_CMD_SET_PART_KEY:
-               ret = ipath_set_part_key(pd, cmd.cmd.part_key);
-               break;
-       case __IPATH_CMD_SLAVE_INFO:
-               ret = ipath_get_slave_info(pd,
-                                          (void __user *) (unsigned long)
-                                          cmd.cmd.slave_mask_addr);
-               break;
-       case IPATH_CMD_PIOAVAILUPD:
-               ipath_force_pio_avail_update(pd->port_dd);
-               break;
-       case IPATH_CMD_POLL_TYPE:
-               pd->poll_type = cmd.cmd.poll_type;
-               break;
-       case IPATH_CMD_ARMLAUNCH_CTRL:
-               if (cmd.cmd.armlaunch_ctrl)
-                       ipath_enable_armlaunch(pd->port_dd);
-               else
-                       ipath_disable_armlaunch(pd->port_dd);
-               break;
-       case IPATH_CMD_SDMA_INFLIGHT:
-               ret = ipath_sdma_get_inflight(user_sdma_queue_fp(fp),
-                                             (u32 __user *) (unsigned long)
-                                             cmd.cmd.sdma_inflight);
-               break;
-       case IPATH_CMD_SDMA_COMPLETE:
-               ret = ipath_sdma_get_complete(pd->port_dd,
-                                             user_sdma_queue_fp(fp),
-                                             (u32 __user *) (unsigned long)
-                                             cmd.cmd.sdma_complete);
-               break;
-       }
-
-       if (ret >= 0)
-               ret = consumed;
-
-bail:
-       return ret;
-}
-
-static ssize_t ipath_write_iter(struct kiocb *iocb, struct iov_iter *from)
-{
-       struct file *filp = iocb->ki_filp;
-       struct ipath_filedata *fp = filp->private_data;
-       struct ipath_portdata *pd = port_fp(filp);
-       struct ipath_user_sdma_queue *pq = fp->pq;
-
-       if (!iter_is_iovec(from) || !from->nr_segs)
-               return -EINVAL;
-
-       return ipath_user_sdma_writev(pd->port_dd, pq, from->iov, from->nr_segs);
-}
-
-static struct class *ipath_class;
-
-static int init_cdev(int minor, char *name, const struct file_operations *fops,
-                    struct cdev **cdevp, struct device **devp)
-{
-       const dev_t dev = MKDEV(IPATH_MAJOR, minor);
-       struct cdev *cdev = NULL;
-       struct device *device = NULL;
-       int ret;
-
-       cdev = cdev_alloc();
-       if (!cdev) {
-               printk(KERN_ERR IPATH_DRV_NAME
-                      ": Could not allocate cdev for minor %d, %s\n",
-                      minor, name);
-               ret = -ENOMEM;
-               goto done;
-       }
-
-       cdev->owner = THIS_MODULE;
-       cdev->ops = fops;
-       kobject_set_name(&cdev->kobj, name);
-
-       ret = cdev_add(cdev, dev, 1);
-       if (ret < 0) {
-               printk(KERN_ERR IPATH_DRV_NAME
-                      ": Could not add cdev for minor %d, %s (err %d)\n",
-                      minor, name, -ret);
-               goto err_cdev;
-       }
-
-       device = device_create(ipath_class, NULL, dev, NULL, name);
-
-       if (IS_ERR(device)) {
-               ret = PTR_ERR(device);
-               printk(KERN_ERR IPATH_DRV_NAME ": Could not create "
-                      "device for minor %d, %s (err %d)\n",
-                      minor, name, -ret);
-               goto err_cdev;
-       }
-
-       goto done;
-
-err_cdev:
-       cdev_del(cdev);
-       cdev = NULL;
-
-done:
-       if (ret >= 0) {
-               *cdevp = cdev;
-               *devp = device;
-       } else {
-               *cdevp = NULL;
-               *devp = NULL;
-       }
-
-       return ret;
-}
-
-int ipath_cdev_init(int minor, char *name, const struct file_operations *fops,
-                   struct cdev **cdevp, struct device **devp)
-{
-       return init_cdev(minor, name, fops, cdevp, devp);
-}
-
-static void cleanup_cdev(struct cdev **cdevp,
-                        struct device **devp)
-{
-       struct device *dev = *devp;
-
-       if (dev) {
-               device_unregister(dev);
-               *devp = NULL;
-       }
-
-       if (*cdevp) {
-               cdev_del(*cdevp);
-               *cdevp = NULL;
-       }
-}
-
-void ipath_cdev_cleanup(struct cdev **cdevp,
-                       struct device **devp)
-{
-       cleanup_cdev(cdevp, devp);
-}
-
-static struct cdev *wildcard_cdev;
-static struct device *wildcard_dev;
-
-static const dev_t dev = MKDEV(IPATH_MAJOR, 0);
-
-static int user_init(void)
-{
-       int ret;
-
-       ret = register_chrdev_region(dev, IPATH_NMINORS, IPATH_DRV_NAME);
-       if (ret < 0) {
-               printk(KERN_ERR IPATH_DRV_NAME ": Could not register "
-                      "chrdev region (err %d)\n", -ret);
-               goto done;
-       }
-
-       ipath_class = class_create(THIS_MODULE, IPATH_DRV_NAME);
-
-       if (IS_ERR(ipath_class)) {
-               ret = PTR_ERR(ipath_class);
-               printk(KERN_ERR IPATH_DRV_NAME ": Could not create "
-                      "device class (err %d)\n", -ret);
-               goto bail;
-       }
-
-       goto done;
-bail:
-       unregister_chrdev_region(dev, IPATH_NMINORS);
-done:
-       return ret;
-}
-
-static void user_cleanup(void)
-{
-       if (ipath_class) {
-               class_destroy(ipath_class);
-               ipath_class = NULL;
-       }
-
-       unregister_chrdev_region(dev, IPATH_NMINORS);
-}
-
-static atomic_t user_count = ATOMIC_INIT(0);
-static atomic_t user_setup = ATOMIC_INIT(0);
-
-int ipath_user_add(struct ipath_devdata *dd)
-{
-       char name[10];
-       int ret;
-
-       if (atomic_inc_return(&user_count) == 1) {
-               ret = user_init();
-               if (ret < 0) {
-                       ipath_dev_err(dd, "Unable to set up user support: "
-                                     "error %d\n", -ret);
-                       goto bail;
-               }
-               ret = init_cdev(0, "ipath", &ipath_file_ops, &wildcard_cdev,
-                               &wildcard_dev);
-               if (ret < 0) {
-                       ipath_dev_err(dd, "Could not create wildcard "
-                                     "minor: error %d\n", -ret);
-                       goto bail_user;
-               }
-
-               atomic_set(&user_setup, 1);
-       }
-
-       snprintf(name, sizeof(name), "ipath%d", dd->ipath_unit);
-
-       ret = init_cdev(dd->ipath_unit + 1, name, &ipath_file_ops,
-                       &dd->user_cdev, &dd->user_dev);
-       if (ret < 0)
-               ipath_dev_err(dd, "Could not create user minor %d, %s\n",
-                             dd->ipath_unit + 1, name);
-
-       goto bail;
-
-bail_user:
-       user_cleanup();
-bail:
-       return ret;
-}
-
-void ipath_user_remove(struct ipath_devdata *dd)
-{
-       cleanup_cdev(&dd->user_cdev, &dd->user_dev);
-
-       if (atomic_dec_return(&user_count) == 0) {
-               if (atomic_read(&user_setup) == 0)
-                       goto bail;
-
-               cleanup_cdev(&wildcard_cdev, &wildcard_dev);
-               user_cleanup();
-
-               atomic_set(&user_setup, 0);
-       }
-bail:
-       return;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_fs.c b/drivers/staging/rdma/ipath/ipath_fs.c
deleted file mode 100644 (file)
index 796af68..0000000
+++ /dev/null
@@ -1,415 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- * Copyright (c) 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/module.h>
-#include <linux/fs.h>
-#include <linux/mount.h>
-#include <linux/pagemap.h>
-#include <linux/init.h>
-#include <linux/namei.h>
-#include <linux/slab.h>
-
-#include "ipath_kernel.h"
-
-#define IPATHFS_MAGIC 0x726a77
-
-static struct super_block *ipath_super;
-
-static int ipathfs_mknod(struct inode *dir, struct dentry *dentry,
-                        umode_t mode, const struct file_operations *fops,
-                        void *data)
-{
-       int error;
-       struct inode *inode = new_inode(dir->i_sb);
-
-       if (!inode) {
-               error = -EPERM;
-               goto bail;
-       }
-
-       inode->i_ino = get_next_ino();
-       inode->i_mode = mode;
-       inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
-       inode->i_private = data;
-       if (S_ISDIR(mode)) {
-               inode->i_op = &simple_dir_inode_operations;
-               inc_nlink(inode);
-               inc_nlink(dir);
-       }
-
-       inode->i_fop = fops;
-
-       d_instantiate(dentry, inode);
-       error = 0;
-
-bail:
-       return error;
-}
-
-static int create_file(const char *name, umode_t mode,
-                      struct dentry *parent, struct dentry **dentry,
-                      const struct file_operations *fops, void *data)
-{
-       int error;
-
-       mutex_lock(&d_inode(parent)->i_mutex);
-       *dentry = lookup_one_len(name, parent, strlen(name));
-       if (!IS_ERR(*dentry))
-               error = ipathfs_mknod(d_inode(parent), *dentry,
-                                     mode, fops, data);
-       else
-               error = PTR_ERR(*dentry);
-       mutex_unlock(&d_inode(parent)->i_mutex);
-
-       return error;
-}
-
-static ssize_t atomic_stats_read(struct file *file, char __user *buf,
-                                size_t count, loff_t *ppos)
-{
-       return simple_read_from_buffer(buf, count, ppos, &ipath_stats,
-                                      sizeof ipath_stats);
-}
-
-static const struct file_operations atomic_stats_ops = {
-       .read = atomic_stats_read,
-       .llseek = default_llseek,
-};
-
-static ssize_t atomic_counters_read(struct file *file, char __user *buf,
-                                   size_t count, loff_t *ppos)
-{
-       struct infinipath_counters counters;
-       struct ipath_devdata *dd;
-
-       dd = file_inode(file)->i_private;
-       dd->ipath_f_read_counters(dd, &counters);
-
-       return simple_read_from_buffer(buf, count, ppos, &counters,
-                                      sizeof counters);
-}
-
-static const struct file_operations atomic_counters_ops = {
-       .read = atomic_counters_read,
-       .llseek = default_llseek,
-};
-
-static ssize_t flash_read(struct file *file, char __user *buf,
-                         size_t count, loff_t *ppos)
-{
-       struct ipath_devdata *dd;
-       ssize_t ret;
-       loff_t pos;
-       char *tmp;
-
-       pos = *ppos;
-
-       if ( pos < 0) {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       if (pos >= sizeof(struct ipath_flash)) {
-               ret = 0;
-               goto bail;
-       }
-
-       if (count > sizeof(struct ipath_flash) - pos)
-               count = sizeof(struct ipath_flash) - pos;
-
-       tmp = kmalloc(count, GFP_KERNEL);
-       if (!tmp) {
-               ret = -ENOMEM;
-               goto bail;
-       }
-
-       dd = file_inode(file)->i_private;
-       if (ipath_eeprom_read(dd, pos, tmp, count)) {
-               ipath_dev_err(dd, "failed to read from flash\n");
-               ret = -ENXIO;
-               goto bail_tmp;
-       }
-
-       if (copy_to_user(buf, tmp, count)) {
-               ret = -EFAULT;
-               goto bail_tmp;
-       }
-
-       *ppos = pos + count;
-       ret = count;
-
-bail_tmp:
-       kfree(tmp);
-
-bail:
-       return ret;
-}
-
-static ssize_t flash_write(struct file *file, const char __user *buf,
-                          size_t count, loff_t *ppos)
-{
-       struct ipath_devdata *dd;
-       ssize_t ret;
-       loff_t pos;
-       char *tmp;
-
-       pos = *ppos;
-
-       if (pos != 0) {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       if (count != sizeof(struct ipath_flash)) {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       tmp = memdup_user(buf, count);
-       if (IS_ERR(tmp))
-               return PTR_ERR(tmp);
-
-       dd = file_inode(file)->i_private;
-       if (ipath_eeprom_write(dd, pos, tmp, count)) {
-               ret = -ENXIO;
-               ipath_dev_err(dd, "failed to write to flash\n");
-               goto bail_tmp;
-       }
-
-       *ppos = pos + count;
-       ret = count;
-
-bail_tmp:
-       kfree(tmp);
-
-bail:
-       return ret;
-}
-
-static const struct file_operations flash_ops = {
-       .read = flash_read,
-       .write = flash_write,
-       .llseek = default_llseek,
-};
-
-static int create_device_files(struct super_block *sb,
-                              struct ipath_devdata *dd)
-{
-       struct dentry *dir, *tmp;
-       char unit[10];
-       int ret;
-
-       snprintf(unit, sizeof unit, "%02d", dd->ipath_unit);
-       ret = create_file(unit, S_IFDIR|S_IRUGO|S_IXUGO, sb->s_root, &dir,
-                         &simple_dir_operations, dd);
-       if (ret) {
-               printk(KERN_ERR "create_file(%s) failed: %d\n", unit, ret);
-               goto bail;
-       }
-
-       ret = create_file("atomic_counters", S_IFREG|S_IRUGO, dir, &tmp,
-                         &atomic_counters_ops, dd);
-       if (ret) {
-               printk(KERN_ERR "create_file(%s/atomic_counters) "
-                      "failed: %d\n", unit, ret);
-               goto bail;
-       }
-
-       ret = create_file("flash", S_IFREG|S_IWUSR|S_IRUGO, dir, &tmp,
-                         &flash_ops, dd);
-       if (ret) {
-               printk(KERN_ERR "create_file(%s/flash) "
-                      "failed: %d\n", unit, ret);
-               goto bail;
-       }
-
-bail:
-       return ret;
-}
-
-static int remove_file(struct dentry *parent, char *name)
-{
-       struct dentry *tmp;
-       int ret;
-
-       tmp = lookup_one_len(name, parent, strlen(name));
-
-       if (IS_ERR(tmp)) {
-               ret = PTR_ERR(tmp);
-               goto bail;
-       }
-
-       spin_lock(&tmp->d_lock);
-       if (simple_positive(tmp)) {
-               dget_dlock(tmp);
-               __d_drop(tmp);
-               spin_unlock(&tmp->d_lock);
-               simple_unlink(d_inode(parent), tmp);
-       } else
-               spin_unlock(&tmp->d_lock);
-
-       ret = 0;
-bail:
-       /*
-        * We don't expect clients to care about the return value, but
-        * it's there if they need it.
-        */
-       return ret;
-}
-
-static int remove_device_files(struct super_block *sb,
-                              struct ipath_devdata *dd)
-{
-       struct dentry *dir, *root;
-       char unit[10];
-       int ret;
-
-       root = dget(sb->s_root);
-       mutex_lock(&d_inode(root)->i_mutex);
-       snprintf(unit, sizeof unit, "%02d", dd->ipath_unit);
-       dir = lookup_one_len(unit, root, strlen(unit));
-
-       if (IS_ERR(dir)) {
-               ret = PTR_ERR(dir);
-               printk(KERN_ERR "Lookup of %s failed\n", unit);
-               goto bail;
-       }
-
-       remove_file(dir, "flash");
-       remove_file(dir, "atomic_counters");
-       d_delete(dir);
-       ret = simple_rmdir(d_inode(root), dir);
-
-bail:
-       mutex_unlock(&d_inode(root)->i_mutex);
-       dput(root);
-       return ret;
-}
-
-static int ipathfs_fill_super(struct super_block *sb, void *data,
-                             int silent)
-{
-       struct ipath_devdata *dd, *tmp;
-       unsigned long flags;
-       int ret;
-
-       static struct tree_descr files[] = {
-               [2] = {"atomic_stats", &atomic_stats_ops, S_IRUGO},
-               {""},
-       };
-
-       ret = simple_fill_super(sb, IPATHFS_MAGIC, files);
-       if (ret) {
-               printk(KERN_ERR "simple_fill_super failed: %d\n", ret);
-               goto bail;
-       }
-
-       spin_lock_irqsave(&ipath_devs_lock, flags);
-
-       list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) {
-               spin_unlock_irqrestore(&ipath_devs_lock, flags);
-               ret = create_device_files(sb, dd);
-               if (ret)
-                       goto bail;
-               spin_lock_irqsave(&ipath_devs_lock, flags);
-       }
-
-       spin_unlock_irqrestore(&ipath_devs_lock, flags);
-
-bail:
-       return ret;
-}
-
-static struct dentry *ipathfs_mount(struct file_system_type *fs_type,
-                       int flags, const char *dev_name, void *data)
-{
-       struct dentry *ret;
-       ret = mount_single(fs_type, flags, data, ipathfs_fill_super);
-       if (!IS_ERR(ret))
-               ipath_super = ret->d_sb;
-       return ret;
-}
-
-static void ipathfs_kill_super(struct super_block *s)
-{
-       kill_litter_super(s);
-       ipath_super = NULL;
-}
-
-int ipathfs_add_device(struct ipath_devdata *dd)
-{
-       int ret;
-
-       if (ipath_super == NULL) {
-               ret = 0;
-               goto bail;
-       }
-
-       ret = create_device_files(ipath_super, dd);
-
-bail:
-       return ret;
-}
-
-int ipathfs_remove_device(struct ipath_devdata *dd)
-{
-       int ret;
-
-       if (ipath_super == NULL) {
-               ret = 0;
-               goto bail;
-       }
-
-       ret = remove_device_files(ipath_super, dd);
-
-bail:
-       return ret;
-}
-
-static struct file_system_type ipathfs_fs_type = {
-       .owner =        THIS_MODULE,
-       .name =         "ipathfs",
-       .mount =        ipathfs_mount,
-       .kill_sb =      ipathfs_kill_super,
-};
-MODULE_ALIAS_FS("ipathfs");
-
-int __init ipath_init_ipathfs(void)
-{
-       return register_filesystem(&ipathfs_fs_type);
-}
-
-void __exit ipath_exit_ipathfs(void)
-{
-       unregister_filesystem(&ipathfs_fs_type);
-}
diff --git a/drivers/staging/rdma/ipath/ipath_iba6110.c b/drivers/staging/rdma/ipath/ipath_iba6110.c
deleted file mode 100644 (file)
index 5f13572..0000000
+++ /dev/null
@@ -1,1939 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/*
- * This file contains all of the code that is specific to the InfiniPath
- * HT chip.
- */
-
-#include <linux/vmalloc.h>
-#include <linux/pci.h>
-#include <linux/delay.h>
-#include <linux/htirq.h>
-#include <rdma/ib_verbs.h>
-
-#include "ipath_kernel.h"
-#include "ipath_registers.h"
-
-static void ipath_setup_ht_setextled(struct ipath_devdata *, u64, u64);
-
-
-/*
- * This lists the InfiniPath registers, in the actual chip layout.
- * This structure should never be directly accessed.
- *
- * The names are in InterCap form because they're taken straight from
- * the chip specification.  Since they're only used in this file, they
- * don't pollute the rest of the source.
-*/
-
-struct _infinipath_do_not_use_kernel_regs {
-       unsigned long long Revision;
-       unsigned long long Control;
-       unsigned long long PageAlign;
-       unsigned long long PortCnt;
-       unsigned long long DebugPortSelect;
-       unsigned long long DebugPort;
-       unsigned long long SendRegBase;
-       unsigned long long UserRegBase;
-       unsigned long long CounterRegBase;
-       unsigned long long Scratch;
-       unsigned long long ReservedMisc1;
-       unsigned long long InterruptConfig;
-       unsigned long long IntBlocked;
-       unsigned long long IntMask;
-       unsigned long long IntStatus;
-       unsigned long long IntClear;
-       unsigned long long ErrorMask;
-       unsigned long long ErrorStatus;
-       unsigned long long ErrorClear;
-       unsigned long long HwErrMask;
-       unsigned long long HwErrStatus;
-       unsigned long long HwErrClear;
-       unsigned long long HwDiagCtrl;
-       unsigned long long MDIO;
-       unsigned long long IBCStatus;
-       unsigned long long IBCCtrl;
-       unsigned long long ExtStatus;
-       unsigned long long ExtCtrl;
-       unsigned long long GPIOOut;
-       unsigned long long GPIOMask;
-       unsigned long long GPIOStatus;
-       unsigned long long GPIOClear;
-       unsigned long long RcvCtrl;
-       unsigned long long RcvBTHQP;
-       unsigned long long RcvHdrSize;
-       unsigned long long RcvHdrCnt;
-       unsigned long long RcvHdrEntSize;
-       unsigned long long RcvTIDBase;
-       unsigned long long RcvTIDCnt;
-       unsigned long long RcvEgrBase;
-       unsigned long long RcvEgrCnt;
-       unsigned long long RcvBufBase;
-       unsigned long long RcvBufSize;
-       unsigned long long RxIntMemBase;
-       unsigned long long RxIntMemSize;
-       unsigned long long RcvPartitionKey;
-       unsigned long long ReservedRcv[10];
-       unsigned long long SendCtrl;
-       unsigned long long SendPIOBufBase;
-       unsigned long long SendPIOSize;
-       unsigned long long SendPIOBufCnt;
-       unsigned long long SendPIOAvailAddr;
-       unsigned long long TxIntMemBase;
-       unsigned long long TxIntMemSize;
-       unsigned long long ReservedSend[9];
-       unsigned long long SendBufferError;
-       unsigned long long SendBufferErrorCONT1;
-       unsigned long long SendBufferErrorCONT2;
-       unsigned long long SendBufferErrorCONT3;
-       unsigned long long ReservedSBE[4];
-       unsigned long long RcvHdrAddr0;
-       unsigned long long RcvHdrAddr1;
-       unsigned long long RcvHdrAddr2;
-       unsigned long long RcvHdrAddr3;
-       unsigned long long RcvHdrAddr4;
-       unsigned long long RcvHdrAddr5;
-       unsigned long long RcvHdrAddr6;
-       unsigned long long RcvHdrAddr7;
-       unsigned long long RcvHdrAddr8;
-       unsigned long long ReservedRHA[7];
-       unsigned long long RcvHdrTailAddr0;
-       unsigned long long RcvHdrTailAddr1;
-       unsigned long long RcvHdrTailAddr2;
-       unsigned long long RcvHdrTailAddr3;
-       unsigned long long RcvHdrTailAddr4;
-       unsigned long long RcvHdrTailAddr5;
-       unsigned long long RcvHdrTailAddr6;
-       unsigned long long RcvHdrTailAddr7;
-       unsigned long long RcvHdrTailAddr8;
-       unsigned long long ReservedRHTA[7];
-       unsigned long long Sync;        /* Software only */
-       unsigned long long Dump;        /* Software only */
-       unsigned long long SimVer;      /* Software only */
-       unsigned long long ReservedSW[5];
-       unsigned long long SerdesConfig0;
-       unsigned long long SerdesConfig1;
-       unsigned long long SerdesStatus;
-       unsigned long long XGXSConfig;
-       unsigned long long ReservedSW2[4];
-};
-
-struct _infinipath_do_not_use_counters {
-       __u64 LBIntCnt;
-       __u64 LBFlowStallCnt;
-       __u64 Reserved1;
-       __u64 TxUnsupVLErrCnt;
-       __u64 TxDataPktCnt;
-       __u64 TxFlowPktCnt;
-       __u64 TxDwordCnt;
-       __u64 TxLenErrCnt;
-       __u64 TxMaxMinLenErrCnt;
-       __u64 TxUnderrunCnt;
-       __u64 TxFlowStallCnt;
-       __u64 TxDroppedPktCnt;
-       __u64 RxDroppedPktCnt;
-       __u64 RxDataPktCnt;
-       __u64 RxFlowPktCnt;
-       __u64 RxDwordCnt;
-       __u64 RxLenErrCnt;
-       __u64 RxMaxMinLenErrCnt;
-       __u64 RxICRCErrCnt;
-       __u64 RxVCRCErrCnt;
-       __u64 RxFlowCtrlErrCnt;
-       __u64 RxBadFormatCnt;
-       __u64 RxLinkProblemCnt;
-       __u64 RxEBPCnt;
-       __u64 RxLPCRCErrCnt;
-       __u64 RxBufOvflCnt;
-       __u64 RxTIDFullErrCnt;
-       __u64 RxTIDValidErrCnt;
-       __u64 RxPKeyMismatchCnt;
-       __u64 RxP0HdrEgrOvflCnt;
-       __u64 RxP1HdrEgrOvflCnt;
-       __u64 RxP2HdrEgrOvflCnt;
-       __u64 RxP3HdrEgrOvflCnt;
-       __u64 RxP4HdrEgrOvflCnt;
-       __u64 RxP5HdrEgrOvflCnt;
-       __u64 RxP6HdrEgrOvflCnt;
-       __u64 RxP7HdrEgrOvflCnt;
-       __u64 RxP8HdrEgrOvflCnt;
-       __u64 Reserved6;
-       __u64 Reserved7;
-       __u64 IBStatusChangeCnt;
-       __u64 IBLinkErrRecoveryCnt;
-       __u64 IBLinkDownedCnt;
-       __u64 IBSymbolErrCnt;
-};
-
-#define IPATH_KREG_OFFSET(field) (offsetof( \
-       struct _infinipath_do_not_use_kernel_regs, field) / sizeof(u64))
-#define IPATH_CREG_OFFSET(field) (offsetof( \
-       struct _infinipath_do_not_use_counters, field) / sizeof(u64))
-
-static const struct ipath_kregs ipath_ht_kregs = {
-       .kr_control = IPATH_KREG_OFFSET(Control),
-       .kr_counterregbase = IPATH_KREG_OFFSET(CounterRegBase),
-       .kr_debugport = IPATH_KREG_OFFSET(DebugPort),
-       .kr_debugportselect = IPATH_KREG_OFFSET(DebugPortSelect),
-       .kr_errorclear = IPATH_KREG_OFFSET(ErrorClear),
-       .kr_errormask = IPATH_KREG_OFFSET(ErrorMask),
-       .kr_errorstatus = IPATH_KREG_OFFSET(ErrorStatus),
-       .kr_extctrl = IPATH_KREG_OFFSET(ExtCtrl),
-       .kr_extstatus = IPATH_KREG_OFFSET(ExtStatus),
-       .kr_gpio_clear = IPATH_KREG_OFFSET(GPIOClear),
-       .kr_gpio_mask = IPATH_KREG_OFFSET(GPIOMask),
-       .kr_gpio_out = IPATH_KREG_OFFSET(GPIOOut),
-       .kr_gpio_status = IPATH_KREG_OFFSET(GPIOStatus),
-       .kr_hwdiagctrl = IPATH_KREG_OFFSET(HwDiagCtrl),
-       .kr_hwerrclear = IPATH_KREG_OFFSET(HwErrClear),
-       .kr_hwerrmask = IPATH_KREG_OFFSET(HwErrMask),
-       .kr_hwerrstatus = IPATH_KREG_OFFSET(HwErrStatus),
-       .kr_ibcctrl = IPATH_KREG_OFFSET(IBCCtrl),
-       .kr_ibcstatus = IPATH_KREG_OFFSET(IBCStatus),
-       .kr_intblocked = IPATH_KREG_OFFSET(IntBlocked),
-       .kr_intclear = IPATH_KREG_OFFSET(IntClear),
-       .kr_interruptconfig = IPATH_KREG_OFFSET(InterruptConfig),
-       .kr_intmask = IPATH_KREG_OFFSET(IntMask),
-       .kr_intstatus = IPATH_KREG_OFFSET(IntStatus),
-       .kr_mdio = IPATH_KREG_OFFSET(MDIO),
-       .kr_pagealign = IPATH_KREG_OFFSET(PageAlign),
-       .kr_partitionkey = IPATH_KREG_OFFSET(RcvPartitionKey),
-       .kr_portcnt = IPATH_KREG_OFFSET(PortCnt),
-       .kr_rcvbthqp = IPATH_KREG_OFFSET(RcvBTHQP),
-       .kr_rcvbufbase = IPATH_KREG_OFFSET(RcvBufBase),
-       .kr_rcvbufsize = IPATH_KREG_OFFSET(RcvBufSize),
-       .kr_rcvctrl = IPATH_KREG_OFFSET(RcvCtrl),
-       .kr_rcvegrbase = IPATH_KREG_OFFSET(RcvEgrBase),
-       .kr_rcvegrcnt = IPATH_KREG_OFFSET(RcvEgrCnt),
-       .kr_rcvhdrcnt = IPATH_KREG_OFFSET(RcvHdrCnt),
-       .kr_rcvhdrentsize = IPATH_KREG_OFFSET(RcvHdrEntSize),
-       .kr_rcvhdrsize = IPATH_KREG_OFFSET(RcvHdrSize),
-       .kr_rcvintmembase = IPATH_KREG_OFFSET(RxIntMemBase),
-       .kr_rcvintmemsize = IPATH_KREG_OFFSET(RxIntMemSize),
-       .kr_rcvtidbase = IPATH_KREG_OFFSET(RcvTIDBase),
-       .kr_rcvtidcnt = IPATH_KREG_OFFSET(RcvTIDCnt),
-       .kr_revision = IPATH_KREG_OFFSET(Revision),
-       .kr_scratch = IPATH_KREG_OFFSET(Scratch),
-       .kr_sendbuffererror = IPATH_KREG_OFFSET(SendBufferError),
-       .kr_sendctrl = IPATH_KREG_OFFSET(SendCtrl),
-       .kr_sendpioavailaddr = IPATH_KREG_OFFSET(SendPIOAvailAddr),
-       .kr_sendpiobufbase = IPATH_KREG_OFFSET(SendPIOBufBase),
-       .kr_sendpiobufcnt = IPATH_KREG_OFFSET(SendPIOBufCnt),
-       .kr_sendpiosize = IPATH_KREG_OFFSET(SendPIOSize),
-       .kr_sendregbase = IPATH_KREG_OFFSET(SendRegBase),
-       .kr_txintmembase = IPATH_KREG_OFFSET(TxIntMemBase),
-       .kr_txintmemsize = IPATH_KREG_OFFSET(TxIntMemSize),
-       .kr_userregbase = IPATH_KREG_OFFSET(UserRegBase),
-       .kr_serdesconfig0 = IPATH_KREG_OFFSET(SerdesConfig0),
-       .kr_serdesconfig1 = IPATH_KREG_OFFSET(SerdesConfig1),
-       .kr_serdesstatus = IPATH_KREG_OFFSET(SerdesStatus),
-       .kr_xgxsconfig = IPATH_KREG_OFFSET(XGXSConfig),
-       /*
-        * These should not be used directly via ipath_write_kreg64(),
-        * use them with ipath_write_kreg64_port(),
-        */
-       .kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0),
-       .kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0)
-};
-
-static const struct ipath_cregs ipath_ht_cregs = {
-       .cr_badformatcnt = IPATH_CREG_OFFSET(RxBadFormatCnt),
-       .cr_erricrccnt = IPATH_CREG_OFFSET(RxICRCErrCnt),
-       .cr_errlinkcnt = IPATH_CREG_OFFSET(RxLinkProblemCnt),
-       .cr_errlpcrccnt = IPATH_CREG_OFFSET(RxLPCRCErrCnt),
-       .cr_errpkey = IPATH_CREG_OFFSET(RxPKeyMismatchCnt),
-       .cr_errrcvflowctrlcnt = IPATH_CREG_OFFSET(RxFlowCtrlErrCnt),
-       .cr_err_rlencnt = IPATH_CREG_OFFSET(RxLenErrCnt),
-       .cr_errslencnt = IPATH_CREG_OFFSET(TxLenErrCnt),
-       .cr_errtidfull = IPATH_CREG_OFFSET(RxTIDFullErrCnt),
-       .cr_errtidvalid = IPATH_CREG_OFFSET(RxTIDValidErrCnt),
-       .cr_errvcrccnt = IPATH_CREG_OFFSET(RxVCRCErrCnt),
-       .cr_ibstatuschange = IPATH_CREG_OFFSET(IBStatusChangeCnt),
-       /* calc from Reg_CounterRegBase + offset */
-       .cr_intcnt = IPATH_CREG_OFFSET(LBIntCnt),
-       .cr_invalidrlencnt = IPATH_CREG_OFFSET(RxMaxMinLenErrCnt),
-       .cr_invalidslencnt = IPATH_CREG_OFFSET(TxMaxMinLenErrCnt),
-       .cr_lbflowstallcnt = IPATH_CREG_OFFSET(LBFlowStallCnt),
-       .cr_pktrcvcnt = IPATH_CREG_OFFSET(RxDataPktCnt),
-       .cr_pktrcvflowctrlcnt = IPATH_CREG_OFFSET(RxFlowPktCnt),
-       .cr_pktsendcnt = IPATH_CREG_OFFSET(TxDataPktCnt),
-       .cr_pktsendflowcnt = IPATH_CREG_OFFSET(TxFlowPktCnt),
-       .cr_portovflcnt = IPATH_CREG_OFFSET(RxP0HdrEgrOvflCnt),
-       .cr_rcvebpcnt = IPATH_CREG_OFFSET(RxEBPCnt),
-       .cr_rcvovflcnt = IPATH_CREG_OFFSET(RxBufOvflCnt),
-       .cr_senddropped = IPATH_CREG_OFFSET(TxDroppedPktCnt),
-       .cr_sendstallcnt = IPATH_CREG_OFFSET(TxFlowStallCnt),
-       .cr_sendunderruncnt = IPATH_CREG_OFFSET(TxUnderrunCnt),
-       .cr_wordrcvcnt = IPATH_CREG_OFFSET(RxDwordCnt),
-       .cr_wordsendcnt = IPATH_CREG_OFFSET(TxDwordCnt),
-       .cr_unsupvlcnt = IPATH_CREG_OFFSET(TxUnsupVLErrCnt),
-       .cr_rxdroppktcnt = IPATH_CREG_OFFSET(RxDroppedPktCnt),
-       .cr_iblinkerrrecovcnt = IPATH_CREG_OFFSET(IBLinkErrRecoveryCnt),
-       .cr_iblinkdowncnt = IPATH_CREG_OFFSET(IBLinkDownedCnt),
-       .cr_ibsymbolerrcnt = IPATH_CREG_OFFSET(IBSymbolErrCnt)
-};
-
-/* kr_intstatus, kr_intclear, kr_intmask bits */
-#define INFINIPATH_I_RCVURG_MASK ((1U<<9)-1)
-#define INFINIPATH_I_RCVURG_SHIFT 0
-#define INFINIPATH_I_RCVAVAIL_MASK ((1U<<9)-1)
-#define INFINIPATH_I_RCVAVAIL_SHIFT 12
-
-/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
-#define INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT 0
-#define INFINIPATH_HWE_HTCMEMPARITYERR_MASK 0x3FFFFFULL
-#define INFINIPATH_HWE_HTCLNKABYTE0CRCERR   0x0000000000800000ULL
-#define INFINIPATH_HWE_HTCLNKABYTE1CRCERR   0x0000000001000000ULL
-#define INFINIPATH_HWE_HTCLNKBBYTE0CRCERR   0x0000000002000000ULL
-#define INFINIPATH_HWE_HTCLNKBBYTE1CRCERR   0x0000000004000000ULL
-#define INFINIPATH_HWE_HTCMISCERR4          0x0000000008000000ULL
-#define INFINIPATH_HWE_HTCMISCERR5          0x0000000010000000ULL
-#define INFINIPATH_HWE_HTCMISCERR6          0x0000000020000000ULL
-#define INFINIPATH_HWE_HTCMISCERR7          0x0000000040000000ULL
-#define INFINIPATH_HWE_HTCBUSTREQPARITYERR  0x0000000080000000ULL
-#define INFINIPATH_HWE_HTCBUSTRESPPARITYERR 0x0000000100000000ULL
-#define INFINIPATH_HWE_HTCBUSIREQPARITYERR  0x0000000200000000ULL
-#define INFINIPATH_HWE_COREPLL_FBSLIP       0x0080000000000000ULL
-#define INFINIPATH_HWE_COREPLL_RFSLIP       0x0100000000000000ULL
-#define INFINIPATH_HWE_HTBPLL_FBSLIP        0x0200000000000000ULL
-#define INFINIPATH_HWE_HTBPLL_RFSLIP        0x0400000000000000ULL
-#define INFINIPATH_HWE_HTAPLL_FBSLIP        0x0800000000000000ULL
-#define INFINIPATH_HWE_HTAPLL_RFSLIP        0x1000000000000000ULL
-#define INFINIPATH_HWE_SERDESPLLFAILED      0x2000000000000000ULL
-
-#define IBA6110_IBCS_LINKTRAININGSTATE_MASK 0xf
-#define IBA6110_IBCS_LINKSTATE_SHIFT 4
-
-/* kr_extstatus bits */
-#define INFINIPATH_EXTS_FREQSEL 0x2
-#define INFINIPATH_EXTS_SERDESSEL 0x4
-#define INFINIPATH_EXTS_MEMBIST_ENDTEST     0x0000000000004000
-#define INFINIPATH_EXTS_MEMBIST_CORRECT     0x0000000000008000
-
-
-/* TID entries (memory), HT-only */
-#define INFINIPATH_RT_ADDR_MASK 0xFFFFFFFFFFULL        /* 40 bits valid */
-#define INFINIPATH_RT_VALID 0x8000000000000000ULL
-#define INFINIPATH_RT_ADDR_SHIFT 0
-#define INFINIPATH_RT_BUFSIZE_MASK 0x3FFFULL
-#define INFINIPATH_RT_BUFSIZE_SHIFT 48
-
-#define INFINIPATH_R_INTRAVAIL_SHIFT 16
-#define INFINIPATH_R_TAILUPD_SHIFT 31
-
-/* kr_xgxsconfig bits */
-#define INFINIPATH_XGXS_RESET          0x7ULL
-
-/*
- * masks and bits that are different in different chips, or present only
- * in one
- */
-static const ipath_err_t infinipath_hwe_htcmemparityerr_mask =
-    INFINIPATH_HWE_HTCMEMPARITYERR_MASK;
-static const ipath_err_t infinipath_hwe_htcmemparityerr_shift =
-    INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT;
-
-static const ipath_err_t infinipath_hwe_htclnkabyte0crcerr =
-    INFINIPATH_HWE_HTCLNKABYTE0CRCERR;
-static const ipath_err_t infinipath_hwe_htclnkabyte1crcerr =
-    INFINIPATH_HWE_HTCLNKABYTE1CRCERR;
-static const ipath_err_t infinipath_hwe_htclnkbbyte0crcerr =
-    INFINIPATH_HWE_HTCLNKBBYTE0CRCERR;
-static const ipath_err_t infinipath_hwe_htclnkbbyte1crcerr =
-    INFINIPATH_HWE_HTCLNKBBYTE1CRCERR;
-
-#define _IPATH_GPIO_SDA_NUM 1
-#define _IPATH_GPIO_SCL_NUM 0
-
-#define IPATH_GPIO_SDA \
-       (1ULL << (_IPATH_GPIO_SDA_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT))
-#define IPATH_GPIO_SCL \
-       (1ULL << (_IPATH_GPIO_SCL_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT))
-
-/* keep the code below somewhat more readable; not used elsewhere */
-#define _IPATH_HTLINK0_CRCBITS (infinipath_hwe_htclnkabyte0crcerr |    \
-                               infinipath_hwe_htclnkabyte1crcerr)
-#define _IPATH_HTLINK1_CRCBITS (infinipath_hwe_htclnkbbyte0crcerr |    \
-                               infinipath_hwe_htclnkbbyte1crcerr)
-#define _IPATH_HTLANE0_CRCBITS (infinipath_hwe_htclnkabyte0crcerr |    \
-                               infinipath_hwe_htclnkbbyte0crcerr)
-#define _IPATH_HTLANE1_CRCBITS (infinipath_hwe_htclnkabyte1crcerr |    \
-                               infinipath_hwe_htclnkbbyte1crcerr)
-
-static void hwerr_crcbits(struct ipath_devdata *dd, ipath_err_t hwerrs,
-                         char *msg, size_t msgl)
-{
-       char bitsmsg[64];
-       ipath_err_t crcbits = hwerrs &
-               (_IPATH_HTLINK0_CRCBITS | _IPATH_HTLINK1_CRCBITS);
-       /* don't check if 8bit HT */
-       if (dd->ipath_flags & IPATH_8BIT_IN_HT0)
-               crcbits &= ~infinipath_hwe_htclnkabyte1crcerr;
-       /* don't check if 8bit HT */
-       if (dd->ipath_flags & IPATH_8BIT_IN_HT1)
-               crcbits &= ~infinipath_hwe_htclnkbbyte1crcerr;
-       /*
-        * we'll want to ignore link errors on link that is
-        * not in use, if any.  For now, complain about both
-        */
-       if (crcbits) {
-               u16 ctrl0, ctrl1;
-               snprintf(bitsmsg, sizeof bitsmsg,
-                        "[HT%s lane %s CRC (%llx); powercycle to completely clear]",
-                        !(crcbits & _IPATH_HTLINK1_CRCBITS) ?
-                        "0 (A)" : (!(crcbits & _IPATH_HTLINK0_CRCBITS)
-                                   ? "1 (B)" : "0+1 (A+B)"),
-                        !(crcbits & _IPATH_HTLANE1_CRCBITS) ? "0"
-                        : (!(crcbits & _IPATH_HTLANE0_CRCBITS) ? "1" :
-                           "0+1"), (unsigned long long) crcbits);
-               strlcat(msg, bitsmsg, msgl);
-
-               /*
-                * print extra info for debugging.  slave/primary
-                * config word 4, 8 (link control 0, 1)
-                */
-
-               if (pci_read_config_word(dd->pcidev,
-                                        dd->ipath_ht_slave_off + 0x4,
-                                        &ctrl0))
-                       dev_info(&dd->pcidev->dev, "Couldn't read "
-                                "linkctrl0 of slave/primary "
-                                "config block\n");
-               else if (!(ctrl0 & 1 << 6))
-                       /* not if EOC bit set */
-                       ipath_dbg("HT linkctrl0 0x%x%s%s\n", ctrl0,
-                                 ((ctrl0 >> 8) & 7) ? " CRC" : "",
-                                 ((ctrl0 >> 4) & 1) ? "linkfail" :
-                                 "");
-               if (pci_read_config_word(dd->pcidev,
-                                        dd->ipath_ht_slave_off + 0x8,
-                                        &ctrl1))
-                       dev_info(&dd->pcidev->dev, "Couldn't read "
-                                "linkctrl1 of slave/primary "
-                                "config block\n");
-               else if (!(ctrl1 & 1 << 6))
-                       /* not if EOC bit set */
-                       ipath_dbg("HT linkctrl1 0x%x%s%s\n", ctrl1,
-                                 ((ctrl1 >> 8) & 7) ? " CRC" : "",
-                                 ((ctrl1 >> 4) & 1) ? "linkfail" :
-                                 "");
-
-               /* disable until driver reloaded */
-               dd->ipath_hwerrmask &= ~crcbits;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
-                                dd->ipath_hwerrmask);
-               ipath_dbg("HT crc errs: %s\n", msg);
-       } else
-               ipath_dbg("ignoring HT crc errors 0x%llx, "
-                         "not in use\n", (unsigned long long)
-                         (hwerrs & (_IPATH_HTLINK0_CRCBITS |
-                                    _IPATH_HTLINK1_CRCBITS)));
-}
-
-/* 6110 specific hardware errors... */
-static const struct ipath_hwerror_msgs ipath_6110_hwerror_msgs[] = {
-       INFINIPATH_HWE_MSG(HTCBUSIREQPARITYERR, "HTC Ireq Parity"),
-       INFINIPATH_HWE_MSG(HTCBUSTREQPARITYERR, "HTC Treq Parity"),
-       INFINIPATH_HWE_MSG(HTCBUSTRESPPARITYERR, "HTC Tresp Parity"),
-       INFINIPATH_HWE_MSG(HTCMISCERR5, "HT core Misc5"),
-       INFINIPATH_HWE_MSG(HTCMISCERR6, "HT core Misc6"),
-       INFINIPATH_HWE_MSG(HTCMISCERR7, "HT core Misc7"),
-       INFINIPATH_HWE_MSG(RXDSYNCMEMPARITYERR, "Rx Dsync"),
-       INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"),
-};
-
-#define TXE_PIO_PARITY ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | \
-                       INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) \
-                       << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)
-#define RXE_EAGER_PARITY (INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID \
-                         << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT)
-
-static void ipath_ht_txe_recover(struct ipath_devdata *dd)
-{
-       ++ipath_stats.sps_txeparity;
-       dev_info(&dd->pcidev->dev,
-               "Recovering from TXE PIO parity error\n");
-}
-
-
-/**
- * ipath_ht_handle_hwerrors - display hardware errors.
- * @dd: the infinipath device
- * @msg: the output buffer
- * @msgl: the size of the output buffer
- *
- * Use same msg buffer as regular errors to avoid excessive stack
- * use.  Most hardware errors are catastrophic, but for right now,
- * we'll print them and continue.  We reuse the same message buffer as
- * ipath_handle_errors() to avoid excessive stack usage.
- */
-static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
-                                    size_t msgl)
-{
-       ipath_err_t hwerrs;
-       u32 bits, ctrl;
-       int isfatal = 0;
-       char bitsmsg[64];
-       int log_idx;
-
-       hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
-
-       if (!hwerrs) {
-               ipath_cdbg(VERBOSE, "Called but no hardware errors set\n");
-               /*
-                * better than printing cofusing messages
-                * This seems to be related to clearing the crc error, or
-                * the pll error during init.
-                */
-               goto bail;
-       } else if (hwerrs == -1LL) {
-               ipath_dev_err(dd, "Read of hardware error status failed "
-                             "(all bits set); ignoring\n");
-               goto bail;
-       }
-       ipath_stats.sps_hwerrs++;
-
-       /* Always clear the error status register, except MEMBISTFAIL,
-        * regardless of whether we continue or stop using the chip.
-        * We want that set so we know it failed, even across driver reload.
-        * We'll still ignore it in the hwerrmask.  We do this partly for
-        * diagnostics, but also for support */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
-                        hwerrs&~INFINIPATH_HWE_MEMBISTFAILED);
-
-       hwerrs &= dd->ipath_hwerrmask;
-
-       /* We log some errors to EEPROM, check if we have any of those. */
-       for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx)
-               if (hwerrs & dd->ipath_eep_st_masks[log_idx].hwerrs_to_log)
-                       ipath_inc_eeprom_err(dd, log_idx, 1);
-
-       /*
-        * make sure we get this much out, unless told to be quiet,
-        * it's a parity error we may recover from,
-        * or it's occurred within the last 5 seconds
-        */
-       if ((hwerrs & ~(dd->ipath_lasthwerror | TXE_PIO_PARITY |
-               RXE_EAGER_PARITY)) ||
-               (ipath_debug & __IPATH_VERBDBG))
-               dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx "
-                        "(cleared)\n", (unsigned long long) hwerrs);
-       dd->ipath_lasthwerror |= hwerrs;
-
-       if (hwerrs & ~dd->ipath_hwe_bitsextant)
-               ipath_dev_err(dd, "hwerror interrupt with unknown errors "
-                             "%llx set\n", (unsigned long long)
-                             (hwerrs & ~dd->ipath_hwe_bitsextant));
-
-       ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
-       if ((ctrl & INFINIPATH_C_FREEZEMODE) && !ipath_diag_inuse) {
-               /*
-                * parity errors in send memory are recoverable,
-                * just cancel the send (if indicated in * sendbuffererror),
-                * count the occurrence, unfreeze (if no other handled
-                * hardware error bits are set), and continue. They can
-                * occur if a processor speculative read is done to the PIO
-                * buffer while we are sending a packet, for example.
-                */
-               if (hwerrs & TXE_PIO_PARITY) {
-                       ipath_ht_txe_recover(dd);
-                       hwerrs &= ~TXE_PIO_PARITY;
-               }
-
-               if (!hwerrs) {
-                       ipath_dbg("Clearing freezemode on ignored or "
-                                 "recovered hardware error\n");
-                       ipath_clear_freeze(dd);
-               }
-       }
-
-       *msg = '\0';
-
-       /*
-        * may someday want to decode into which bits are which
-        * functional area for parity errors, etc.
-        */
-       if (hwerrs & (infinipath_hwe_htcmemparityerr_mask
-                     << INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT)) {
-               bits = (u32) ((hwerrs >>
-                              INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT) &
-                             INFINIPATH_HWE_HTCMEMPARITYERR_MASK);
-               snprintf(bitsmsg, sizeof bitsmsg, "[HTC Parity Errs %x] ",
-                        bits);
-               strlcat(msg, bitsmsg, msgl);
-       }
-
-       ipath_format_hwerrors(hwerrs,
-                             ipath_6110_hwerror_msgs,
-                             ARRAY_SIZE(ipath_6110_hwerror_msgs),
-                             msg, msgl);
-
-       if (hwerrs & (_IPATH_HTLINK0_CRCBITS | _IPATH_HTLINK1_CRCBITS))
-               hwerr_crcbits(dd, hwerrs, msg, msgl);
-
-       if (hwerrs & INFINIPATH_HWE_MEMBISTFAILED) {
-               strlcat(msg, "[Memory BIST test failed, InfiniPath hardware unusable]",
-                       msgl);
-               /* ignore from now on, so disable until driver reloaded */
-               dd->ipath_hwerrmask &= ~INFINIPATH_HWE_MEMBISTFAILED;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
-                                dd->ipath_hwerrmask);
-       }
-#define _IPATH_PLL_FAIL (INFINIPATH_HWE_COREPLL_FBSLIP |       \
-                        INFINIPATH_HWE_COREPLL_RFSLIP |        \
-                        INFINIPATH_HWE_HTBPLL_FBSLIP |         \
-                        INFINIPATH_HWE_HTBPLL_RFSLIP |         \
-                        INFINIPATH_HWE_HTAPLL_FBSLIP |         \
-                        INFINIPATH_HWE_HTAPLL_RFSLIP)
-
-       if (hwerrs & _IPATH_PLL_FAIL) {
-               snprintf(bitsmsg, sizeof bitsmsg,
-                        "[PLL failed (%llx), InfiniPath hardware unusable]",
-                        (unsigned long long) (hwerrs & _IPATH_PLL_FAIL));
-               strlcat(msg, bitsmsg, msgl);
-               /* ignore from now on, so disable until driver reloaded */
-               dd->ipath_hwerrmask &= ~(hwerrs & _IPATH_PLL_FAIL);
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
-                                dd->ipath_hwerrmask);
-       }
-
-       if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED) {
-               /*
-                * If it occurs, it is left masked since the eternal
-                * interface is unused
-                */
-               dd->ipath_hwerrmask &= ~INFINIPATH_HWE_SERDESPLLFAILED;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
-                                dd->ipath_hwerrmask);
-       }
-
-       if (hwerrs) {
-               /*
-                * if any set that we aren't ignoring; only
-                * make the complaint once, in case it's stuck
-                * or recurring, and we get here multiple
-                * times.
-                * force link down, so switch knows, and
-                * LEDs are turned off
-                */
-               if (dd->ipath_flags & IPATH_INITTED) {
-                       ipath_set_linkstate(dd, IPATH_IB_LINKDOWN);
-                       ipath_setup_ht_setextled(dd,
-                               INFINIPATH_IBCS_L_STATE_DOWN,
-                               INFINIPATH_IBCS_LT_STATE_DISABLED);
-                       ipath_dev_err(dd, "Fatal Hardware Error (freeze "
-                                         "mode), no longer usable, SN %.16s\n",
-                                         dd->ipath_serial);
-                       isfatal = 1;
-               }
-               *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
-               /* mark as having had error */
-               *dd->ipath_statusp |= IPATH_STATUS_HWERROR;
-               /*
-                * mark as not usable, at a minimum until driver
-                * is reloaded, probably until reboot, since no
-                * other reset is possible.
-                */
-               dd->ipath_flags &= ~IPATH_INITTED;
-       } else {
-               *msg = 0; /* recovered from all of them */
-       }
-       if (*msg)
-               ipath_dev_err(dd, "%s hardware error\n", msg);
-       if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg)
-               /*
-                * for status file; if no trailing brace is copied,
-                * we'll know it was truncated.
-                */
-               snprintf(dd->ipath_freezemsg,
-                        dd->ipath_freezelen, "{%s}", msg);
-
-bail:;
-}
-
-/**
- * ipath_ht_boardname - fill in the board name
- * @dd: the infinipath device
- * @name: the output buffer
- * @namelen: the size of the output buffer
- *
- * fill in the board name, based on the board revision register
- */
-static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
-                             size_t namelen)
-{
-       char *n = NULL;
-       u8 boardrev = dd->ipath_boardrev;
-       int ret = 0;
-
-       switch (boardrev) {
-       case 5:
-               /*
-                * original production board; two production levels, with
-                * different serial number ranges.   See ipath_ht_early_init() for
-                * case where we enable IPATH_GPIO_INTR for later serial # range.
-                * Original 112* serial number is no longer supported.
-                */
-               n = "InfiniPath_QHT7040";
-               break;
-       case 7:
-               /* small form factor production board */
-               n = "InfiniPath_QHT7140";
-               break;
-       default:                /* don't know, just print the number */
-               ipath_dev_err(dd, "Don't yet know about board "
-                             "with ID %u\n", boardrev);
-               snprintf(name, namelen, "Unknown_InfiniPath_QHT7xxx_%u",
-                        boardrev);
-               break;
-       }
-       if (n)
-               snprintf(name, namelen, "%s", n);
-
-       if (ret) {
-               ipath_dev_err(dd, "Unsupported InfiniPath board %s!\n", name);
-               goto bail;
-       }
-       if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 ||
-               dd->ipath_minrev > 4)) {
-               /*
-                * This version of the driver only supports Rev 3.2 - 3.4
-                */
-               ipath_dev_err(dd,
-                             "Unsupported InfiniPath hardware revision %u.%u!\n",
-                             dd->ipath_majrev, dd->ipath_minrev);
-               ret = 1;
-               goto bail;
-       }
-       /*
-        * pkt/word counters are 32 bit, and therefore wrap fast enough
-        * that we snapshot them from a timer, and maintain 64 bit shadow
-        * copies
-        */
-       dd->ipath_flags |= IPATH_32BITCOUNTERS;
-       dd->ipath_flags |= IPATH_GPIO_INTR;
-       if (dd->ipath_lbus_speed != 800)
-               ipath_dev_err(dd,
-                             "Incorrectly configured for HT @ %uMHz\n",
-                             dd->ipath_lbus_speed);
-
-       /*
-        * set here, not in ipath_init_*_funcs because we have to do
-        * it after we can read chip registers.
-        */
-       dd->ipath_ureg_align =
-               ipath_read_kreg32(dd, dd->ipath_kregs->kr_pagealign);
-
-bail:
-       return ret;
-}
-
-static void ipath_check_htlink(struct ipath_devdata *dd)
-{
-       u8 linkerr, link_off, i;
-
-       for (i = 0; i < 2; i++) {
-               link_off = dd->ipath_ht_slave_off + i * 4 + 0xd;
-               if (pci_read_config_byte(dd->pcidev, link_off, &linkerr))
-                       dev_info(&dd->pcidev->dev, "Couldn't read "
-                                "linkerror%d of HT slave/primary block\n",
-                                i);
-               else if (linkerr & 0xf0) {
-                       ipath_cdbg(VERBOSE, "HT linkerr%d bits 0x%x set, "
-                                  "clearing\n", linkerr >> 4, i);
-                       /*
-                        * writing the linkerr bits that are set should
-                        * clear them
-                        */
-                       if (pci_write_config_byte(dd->pcidev, link_off,
-                                                 linkerr))
-                               ipath_dbg("Failed write to clear HT "
-                                         "linkerror%d\n", i);
-                       if (pci_read_config_byte(dd->pcidev, link_off,
-                                                &linkerr))
-                               dev_info(&dd->pcidev->dev,
-                                        "Couldn't reread linkerror%d of "
-                                        "HT slave/primary block\n", i);
-                       else if (linkerr & 0xf0)
-                               dev_info(&dd->pcidev->dev,
-                                        "HT linkerror%d bits 0x%x "
-                                        "couldn't be cleared\n",
-                                        i, linkerr >> 4);
-               }
-       }
-}
-
-static int ipath_setup_ht_reset(struct ipath_devdata *dd)
-{
-       ipath_dbg("No reset possible for this InfiniPath hardware\n");
-       return 0;
-}
-
-#define HT_INTR_DISC_CONFIG  0x80      /* HT interrupt and discovery cap */
-#define HT_INTR_REG_INDEX    2 /* intconfig requires indirect accesses */
-
-/*
- * Bits 13-15 of command==0 is slave/primary block.  Clear any HT CRC
- * errors.  We only bother to do this at load time, because it's OK if
- * it happened before we were loaded (first time after boot/reset),
- * but any time after that, it's fatal anyway.  Also need to not check
- * for upper byte errors if we are in 8 bit mode, so figure out
- * our width.  For now, at least, also complain if it's 8 bit.
- */
-static void slave_or_pri_blk(struct ipath_devdata *dd, struct pci_dev *pdev,
-                            int pos, u8 cap_type)
-{
-       u8 linkwidth = 0, linkerr, link_a_b_off, link_off;
-       u16 linkctrl = 0;
-       int i;
-
-       dd->ipath_ht_slave_off = pos;
-       /* command word, master_host bit */
-       /* master host || slave */
-       if ((cap_type >> 2) & 1)
-               link_a_b_off = 4;
-       else
-               link_a_b_off = 0;
-       ipath_cdbg(VERBOSE, "HT%u (Link %c) connected to processor\n",
-                  link_a_b_off ? 1 : 0,
-                  link_a_b_off ? 'B' : 'A');
-
-       link_a_b_off += pos;
-
-       /*
-        * check both link control registers; clear both HT CRC sets if
-        * necessary.
-        */
-       for (i = 0; i < 2; i++) {
-               link_off = pos + i * 4 + 0x4;
-               if (pci_read_config_word(pdev, link_off, &linkctrl))
-                       ipath_dev_err(dd, "Couldn't read HT link control%d "
-                                     "register\n", i);
-               else if (linkctrl & (0xf << 8)) {
-                       ipath_cdbg(VERBOSE, "Clear linkctrl%d CRC Error "
-                                  "bits %x\n", i, linkctrl & (0xf << 8));
-                       /*
-                        * now write them back to clear the error.
-                        */
-                       pci_write_config_word(pdev, link_off,
-                                             linkctrl & (0xf << 8));
-               }
-       }
-
-       /*
-        * As with HT CRC bits, same for protocol errors that might occur
-        * during boot.
-        */
-       for (i = 0; i < 2; i++) {
-               link_off = pos + i * 4 + 0xd;
-               if (pci_read_config_byte(pdev, link_off, &linkerr))
-                       dev_info(&pdev->dev, "Couldn't read linkerror%d "
-                                "of HT slave/primary block\n", i);
-               else if (linkerr & 0xf0) {
-                       ipath_cdbg(VERBOSE, "HT linkerr%d bits 0x%x set, "
-                                  "clearing\n", linkerr >> 4, i);
-                       /*
-                        * writing the linkerr bits that are set will clear
-                        * them
-                        */
-                       if (pci_write_config_byte
-                           (pdev, link_off, linkerr))
-                               ipath_dbg("Failed write to clear HT "
-                                         "linkerror%d\n", i);
-                       if (pci_read_config_byte(pdev, link_off, &linkerr))
-                               dev_info(&pdev->dev, "Couldn't reread "
-                                        "linkerror%d of HT slave/primary "
-                                        "block\n", i);
-                       else if (linkerr & 0xf0)
-                               dev_info(&pdev->dev, "HT linkerror%d bits "
-                                        "0x%x couldn't be cleared\n",
-                                        i, linkerr >> 4);
-               }
-       }
-
-       /*
-        * this is just for our link to the host, not devices connected
-        * through tunnel.
-        */
-
-       if (pci_read_config_byte(pdev, link_a_b_off + 7, &linkwidth))
-               ipath_dev_err(dd, "Couldn't read HT link width "
-                             "config register\n");
-       else {
-               u32 width;
-               switch (linkwidth & 7) {
-               case 5:
-                       width = 4;
-                       break;
-               case 4:
-                       width = 2;
-                       break;
-               case 3:
-                       width = 32;
-                       break;
-               case 1:
-                       width = 16;
-                       break;
-               case 0:
-               default:        /* if wrong, assume 8 bit */
-                       width = 8;
-                       break;
-               }
-
-               dd->ipath_lbus_width = width;
-
-               if (linkwidth != 0x11) {
-                       ipath_dev_err(dd, "Not configured for 16 bit HT "
-                                     "(%x)\n", linkwidth);
-                       if (!(linkwidth & 0xf)) {
-                               ipath_dbg("Will ignore HT lane1 errors\n");
-                               dd->ipath_flags |= IPATH_8BIT_IN_HT0;
-                       }
-               }
-       }
-
-       /*
-        * this is just for our link to the host, not devices connected
-        * through tunnel.
-        */
-       if (pci_read_config_byte(pdev, link_a_b_off + 0xd, &linkwidth))
-               ipath_dev_err(dd, "Couldn't read HT link frequency "
-                             "config register\n");
-       else {
-               u32 speed;
-               switch (linkwidth & 0xf) {
-               case 6:
-                       speed = 1000;
-                       break;
-               case 5:
-                       speed = 800;
-                       break;
-               case 4:
-                       speed = 600;
-                       break;
-               case 3:
-                       speed = 500;
-                       break;
-               case 2:
-                       speed = 400;
-                       break;
-               case 1:
-                       speed = 300;
-                       break;
-               default:
-                       /*
-                        * assume reserved and vendor-specific are 200...
-                        */
-               case 0:
-                       speed = 200;
-                       break;
-               }
-               dd->ipath_lbus_speed = speed;
-       }
-
-       snprintf(dd->ipath_lbus_info, sizeof(dd->ipath_lbus_info),
-               "HyperTransport,%uMHz,x%u\n",
-               dd->ipath_lbus_speed,
-               dd->ipath_lbus_width);
-}
-
-static int ipath_ht_intconfig(struct ipath_devdata *dd)
-{
-       int ret;
-
-       if (dd->ipath_intconfig) {
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_interruptconfig,
-                                dd->ipath_intconfig);  /* interrupt address */
-               ret = 0;
-       } else {
-               ipath_dev_err(dd, "No interrupts enabled, couldn't setup "
-                             "interrupt address\n");
-               ret = -EINVAL;
-       }
-
-       return ret;
-}
-
-static void ipath_ht_irq_update(struct pci_dev *dev, int irq,
-                               struct ht_irq_msg *msg)
-{
-       struct ipath_devdata *dd = pci_get_drvdata(dev);
-       u64 prev_intconfig = dd->ipath_intconfig;
-
-       dd->ipath_intconfig = msg->address_lo;
-       dd->ipath_intconfig |= ((u64) msg->address_hi) << 32;
-
-       /*
-        * If the previous value of dd->ipath_intconfig is zero, we're
-        * getting configured for the first time, and must not program the
-        * intconfig register here (it will be programmed later, when the
-        * hardware is ready).  Otherwise, we should.
-        */
-       if (prev_intconfig)
-               ipath_ht_intconfig(dd);
-}
-
-/**
- * ipath_setup_ht_config - setup the interruptconfig register
- * @dd: the infinipath device
- * @pdev: the PCI device
- *
- * setup the interruptconfig register from the HT config info.
- * Also clear CRC errors in HT linkcontrol, if necessary.
- * This is done only for the real hardware.  It is done before
- * chip address space is initted, so can't touch infinipath registers
- */
-static int ipath_setup_ht_config(struct ipath_devdata *dd,
-                                struct pci_dev *pdev)
-{
-       int pos, ret;
-
-       ret = __ht_create_irq(pdev, 0, ipath_ht_irq_update);
-       if (ret < 0) {
-               ipath_dev_err(dd, "Couldn't create interrupt handler: "
-                             "err %d\n", ret);
-               goto bail;
-       }
-       dd->ipath_irq = ret;
-       ret = 0;
-
-       /*
-        * Handle clearing CRC errors in linkctrl register if necessary.  We
-        * do this early, before we ever enable errors or hardware errors,
-        * mostly to avoid causing the chip to enter freeze mode.
-        */
-       pos = pci_find_capability(pdev, PCI_CAP_ID_HT);
-       if (!pos) {
-               ipath_dev_err(dd, "Couldn't find HyperTransport "
-                             "capability; no interrupts\n");
-               ret = -ENODEV;
-               goto bail;
-       }
-       do {
-               u8 cap_type;
-
-               /*
-                * The HT capability type byte is 3 bytes after the
-                * capability byte.
-                */
-               if (pci_read_config_byte(pdev, pos + 3, &cap_type)) {
-                       dev_info(&pdev->dev, "Couldn't read config "
-                                "command @ %d\n", pos);
-                       continue;
-               }
-               if (!(cap_type & 0xE0))
-                       slave_or_pri_blk(dd, pdev, pos, cap_type);
-       } while ((pos = pci_find_next_capability(pdev, pos,
-                                                PCI_CAP_ID_HT)));
-
-       dd->ipath_flags |= IPATH_SWAP_PIOBUFS;
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_setup_ht_cleanup - clean up any per-chip chip-specific stuff
- * @dd: the infinipath device
- *
- * Called during driver unload.
- * This is currently a nop for the HT chip, not for all chips
- */
-static void ipath_setup_ht_cleanup(struct ipath_devdata *dd)
-{
-}
-
-/**
- * ipath_setup_ht_setextled - set the state of the two external LEDs
- * @dd: the infinipath device
- * @lst: the L state
- * @ltst: the LT state
- *
- * Set the state of the two external LEDs, to indicate physical and
- * logical state of IB link.   For this chip (at least with recommended
- * board pinouts), LED1 is Green (physical state), and LED2 is Yellow
- * (logical state)
- *
- * Note:  We try to match the Mellanox HCA LED behavior as best
- * we can.  Green indicates physical link state is OK (something is
- * plugged in, and we can train).
- * Amber indicates the link is logically up (ACTIVE).
- * Mellanox further blinks the amber LED to indicate data packet
- * activity, but we have no hardware support for that, so it would
- * require waking up every 10-20 msecs and checking the counters
- * on the chip, and then turning the LED off if appropriate.  That's
- * visible overhead, so not something we will do.
- *
- */
-static void ipath_setup_ht_setextled(struct ipath_devdata *dd,
-                                    u64 lst, u64 ltst)
-{
-       u64 extctl;
-       unsigned long flags = 0;
-
-       /* the diags use the LED to indicate diag info, so we leave
-        * the external LED alone when the diags are running */
-       if (ipath_diag_inuse)
-               return;
-
-       /* Allow override of LED display for, e.g. Locating system in rack */
-       if (dd->ipath_led_override) {
-               ltst = (dd->ipath_led_override & IPATH_LED_PHYS)
-                       ? INFINIPATH_IBCS_LT_STATE_LINKUP
-                       : INFINIPATH_IBCS_LT_STATE_DISABLED;
-               lst = (dd->ipath_led_override & IPATH_LED_LOG)
-                       ? INFINIPATH_IBCS_L_STATE_ACTIVE
-                       : INFINIPATH_IBCS_L_STATE_DOWN;
-       }
-
-       spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
-       /*
-        * start by setting both LED control bits to off, then turn
-        * on the appropriate bit(s).
-        */
-       if (dd->ipath_boardrev == 8) { /* LS/X-1 uses different pins */
-               /*
-                * major difference is that INFINIPATH_EXTC_LEDGBLERR_OFF
-                * is inverted,  because it is normally used to indicate
-                * a hardware fault at reset, if there were errors
-                */
-               extctl = (dd->ipath_extctrl & ~INFINIPATH_EXTC_LEDGBLOK_ON)
-                       | INFINIPATH_EXTC_LEDGBLERR_OFF;
-               if (ltst == INFINIPATH_IBCS_LT_STATE_LINKUP)
-                       extctl &= ~INFINIPATH_EXTC_LEDGBLERR_OFF;
-               if (lst == INFINIPATH_IBCS_L_STATE_ACTIVE)
-                       extctl |= INFINIPATH_EXTC_LEDGBLOK_ON;
-       } else {
-               extctl = dd->ipath_extctrl &
-                       ~(INFINIPATH_EXTC_LED1PRIPORT_ON |
-                         INFINIPATH_EXTC_LED2PRIPORT_ON);
-               if (ltst == INFINIPATH_IBCS_LT_STATE_LINKUP)
-                       extctl |= INFINIPATH_EXTC_LED1PRIPORT_ON;
-               if (lst == INFINIPATH_IBCS_L_STATE_ACTIVE)
-                       extctl |= INFINIPATH_EXTC_LED2PRIPORT_ON;
-       }
-       dd->ipath_extctrl = extctl;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, extctl);
-       spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
-}
-
-static void ipath_init_ht_variables(struct ipath_devdata *dd)
-{
-       /*
-        * setup the register offsets, since they are different for each
-        * chip
-        */
-       dd->ipath_kregs = &ipath_ht_kregs;
-       dd->ipath_cregs = &ipath_ht_cregs;
-
-       dd->ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM;
-       dd->ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM;
-       dd->ipath_gpio_sda = IPATH_GPIO_SDA;
-       dd->ipath_gpio_scl = IPATH_GPIO_SCL;
-
-       /*
-        * Fill in data for field-values that change in newer chips.
-        * We dynamically specify only the mask for LINKTRAININGSTATE
-        * and only the shift for LINKSTATE, as they are the only ones
-        * that change.  Also precalculate the 3 link states of interest
-        * and the combined mask.
-        */
-       dd->ibcs_ls_shift = IBA6110_IBCS_LINKSTATE_SHIFT;
-       dd->ibcs_lts_mask = IBA6110_IBCS_LINKTRAININGSTATE_MASK;
-       dd->ibcs_mask = (INFINIPATH_IBCS_LINKSTATE_MASK <<
-               dd->ibcs_ls_shift) | dd->ibcs_lts_mask;
-       dd->ib_init = (INFINIPATH_IBCS_LT_STATE_LINKUP <<
-               INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) |
-               (INFINIPATH_IBCS_L_STATE_INIT << dd->ibcs_ls_shift);
-       dd->ib_arm = (INFINIPATH_IBCS_LT_STATE_LINKUP <<
-               INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) |
-               (INFINIPATH_IBCS_L_STATE_ARM << dd->ibcs_ls_shift);
-       dd->ib_active = (INFINIPATH_IBCS_LT_STATE_LINKUP <<
-               INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) |
-               (INFINIPATH_IBCS_L_STATE_ACTIVE << dd->ibcs_ls_shift);
-
-       /*
-        * Fill in data for ibcc field-values that change in newer chips.
-        * We dynamically specify only the mask for LINKINITCMD
-        * and only the shift for LINKCMD and MAXPKTLEN, as they are
-        * the only ones that change.
-        */
-       dd->ibcc_lic_mask = INFINIPATH_IBCC_LINKINITCMD_MASK;
-       dd->ibcc_lc_shift = INFINIPATH_IBCC_LINKCMD_SHIFT;
-       dd->ibcc_mpl_shift = INFINIPATH_IBCC_MAXPKTLEN_SHIFT;
-
-       /* Fill in shifts for RcvCtrl. */
-       dd->ipath_r_portenable_shift = INFINIPATH_R_PORTENABLE_SHIFT;
-       dd->ipath_r_intravail_shift = INFINIPATH_R_INTRAVAIL_SHIFT;
-       dd->ipath_r_tailupd_shift = INFINIPATH_R_TAILUPD_SHIFT;
-       dd->ipath_r_portcfg_shift = 0; /* Not on IBA6110 */
-
-       dd->ipath_i_bitsextant =
-               (INFINIPATH_I_RCVURG_MASK << INFINIPATH_I_RCVURG_SHIFT) |
-               (INFINIPATH_I_RCVAVAIL_MASK <<
-                INFINIPATH_I_RCVAVAIL_SHIFT) |
-               INFINIPATH_I_ERROR | INFINIPATH_I_SPIOSENT |
-               INFINIPATH_I_SPIOBUFAVAIL | INFINIPATH_I_GPIO;
-
-       dd->ipath_e_bitsextant =
-               INFINIPATH_E_RFORMATERR | INFINIPATH_E_RVCRC |
-               INFINIPATH_E_RICRC | INFINIPATH_E_RMINPKTLEN |
-               INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RLONGPKTLEN |
-               INFINIPATH_E_RSHORTPKTLEN | INFINIPATH_E_RUNEXPCHAR |
-               INFINIPATH_E_RUNSUPVL | INFINIPATH_E_REBP |
-               INFINIPATH_E_RIBFLOW | INFINIPATH_E_RBADVERSION |
-               INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
-               INFINIPATH_E_RBADTID | INFINIPATH_E_RHDRLEN |
-               INFINIPATH_E_RHDR | INFINIPATH_E_RIBLOSTLINK |
-               INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SMAXPKTLEN |
-               INFINIPATH_E_SUNDERRUN | INFINIPATH_E_SPKTLEN |
-               INFINIPATH_E_SDROPPEDSMPPKT | INFINIPATH_E_SDROPPEDDATAPKT |
-               INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SUNEXPERRPKTNUM |
-               INFINIPATH_E_SUNSUPVL | INFINIPATH_E_IBSTATUSCHANGED |
-               INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET |
-               INFINIPATH_E_HARDWARE;
-
-       dd->ipath_hwe_bitsextant =
-               (INFINIPATH_HWE_HTCMEMPARITYERR_MASK <<
-                INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT) |
-               (INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
-                INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) |
-               (INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
-                INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) |
-               INFINIPATH_HWE_HTCLNKABYTE0CRCERR |
-               INFINIPATH_HWE_HTCLNKABYTE1CRCERR |
-               INFINIPATH_HWE_HTCLNKBBYTE0CRCERR |
-               INFINIPATH_HWE_HTCLNKBBYTE1CRCERR |
-               INFINIPATH_HWE_HTCMISCERR4 |
-               INFINIPATH_HWE_HTCMISCERR5 | INFINIPATH_HWE_HTCMISCERR6 |
-               INFINIPATH_HWE_HTCMISCERR7 |
-               INFINIPATH_HWE_HTCBUSTREQPARITYERR |
-               INFINIPATH_HWE_HTCBUSTRESPPARITYERR |
-               INFINIPATH_HWE_HTCBUSIREQPARITYERR |
-               INFINIPATH_HWE_RXDSYNCMEMPARITYERR |
-               INFINIPATH_HWE_MEMBISTFAILED |
-               INFINIPATH_HWE_COREPLL_FBSLIP |
-               INFINIPATH_HWE_COREPLL_RFSLIP |
-               INFINIPATH_HWE_HTBPLL_FBSLIP |
-               INFINIPATH_HWE_HTBPLL_RFSLIP |
-               INFINIPATH_HWE_HTAPLL_FBSLIP |
-               INFINIPATH_HWE_HTAPLL_RFSLIP |
-               INFINIPATH_HWE_SERDESPLLFAILED |
-               INFINIPATH_HWE_IBCBUSTOSPCPARITYERR |
-               INFINIPATH_HWE_IBCBUSFRSPCPARITYERR;
-
-       dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
-       dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
-       dd->ipath_i_rcvavail_shift = INFINIPATH_I_RCVAVAIL_SHIFT;
-       dd->ipath_i_rcvurg_shift = INFINIPATH_I_RCVURG_SHIFT;
-
-       /*
-        * EEPROM error log 0 is TXE Parity errors. 1 is RXE Parity.
-        * 2 is Some Misc, 3 is reserved for future.
-        */
-       dd->ipath_eep_st_masks[0].hwerrs_to_log =
-               INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
-               INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT;
-
-       dd->ipath_eep_st_masks[1].hwerrs_to_log =
-               INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
-               INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT;
-
-       dd->ipath_eep_st_masks[2].errs_to_log = INFINIPATH_E_RESET;
-
-       dd->delay_mult = 2; /* SDR, 4X, can't change */
-
-       dd->ipath_link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X;
-       dd->ipath_link_speed_supported = IPATH_IB_SDR;
-       dd->ipath_link_width_enabled = IB_WIDTH_4X;
-       dd->ipath_link_speed_enabled = dd->ipath_link_speed_supported;
-       /* these can't change for this chip, so set once */
-       dd->ipath_link_width_active = dd->ipath_link_width_enabled;
-       dd->ipath_link_speed_active = dd->ipath_link_speed_enabled;
-}
-
-/**
- * ipath_ht_init_hwerrors - enable hardware errors
- * @dd: the infinipath device
- *
- * now that we have finished initializing everything that might reasonably
- * cause a hardware error, and cleared those errors bits as they occur,
- * we can enable hardware errors in the mask (potentially enabling
- * freeze mode), and enable hardware errors as errors (along with
- * everything else) in errormask
- */
-static void ipath_ht_init_hwerrors(struct ipath_devdata *dd)
-{
-       ipath_err_t val;
-       u64 extsval;
-
-       extsval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus);
-
-       if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST))
-               ipath_dev_err(dd, "MemBIST did not complete!\n");
-       if (extsval & INFINIPATH_EXTS_MEMBIST_CORRECT)
-               ipath_dbg("MemBIST corrected\n");
-
-       ipath_check_htlink(dd);
-
-       /* barring bugs, all hwerrors become interrupts, which can */
-       val = -1LL;
-       /* don't look at crc lane1 if 8 bit */
-       if (dd->ipath_flags & IPATH_8BIT_IN_HT0)
-               val &= ~infinipath_hwe_htclnkabyte1crcerr;
-       /* don't look at crc lane1 if 8 bit */
-       if (dd->ipath_flags & IPATH_8BIT_IN_HT1)
-               val &= ~infinipath_hwe_htclnkbbyte1crcerr;
-
-       /*
-        * disable RXDSYNCMEMPARITY because external serdes is unused,
-        * and therefore the logic will never be used or initialized,
-        * and uninitialized state will normally result in this error
-        * being asserted.  Similarly for the external serdess pll
-        * lock signal.
-        */
-       val &= ~(INFINIPATH_HWE_SERDESPLLFAILED |
-                INFINIPATH_HWE_RXDSYNCMEMPARITYERR);
-
-       /*
-        * Disable MISCERR4 because of an inversion in the HT core
-        * logic checking for errors that cause this bit to be set.
-        * The errata can also cause the protocol error bit to be set
-        * in the HT config space linkerror register(s).
-        */
-       val &= ~INFINIPATH_HWE_HTCMISCERR4;
-
-       /*
-        * PLL ignored because unused MDIO interface has a logic problem
-        */
-       if (dd->ipath_boardrev == 4 || dd->ipath_boardrev == 9)
-               val &= ~INFINIPATH_HWE_SERDESPLLFAILED;
-       dd->ipath_hwerrmask = val;
-}
-
-
-
-
-/**
- * ipath_ht_bringup_serdes - bring up the serdes
- * @dd: the infinipath device
- */
-static int ipath_ht_bringup_serdes(struct ipath_devdata *dd)
-{
-       u64 val, config1;
-       int ret = 0, change = 0;
-
-       ipath_dbg("Trying to bringup serdes\n");
-
-       if (ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus) &
-           INFINIPATH_HWE_SERDESPLLFAILED)
-       {
-               ipath_dbg("At start, serdes PLL failed bit set in "
-                         "hwerrstatus, clearing and continuing\n");
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
-                                INFINIPATH_HWE_SERDESPLLFAILED);
-       }
-
-       val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
-       config1 = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig1);
-
-       ipath_cdbg(VERBOSE, "Initial serdes status is config0=%llx "
-                  "config1=%llx, sstatus=%llx xgxs %llx\n",
-                  (unsigned long long) val, (unsigned long long) config1,
-                  (unsigned long long)
-                  ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesstatus),
-                  (unsigned long long)
-                  ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig));
-
-       /* force reset on */
-       val |= INFINIPATH_SERDC0_RESET_PLL
-               /* | INFINIPATH_SERDC0_RESET_MASK */
-               ;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
-       udelay(15);             /* need pll reset set at least for a bit */
-
-       if (val & INFINIPATH_SERDC0_RESET_PLL) {
-               u64 val2 = val &= ~INFINIPATH_SERDC0_RESET_PLL;
-               /* set lane resets, and tx idle, during pll reset */
-               val2 |= INFINIPATH_SERDC0_RESET_MASK |
-                       INFINIPATH_SERDC0_TXIDLE;
-               ipath_cdbg(VERBOSE, "Clearing serdes PLL reset (writing "
-                          "%llx)\n", (unsigned long long) val2);
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0,
-                                val2);
-               /*
-                * be sure chip saw it
-                */
-               val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-               /*
-                * need pll reset clear at least 11 usec before lane
-                * resets cleared; give it a few more
-                */
-               udelay(15);
-               val = val2;     /* for check below */
-       }
-
-       if (val & (INFINIPATH_SERDC0_RESET_PLL |
-                  INFINIPATH_SERDC0_RESET_MASK |
-                  INFINIPATH_SERDC0_TXIDLE)) {
-               val &= ~(INFINIPATH_SERDC0_RESET_PLL |
-                        INFINIPATH_SERDC0_RESET_MASK |
-                        INFINIPATH_SERDC0_TXIDLE);
-               /* clear them */
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0,
-                                val);
-       }
-
-       val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
-       if (val & INFINIPATH_XGXS_RESET) {
-               /* normally true after boot */
-               val &= ~INFINIPATH_XGXS_RESET;
-               change = 1;
-       }
-       if (((val >> INFINIPATH_XGXS_RX_POL_SHIFT) &
-            INFINIPATH_XGXS_RX_POL_MASK) != dd->ipath_rx_pol_inv ) {
-               /* need to compensate for Tx inversion in partner */
-               val &= ~(INFINIPATH_XGXS_RX_POL_MASK <<
-                        INFINIPATH_XGXS_RX_POL_SHIFT);
-               val |= dd->ipath_rx_pol_inv <<
-                       INFINIPATH_XGXS_RX_POL_SHIFT;
-               change = 1;
-       }
-       if (change)
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
-
-       val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
-
-       /* clear current and de-emphasis bits */
-       config1 &= ~0x0ffffffff00ULL;
-       /* set current to 20ma */
-       config1 |= 0x00000000000ULL;
-       /* set de-emphasis to -5.68dB */
-       config1 |= 0x0cccc000000ULL;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig1, config1);
-
-       ipath_cdbg(VERBOSE, "After setup: serdes status is config0=%llx "
-                  "config1=%llx, sstatus=%llx xgxs %llx\n",
-                  (unsigned long long) val, (unsigned long long) config1,
-                  (unsigned long long)
-                  ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesstatus),
-                  (unsigned long long)
-                  ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig));
-
-       return ret;             /* for now, say we always succeeded */
-}
-
-/**
- * ipath_ht_quiet_serdes - set serdes to txidle
- * @dd: the infinipath device
- * driver is being unloaded
- */
-static void ipath_ht_quiet_serdes(struct ipath_devdata *dd)
-{
-       u64 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
-
-       val |= INFINIPATH_SERDC0_TXIDLE;
-       ipath_dbg("Setting TxIdleEn on serdes (config0 = %llx)\n",
-                 (unsigned long long) val);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
-}
-
-/**
- * ipath_pe_put_tid - write a TID in chip
- * @dd: the infinipath device
- * @tidptr: pointer to the expected TID (in chip) to update
- * @tidtype: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0) for expected
- * @pa: physical address of in memory buffer; ipath_tidinvalid if freeing
- *
- * This exists as a separate routine to allow for special locking etc.
- * It's used for both the full cleanup on exit, as well as the normal
- * setup and teardown.
- */
-static void ipath_ht_put_tid(struct ipath_devdata *dd,
-                            u64 __iomem *tidptr, u32 type,
-                            unsigned long pa)
-{
-       if (!dd->ipath_kregbase)
-               return;
-
-       if (pa != dd->ipath_tidinvalid) {
-               if (unlikely((pa & ~INFINIPATH_RT_ADDR_MASK))) {
-                       dev_info(&dd->pcidev->dev,
-                                "physaddr %lx has more than "
-                                "40 bits, using only 40!!!\n", pa);
-                       pa &= INFINIPATH_RT_ADDR_MASK;
-               }
-               if (type == RCVHQ_RCV_TYPE_EAGER)
-                       pa |= dd->ipath_tidtemplate;
-               else {
-                       /* in words (fixed, full page).  */
-                       u64 lenvalid = PAGE_SIZE >> 2;
-                       lenvalid <<= INFINIPATH_RT_BUFSIZE_SHIFT;
-                       pa |= lenvalid | INFINIPATH_RT_VALID;
-               }
-       }
-
-       writeq(pa, tidptr);
-}
-
-
-/**
- * ipath_ht_clear_tid - clear all TID entries for a port, expected and eager
- * @dd: the infinipath device
- * @port: the port
- *
- * Used from ipath_close(), and at chip initialization.
- */
-static void ipath_ht_clear_tids(struct ipath_devdata *dd, unsigned port)
-{
-       u64 __iomem *tidbase;
-       int i;
-
-       if (!dd->ipath_kregbase)
-               return;
-
-       ipath_cdbg(VERBOSE, "Invalidate TIDs for port %u\n", port);
-
-       /*
-        * need to invalidate all of the expected TID entries for this
-        * port, so we don't have valid entries that might somehow get
-        * used (early in next use of this port, or through some bug)
-        */
-       tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) +
-                                  dd->ipath_rcvtidbase +
-                                  port * dd->ipath_rcvtidcnt *
-                                  sizeof(*tidbase));
-       for (i = 0; i < dd->ipath_rcvtidcnt; i++)
-               ipath_ht_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EXPECTED,
-                                dd->ipath_tidinvalid);
-
-       tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) +
-                                  dd->ipath_rcvegrbase +
-                                  port * dd->ipath_rcvegrcnt *
-                                  sizeof(*tidbase));
-
-       for (i = 0; i < dd->ipath_rcvegrcnt; i++)
-               ipath_ht_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EAGER,
-                                dd->ipath_tidinvalid);
-}
-
-/**
- * ipath_ht_tidtemplate - setup constants for TID updates
- * @dd: the infinipath device
- *
- * We setup stuff that we use a lot, to avoid calculating each time
- */
-static void ipath_ht_tidtemplate(struct ipath_devdata *dd)
-{
-       dd->ipath_tidtemplate = dd->ipath_ibmaxlen >> 2;
-       dd->ipath_tidtemplate <<= INFINIPATH_RT_BUFSIZE_SHIFT;
-       dd->ipath_tidtemplate |= INFINIPATH_RT_VALID;
-
-       /*
-        * work around chip errata bug 7358, by marking invalid tids
-        * as having max length
-        */
-       dd->ipath_tidinvalid = (-1LL & INFINIPATH_RT_BUFSIZE_MASK) <<
-               INFINIPATH_RT_BUFSIZE_SHIFT;
-}
-
-static int ipath_ht_early_init(struct ipath_devdata *dd)
-{
-       u32 __iomem *piobuf;
-       u32 pioincr, val32;
-       int i;
-
-       /*
-        * one cache line; long IB headers will spill over into received
-        * buffer
-        */
-       dd->ipath_rcvhdrentsize = 16;
-       dd->ipath_rcvhdrsize = IPATH_DFLT_RCVHDRSIZE;
-
-       /*
-        * For HT, we allocate a somewhat overly large eager buffer,
-        * such that we can guarantee that we can receive the largest
-        * packet that we can send out.  To truly support a 4KB MTU,
-        * we need to bump this to a large value.  To date, other than
-        * testing, we have never encountered an HCA that can really
-        * send 4KB MTU packets, so we do not handle that (we'll get
-        * errors interrupts if we ever see one).
-        */
-       dd->ipath_rcvegrbufsize = dd->ipath_piosize2k;
-
-       /*
-        * the min() check here is currently a nop, but it may not
-        * always be, depending on just how we do ipath_rcvegrbufsize
-        */
-       dd->ipath_ibmaxlen = min(dd->ipath_piosize2k,
-                                dd->ipath_rcvegrbufsize);
-       dd->ipath_init_ibmaxlen = dd->ipath_ibmaxlen;
-       ipath_ht_tidtemplate(dd);
-
-       /*
-        * zero all the TID entries at startup.  We do this for sanity,
-        * in case of a previous driver crash of some kind, and also
-        * because the chip powers up with these memories in an unknown
-        * state.  Use portcnt, not cfgports, since this is for the
-        * full chip, not for current (possibly different) configuration
-        * value.
-        * Chip Errata bug 6447
-        */
-       for (val32 = 0; val32 < dd->ipath_portcnt; val32++)
-               ipath_ht_clear_tids(dd, val32);
-
-       /*
-        * write the pbc of each buffer, to be sure it's initialized, then
-        * cancel all the buffers, and also abort any packets that might
-        * have been in flight for some reason (the latter is for driver
-        * unload/reload, but isn't a bad idea at first init).  PIO send
-        * isn't enabled at this point, so there is no danger of sending
-        * these out on the wire.
-        * Chip Errata bug 6610
-        */
-       piobuf = (u32 __iomem *) (((char __iomem *)(dd->ipath_kregbase)) +
-                                 dd->ipath_piobufbase);
-       pioincr = dd->ipath_palign / sizeof(*piobuf);
-       for (i = 0; i < dd->ipath_piobcnt2k; i++) {
-               /*
-                * reasonable word count, just to init pbc
-                */
-               writel(16, piobuf);
-               piobuf += pioincr;
-       }
-
-       ipath_get_eeprom_info(dd);
-       if (dd->ipath_boardrev == 5) {
-               /*
-                * Later production QHT7040 has same changes as QHT7140, so
-                * can use GPIO interrupts.  They have serial #'s starting
-                * with 128, rather than 112.
-                */
-               if (dd->ipath_serial[0] == '1' &&
-                   dd->ipath_serial[1] == '2' &&
-                   dd->ipath_serial[2] == '8')
-                       dd->ipath_flags |= IPATH_GPIO_INTR;
-               else {
-                       ipath_dev_err(dd, "Unsupported InfiniPath board "
-                               "(serial number %.16s)!\n",
-                               dd->ipath_serial);
-                       return 1;
-               }
-       }
-
-       if (dd->ipath_minrev >= 4) {
-               /* Rev4+ reports extra errors via internal GPIO pins */
-               dd->ipath_flags |= IPATH_GPIO_ERRINTRS;
-               dd->ipath_gpio_mask |= IPATH_GPIO_ERRINTR_MASK;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
-                                dd->ipath_gpio_mask);
-       }
-
-       return 0;
-}
-
-
-/**
- * ipath_init_ht_get_base_info - set chip-specific flags for user code
- * @dd: the infinipath device
- * @kbase: ipath_base_info pointer
- *
- * We set the PCIE flag because the lower bandwidth on PCIe vs
- * HyperTransport can affect some user packet algorithms.
- */
-static int ipath_ht_get_base_info(struct ipath_portdata *pd, void *kbase)
-{
-       struct ipath_base_info *kinfo = kbase;
-
-       kinfo->spi_runtime_flags |= IPATH_RUNTIME_HT |
-               IPATH_RUNTIME_PIO_REGSWAPPED;
-
-       if (pd->port_dd->ipath_minrev < 4)
-               kinfo->spi_runtime_flags |= IPATH_RUNTIME_RCVHDR_COPY;
-
-       return 0;
-}
-
-static void ipath_ht_free_irq(struct ipath_devdata *dd)
-{
-       free_irq(dd->ipath_irq, dd);
-       ht_destroy_irq(dd->ipath_irq);
-       dd->ipath_irq = 0;
-       dd->ipath_intconfig = 0;
-}
-
-static struct ipath_message_header *
-ipath_ht_get_msgheader(struct ipath_devdata *dd, __le32 *rhf_addr)
-{
-       return (struct ipath_message_header *)
-               &rhf_addr[sizeof(u64) / sizeof(u32)];
-}
-
-static void ipath_ht_config_ports(struct ipath_devdata *dd, ushort cfgports)
-{
-       dd->ipath_portcnt =
-               ipath_read_kreg32(dd, dd->ipath_kregs->kr_portcnt);
-       dd->ipath_p0_rcvegrcnt =
-               ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrcnt);
-}
-
-static void ipath_ht_read_counters(struct ipath_devdata *dd,
-                                  struct infinipath_counters *cntrs)
-{
-       cntrs->LBIntCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(LBIntCnt));
-       cntrs->LBFlowStallCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(LBFlowStallCnt));
-       cntrs->TxSDmaDescCnt = 0;
-       cntrs->TxUnsupVLErrCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxUnsupVLErrCnt));
-       cntrs->TxDataPktCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDataPktCnt));
-       cntrs->TxFlowPktCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxFlowPktCnt));
-       cntrs->TxDwordCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDwordCnt));
-       cntrs->TxLenErrCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxLenErrCnt));
-       cntrs->TxMaxMinLenErrCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxMaxMinLenErrCnt));
-       cntrs->TxUnderrunCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxUnderrunCnt));
-       cntrs->TxFlowStallCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxFlowStallCnt));
-       cntrs->TxDroppedPktCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDroppedPktCnt));
-       cntrs->RxDroppedPktCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDroppedPktCnt));
-       cntrs->RxDataPktCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDataPktCnt));
-       cntrs->RxFlowPktCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxFlowPktCnt));
-       cntrs->RxDwordCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDwordCnt));
-       cntrs->RxLenErrCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLenErrCnt));
-       cntrs->RxMaxMinLenErrCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxMaxMinLenErrCnt));
-       cntrs->RxICRCErrCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxICRCErrCnt));
-       cntrs->RxVCRCErrCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxVCRCErrCnt));
-       cntrs->RxFlowCtrlErrCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxFlowCtrlErrCnt));
-       cntrs->RxBadFormatCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxBadFormatCnt));
-       cntrs->RxLinkProblemCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLinkProblemCnt));
-       cntrs->RxEBPCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxEBPCnt));
-       cntrs->RxLPCRCErrCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLPCRCErrCnt));
-       cntrs->RxBufOvflCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxBufOvflCnt));
-       cntrs->RxTIDFullErrCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxTIDFullErrCnt));
-       cntrs->RxTIDValidErrCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxTIDValidErrCnt));
-       cntrs->RxPKeyMismatchCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxPKeyMismatchCnt));
-       cntrs->RxP0HdrEgrOvflCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP0HdrEgrOvflCnt));
-       cntrs->RxP1HdrEgrOvflCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP1HdrEgrOvflCnt));
-       cntrs->RxP2HdrEgrOvflCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP2HdrEgrOvflCnt));
-       cntrs->RxP3HdrEgrOvflCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP3HdrEgrOvflCnt));
-       cntrs->RxP4HdrEgrOvflCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP4HdrEgrOvflCnt));
-       cntrs->RxP5HdrEgrOvflCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP5HdrEgrOvflCnt));
-       cntrs->RxP6HdrEgrOvflCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP6HdrEgrOvflCnt));
-       cntrs->RxP7HdrEgrOvflCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP7HdrEgrOvflCnt));
-       cntrs->RxP8HdrEgrOvflCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP8HdrEgrOvflCnt));
-       cntrs->RxP9HdrEgrOvflCnt = 0;
-       cntrs->RxP10HdrEgrOvflCnt = 0;
-       cntrs->RxP11HdrEgrOvflCnt = 0;
-       cntrs->RxP12HdrEgrOvflCnt = 0;
-       cntrs->RxP13HdrEgrOvflCnt = 0;
-       cntrs->RxP14HdrEgrOvflCnt = 0;
-       cntrs->RxP15HdrEgrOvflCnt = 0;
-       cntrs->RxP16HdrEgrOvflCnt = 0;
-       cntrs->IBStatusChangeCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBStatusChangeCnt));
-       cntrs->IBLinkErrRecoveryCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBLinkErrRecoveryCnt));
-       cntrs->IBLinkDownedCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBLinkDownedCnt));
-       cntrs->IBSymbolErrCnt =
-               ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBSymbolErrCnt));
-       cntrs->RxVL15DroppedPktCnt = 0;
-       cntrs->RxOtherLocalPhyErrCnt = 0;
-       cntrs->PcieRetryBufDiagQwordCnt = 0;
-       cntrs->ExcessBufferOvflCnt = dd->ipath_overrun_thresh_errs;
-       cntrs->LocalLinkIntegrityErrCnt =
-               (dd->ipath_flags & IPATH_GPIO_ERRINTRS) ?
-               dd->ipath_lli_errs : dd->ipath_lli_errors;
-       cntrs->RxVlErrCnt = 0;
-       cntrs->RxDlidFltrCnt = 0;
-}
-
-
-/* no interrupt fallback for these chips */
-static int ipath_ht_nointr_fallback(struct ipath_devdata *dd)
-{
-       return 0;
-}
-
-
-/*
- * reset the XGXS (between serdes and IBC).  Slightly less intrusive
- * than resetting the IBC or external link state, and useful in some
- * cases to cause some retraining.  To do this right, we reset IBC
- * as well.
- */
-static void ipath_ht_xgxs_reset(struct ipath_devdata *dd)
-{
-       u64 val, prev_val;
-
-       prev_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
-       val = prev_val | INFINIPATH_XGXS_RESET;
-       prev_val &= ~INFINIPATH_XGXS_RESET; /* be sure */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
-                        dd->ipath_control & ~INFINIPATH_C_LINKENABLE);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
-       ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, prev_val);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
-                        dd->ipath_control);
-}
-
-
-static int ipath_ht_get_ib_cfg(struct ipath_devdata *dd, int which)
-{
-       int ret;
-
-       switch (which) {
-       case IPATH_IB_CFG_LWID:
-               ret = dd->ipath_link_width_active;
-               break;
-       case IPATH_IB_CFG_SPD:
-               ret = dd->ipath_link_speed_active;
-               break;
-       case IPATH_IB_CFG_LWID_ENB:
-               ret = dd->ipath_link_width_enabled;
-               break;
-       case IPATH_IB_CFG_SPD_ENB:
-               ret = dd->ipath_link_speed_enabled;
-               break;
-       default:
-               ret =  -ENOTSUPP;
-               break;
-       }
-       return ret;
-}
-
-
-/* we assume range checking is already done, if needed */
-static int ipath_ht_set_ib_cfg(struct ipath_devdata *dd, int which, u32 val)
-{
-       int ret = 0;
-
-       if (which == IPATH_IB_CFG_LWID_ENB)
-               dd->ipath_link_width_enabled = val;
-       else if (which == IPATH_IB_CFG_SPD_ENB)
-               dd->ipath_link_speed_enabled = val;
-       else
-               ret = -ENOTSUPP;
-       return ret;
-}
-
-
-static void ipath_ht_config_jint(struct ipath_devdata *dd, u16 a, u16 b)
-{
-}
-
-
-static int ipath_ht_ib_updown(struct ipath_devdata *dd, int ibup, u64 ibcs)
-{
-       ipath_setup_ht_setextled(dd, ipath_ib_linkstate(dd, ibcs),
-               ipath_ib_linktrstate(dd, ibcs));
-       return 0;
-}
-
-
-/**
- * ipath_init_iba6110_funcs - set up the chip-specific function pointers
- * @dd: the infinipath device
- *
- * This is global, and is called directly at init to set up the
- * chip-specific function pointers for later use.
- */
-void ipath_init_iba6110_funcs(struct ipath_devdata *dd)
-{
-       dd->ipath_f_intrsetup = ipath_ht_intconfig;
-       dd->ipath_f_bus = ipath_setup_ht_config;
-       dd->ipath_f_reset = ipath_setup_ht_reset;
-       dd->ipath_f_get_boardname = ipath_ht_boardname;
-       dd->ipath_f_init_hwerrors = ipath_ht_init_hwerrors;
-       dd->ipath_f_early_init = ipath_ht_early_init;
-       dd->ipath_f_handle_hwerrors = ipath_ht_handle_hwerrors;
-       dd->ipath_f_quiet_serdes = ipath_ht_quiet_serdes;
-       dd->ipath_f_bringup_serdes = ipath_ht_bringup_serdes;
-       dd->ipath_f_clear_tids = ipath_ht_clear_tids;
-       dd->ipath_f_put_tid = ipath_ht_put_tid;
-       dd->ipath_f_cleanup = ipath_setup_ht_cleanup;
-       dd->ipath_f_setextled = ipath_setup_ht_setextled;
-       dd->ipath_f_get_base_info = ipath_ht_get_base_info;
-       dd->ipath_f_free_irq = ipath_ht_free_irq;
-       dd->ipath_f_tidtemplate = ipath_ht_tidtemplate;
-       dd->ipath_f_intr_fallback = ipath_ht_nointr_fallback;
-       dd->ipath_f_get_msgheader = ipath_ht_get_msgheader;
-       dd->ipath_f_config_ports = ipath_ht_config_ports;
-       dd->ipath_f_read_counters = ipath_ht_read_counters;
-       dd->ipath_f_xgxs_reset = ipath_ht_xgxs_reset;
-       dd->ipath_f_get_ib_cfg = ipath_ht_get_ib_cfg;
-       dd->ipath_f_set_ib_cfg = ipath_ht_set_ib_cfg;
-       dd->ipath_f_config_jint = ipath_ht_config_jint;
-       dd->ipath_f_ib_updown = ipath_ht_ib_updown;
-
-       /*
-        * initialize chip-specific variables
-        */
-       ipath_init_ht_variables(dd);
-}
diff --git a/drivers/staging/rdma/ipath/ipath_init_chip.c b/drivers/staging/rdma/ipath/ipath_init_chip.c
deleted file mode 100644 (file)
index a5eea19..0000000
+++ /dev/null
@@ -1,1062 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/pci.h>
-#include <linux/netdevice.h>
-#include <linux/moduleparam.h>
-#include <linux/slab.h>
-#include <linux/stat.h>
-#include <linux/vmalloc.h>
-
-#include "ipath_kernel.h"
-#include "ipath_common.h"
-
-/*
- * min buffers we want to have per port, after driver
- */
-#define IPATH_MIN_USER_PORT_BUFCNT 7
-
-/*
- * Number of ports we are configured to use (to allow for more pio
- * buffers per port, etc.)  Zero means use chip value.
- */
-static ushort ipath_cfgports;
-
-module_param_named(cfgports, ipath_cfgports, ushort, S_IRUGO);
-MODULE_PARM_DESC(cfgports, "Set max number of ports to use");
-
-/*
- * Number of buffers reserved for driver (verbs and layered drivers.)
- * Initialized based on number of PIO buffers if not set via module interface.
- * The problem with this is that it's global, but we'll use different
- * numbers for different chip types.
- */
-static ushort ipath_kpiobufs;
-
-static int ipath_set_kpiobufs(const char *val, struct kernel_param *kp);
-
-module_param_call(kpiobufs, ipath_set_kpiobufs, param_get_ushort,
-                 &ipath_kpiobufs, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(kpiobufs, "Set number of PIO buffers for driver");
-
-/**
- * create_port0_egr - allocate the eager TID buffers
- * @dd: the infinipath device
- *
- * This code is now quite different for user and kernel, because
- * the kernel uses skb's, for the accelerated network performance.
- * This is the kernel (port0) version.
- *
- * Allocate the eager TID buffers and program them into infinipath.
- * We use the network layer alloc_skb() allocator to allocate the
- * memory, and either use the buffers as is for things like verbs
- * packets, or pass the buffers up to the ipath layered driver and
- * thence the network layer, replacing them as we do so (see
- * ipath_rcv_layer()).
- */
-static int create_port0_egr(struct ipath_devdata *dd)
-{
-       unsigned e, egrcnt;
-       struct ipath_skbinfo *skbinfo;
-       int ret;
-
-       egrcnt = dd->ipath_p0_rcvegrcnt;
-
-       skbinfo = vmalloc(sizeof(*dd->ipath_port0_skbinfo) * egrcnt);
-       if (skbinfo == NULL) {
-               ipath_dev_err(dd, "allocation error for eager TID "
-                             "skb array\n");
-               ret = -ENOMEM;
-               goto bail;
-       }
-       for (e = 0; e < egrcnt; e++) {
-               /*
-                * This is a bit tricky in that we allocate extra
-                * space for 2 bytes of the 14 byte ethernet header.
-                * These two bytes are passed in the ipath header so
-                * the rest of the data is word aligned.  We allocate
-                * 4 bytes so that the data buffer stays word aligned.
-                * See ipath_kreceive() for more details.
-                */
-               skbinfo[e].skb = ipath_alloc_skb(dd, GFP_KERNEL);
-               if (!skbinfo[e].skb) {
-                       ipath_dev_err(dd, "SKB allocation error for "
-                                     "eager TID %u\n", e);
-                       while (e != 0)
-                               dev_kfree_skb(skbinfo[--e].skb);
-                       vfree(skbinfo);
-                       ret = -ENOMEM;
-                       goto bail;
-               }
-       }
-       /*
-        * After loop above, so we can test non-NULL to see if ready
-        * to use at receive, etc.
-        */
-       dd->ipath_port0_skbinfo = skbinfo;
-
-       for (e = 0; e < egrcnt; e++) {
-               dd->ipath_port0_skbinfo[e].phys =
-                 ipath_map_single(dd->pcidev,
-                                  dd->ipath_port0_skbinfo[e].skb->data,
-                                  dd->ipath_ibmaxlen, PCI_DMA_FROMDEVICE);
-               dd->ipath_f_put_tid(dd, e + (u64 __iomem *)
-                                   ((char __iomem *) dd->ipath_kregbase +
-                                    dd->ipath_rcvegrbase),
-                                   RCVHQ_RCV_TYPE_EAGER,
-                                   dd->ipath_port0_skbinfo[e].phys);
-       }
-
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-static int bringup_link(struct ipath_devdata *dd)
-{
-       u64 val, ibc;
-       int ret = 0;
-
-       /* hold IBC in reset */
-       dd->ipath_control &= ~INFINIPATH_C_LINKENABLE;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
-                        dd->ipath_control);
-
-       /*
-        * set initial max size pkt IBC will send, including ICRC; it's the
-        * PIO buffer size in dwords, less 1; also see ipath_set_mtu()
-        */
-       val = (dd->ipath_ibmaxlen >> 2) + 1;
-       ibc = val << dd->ibcc_mpl_shift;
-
-       /* flowcontrolwatermark is in units of KBytes */
-       ibc |= 0x5ULL << INFINIPATH_IBCC_FLOWCTRLWATERMARK_SHIFT;
-       /*
-        * How often flowctrl sent.  More or less in usecs; balance against
-        * watermark value, so that in theory senders always get a flow
-        * control update in time to not let the IB link go idle.
-        */
-       ibc |= 0x3ULL << INFINIPATH_IBCC_FLOWCTRLPERIOD_SHIFT;
-       /* max error tolerance */
-       ibc |= 0xfULL << INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT;
-       /* use "real" buffer space for */
-       ibc |= 4ULL << INFINIPATH_IBCC_CREDITSCALE_SHIFT;
-       /* IB credit flow control. */
-       ibc |= 0xfULL << INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT;
-       /* initially come up waiting for TS1, without sending anything. */
-       dd->ipath_ibcctrl = ibc;
-       /*
-        * Want to start out with both LINKCMD and LINKINITCMD in NOP
-        * (0 and 0).  Don't put linkinitcmd in ipath_ibcctrl, want that
-        * to stay a NOP. Flag that we are disabled, for the (unlikely)
-        * case that some recovery path is trying to bring the link up
-        * before we are ready.
-        */
-       ibc |= INFINIPATH_IBCC_LINKINITCMD_DISABLE <<
-               INFINIPATH_IBCC_LINKINITCMD_SHIFT;
-       dd->ipath_flags |= IPATH_IB_LINK_DISABLED;
-       ipath_cdbg(VERBOSE, "Writing 0x%llx to ibcctrl\n",
-                  (unsigned long long) ibc);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, ibc);
-
-       // be sure chip saw it
-       val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-
-       ret = dd->ipath_f_bringup_serdes(dd);
-
-       if (ret)
-               dev_info(&dd->pcidev->dev, "Could not initialize SerDes, "
-                        "not usable\n");
-       else {
-               /* enable IBC */
-               dd->ipath_control |= INFINIPATH_C_LINKENABLE;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
-                                dd->ipath_control);
-       }
-
-       return ret;
-}
-
-static struct ipath_portdata *create_portdata0(struct ipath_devdata *dd)
-{
-       struct ipath_portdata *pd;
-
-       pd = kzalloc(sizeof(*pd), GFP_KERNEL);
-       if (pd) {
-               pd->port_dd = dd;
-               pd->port_cnt = 1;
-               /* The port 0 pkey table is used by the layer interface. */
-               pd->port_pkeys[0] = IPATH_DEFAULT_P_KEY;
-               pd->port_seq_cnt = 1;
-       }
-       return pd;
-}
-
-static int init_chip_first(struct ipath_devdata *dd)
-{
-       struct ipath_portdata *pd;
-       int ret = 0;
-       u64 val;
-
-       spin_lock_init(&dd->ipath_kernel_tid_lock);
-       spin_lock_init(&dd->ipath_user_tid_lock);
-       spin_lock_init(&dd->ipath_sendctrl_lock);
-       spin_lock_init(&dd->ipath_uctxt_lock);
-       spin_lock_init(&dd->ipath_sdma_lock);
-       spin_lock_init(&dd->ipath_gpio_lock);
-       spin_lock_init(&dd->ipath_eep_st_lock);
-       spin_lock_init(&dd->ipath_sdepb_lock);
-       mutex_init(&dd->ipath_eep_lock);
-
-       /*
-        * skip cfgports stuff because we are not allocating memory,
-        * and we don't want problems if the portcnt changed due to
-        * cfgports.  We do still check and report a difference, if
-        * not same (should be impossible).
-        */
-       dd->ipath_f_config_ports(dd, ipath_cfgports);
-       if (!ipath_cfgports)
-               dd->ipath_cfgports = dd->ipath_portcnt;
-       else if (ipath_cfgports <= dd->ipath_portcnt) {
-               dd->ipath_cfgports = ipath_cfgports;
-               ipath_dbg("Configured to use %u ports out of %u in chip\n",
-                         dd->ipath_cfgports, ipath_read_kreg32(dd,
-                         dd->ipath_kregs->kr_portcnt));
-       } else {
-               dd->ipath_cfgports = dd->ipath_portcnt;
-               ipath_dbg("Tried to configured to use %u ports; chip "
-                         "only supports %u\n", ipath_cfgports,
-                         ipath_read_kreg32(dd,
-                                 dd->ipath_kregs->kr_portcnt));
-       }
-       /*
-        * Allocate full portcnt array, rather than just cfgports, because
-        * cleanup iterates across all possible ports.
-        */
-       dd->ipath_pd = kcalloc(dd->ipath_portcnt, sizeof(*dd->ipath_pd),
-                              GFP_KERNEL);
-
-       if (!dd->ipath_pd) {
-               ipath_dev_err(dd, "Unable to allocate portdata array, "
-                             "failing\n");
-               ret = -ENOMEM;
-               goto done;
-       }
-
-       pd = create_portdata0(dd);
-       if (!pd) {
-               ipath_dev_err(dd, "Unable to allocate portdata for port "
-                             "0, failing\n");
-               ret = -ENOMEM;
-               goto done;
-       }
-       dd->ipath_pd[0] = pd;
-
-       dd->ipath_rcvtidcnt =
-               ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidcnt);
-       dd->ipath_rcvtidbase =
-               ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidbase);
-       dd->ipath_rcvegrcnt =
-               ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrcnt);
-       dd->ipath_rcvegrbase =
-               ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrbase);
-       dd->ipath_palign =
-               ipath_read_kreg32(dd, dd->ipath_kregs->kr_pagealign);
-       dd->ipath_piobufbase =
-               ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiobufbase);
-       val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiosize);
-       dd->ipath_piosize2k = val & ~0U;
-       dd->ipath_piosize4k = val >> 32;
-       if (dd->ipath_piosize4k == 0 && ipath_mtu4096)
-               ipath_mtu4096 = 0; /* 4KB not supported by this chip */
-       dd->ipath_ibmtu = ipath_mtu4096 ? 4096 : 2048;
-       val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiobufcnt);
-       dd->ipath_piobcnt2k = val & ~0U;
-       dd->ipath_piobcnt4k = val >> 32;
-       dd->ipath_pio2kbase =
-               (u32 __iomem *) (((char __iomem *) dd->ipath_kregbase) +
-                                (dd->ipath_piobufbase & 0xffffffff));
-       if (dd->ipath_piobcnt4k) {
-               dd->ipath_pio4kbase = (u32 __iomem *)
-                       (((char __iomem *) dd->ipath_kregbase) +
-                        (dd->ipath_piobufbase >> 32));
-               /*
-                * 4K buffers take 2 pages; we use roundup just to be
-                * paranoid; we calculate it once here, rather than on
-                * ever buf allocate
-                */
-               dd->ipath_4kalign = ALIGN(dd->ipath_piosize4k,
-                                         dd->ipath_palign);
-               ipath_dbg("%u 2k(%x) piobufs @ %p, %u 4k(%x) @ %p "
-                         "(%x aligned)\n",
-                         dd->ipath_piobcnt2k, dd->ipath_piosize2k,
-                         dd->ipath_pio2kbase, dd->ipath_piobcnt4k,
-                         dd->ipath_piosize4k, dd->ipath_pio4kbase,
-                         dd->ipath_4kalign);
-       } else {
-               ipath_dbg("%u 2k piobufs @ %p\n",
-                         dd->ipath_piobcnt2k, dd->ipath_pio2kbase);
-       }
-done:
-       return ret;
-}
-
-/**
- * init_chip_reset - re-initialize after a reset, or enable
- * @dd: the infinipath device
- *
- * sanity check at least some of the values after reset, and
- * ensure no receive or transmit (explicitly, in case reset
- * failed
- */
-static int init_chip_reset(struct ipath_devdata *dd)
-{
-       u32 rtmp;
-       int i;
-       unsigned long flags;
-
-       /*
-        * ensure chip does no sends or receives, tail updates, or
-        * pioavail updates while we re-initialize
-        */
-       dd->ipath_rcvctrl &= ~(1ULL << dd->ipath_r_tailupd_shift);
-       for (i = 0; i < dd->ipath_portcnt; i++) {
-               clear_bit(dd->ipath_r_portenable_shift + i,
-                         &dd->ipath_rcvctrl);
-               clear_bit(dd->ipath_r_intravail_shift + i,
-                         &dd->ipath_rcvctrl);
-       }
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
-               dd->ipath_rcvctrl);
-
-       spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-       dd->ipath_sendctrl = 0U; /* no sdma, etc */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
-       ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-       spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 0ULL);
-
-       rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidcnt);
-       if (rtmp != dd->ipath_rcvtidcnt)
-               dev_info(&dd->pcidev->dev, "tidcnt was %u before "
-                        "reset, now %u, using original\n",
-                        dd->ipath_rcvtidcnt, rtmp);
-       rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidbase);
-       if (rtmp != dd->ipath_rcvtidbase)
-               dev_info(&dd->pcidev->dev, "tidbase was %u before "
-                        "reset, now %u, using original\n",
-                        dd->ipath_rcvtidbase, rtmp);
-       rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrcnt);
-       if (rtmp != dd->ipath_rcvegrcnt)
-               dev_info(&dd->pcidev->dev, "egrcnt was %u before "
-                        "reset, now %u, using original\n",
-                        dd->ipath_rcvegrcnt, rtmp);
-       rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrbase);
-       if (rtmp != dd->ipath_rcvegrbase)
-               dev_info(&dd->pcidev->dev, "egrbase was %u before "
-                        "reset, now %u, using original\n",
-                        dd->ipath_rcvegrbase, rtmp);
-
-       return 0;
-}
-
-static int init_pioavailregs(struct ipath_devdata *dd)
-{
-       int ret;
-
-       dd->ipath_pioavailregs_dma = dma_alloc_coherent(
-               &dd->pcidev->dev, PAGE_SIZE, &dd->ipath_pioavailregs_phys,
-               GFP_KERNEL);
-       if (!dd->ipath_pioavailregs_dma) {
-               ipath_dev_err(dd, "failed to allocate PIOavail reg area "
-                             "in memory\n");
-               ret = -ENOMEM;
-               goto done;
-       }
-
-       /*
-        * we really want L2 cache aligned, but for current CPUs of
-        * interest, they are the same.
-        */
-       dd->ipath_statusp = (u64 *)
-               ((char *)dd->ipath_pioavailregs_dma +
-                ((2 * L1_CACHE_BYTES +
-                  dd->ipath_pioavregs * sizeof(u64)) & ~L1_CACHE_BYTES));
-       /* copy the current value now that it's really allocated */
-       *dd->ipath_statusp = dd->_ipath_status;
-       /*
-        * setup buffer to hold freeze msg, accessible to apps,
-        * following statusp
-        */
-       dd->ipath_freezemsg = (char *)&dd->ipath_statusp[1];
-       /* and its length */
-       dd->ipath_freezelen = L1_CACHE_BYTES - sizeof(dd->ipath_statusp[0]);
-
-       ret = 0;
-
-done:
-       return ret;
-}
-
-/**
- * init_shadow_tids - allocate the shadow TID array
- * @dd: the infinipath device
- *
- * allocate the shadow TID array, so we can ipath_munlock previous
- * entries.  It may make more sense to move the pageshadow to the
- * port data structure, so we only allocate memory for ports actually
- * in use, since we at 8k per port, now.
- */
-static void init_shadow_tids(struct ipath_devdata *dd)
-{
-       struct page **pages;
-       dma_addr_t *addrs;
-
-       pages = vzalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt *
-                       sizeof(struct page *));
-       if (!pages) {
-               ipath_dev_err(dd, "failed to allocate shadow page * "
-                             "array, no expected sends!\n");
-               dd->ipath_pageshadow = NULL;
-               return;
-       }
-
-       addrs = vmalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt *
-                       sizeof(dma_addr_t));
-       if (!addrs) {
-               ipath_dev_err(dd, "failed to allocate shadow dma handle "
-                             "array, no expected sends!\n");
-               vfree(pages);
-               dd->ipath_pageshadow = NULL;
-               return;
-       }
-
-       dd->ipath_pageshadow = pages;
-       dd->ipath_physshadow = addrs;
-}
-
-static void enable_chip(struct ipath_devdata *dd, int reinit)
-{
-       u32 val;
-       u64 rcvmask;
-       unsigned long flags;
-       int i;
-
-       if (!reinit)
-               init_waitqueue_head(&ipath_state_wait);
-
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
-                        dd->ipath_rcvctrl);
-
-       spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-       /* Enable PIO send, and update of PIOavail regs to memory. */
-       dd->ipath_sendctrl = INFINIPATH_S_PIOENABLE |
-               INFINIPATH_S_PIOBUFAVAILUPD;
-
-       /*
-        * Set the PIO avail update threshold to host memory
-        * on chips that support it.
-        */
-       if (dd->ipath_pioupd_thresh)
-               dd->ipath_sendctrl |= dd->ipath_pioupd_thresh
-                       << INFINIPATH_S_UPDTHRESH_SHIFT;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
-       ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-       spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-
-       /*
-        * Enable kernel ports' receive and receive interrupt.
-        * Other ports done as user opens and inits them.
-        */
-       rcvmask = 1ULL;
-       dd->ipath_rcvctrl |= (rcvmask << dd->ipath_r_portenable_shift) |
-               (rcvmask << dd->ipath_r_intravail_shift);
-       if (!(dd->ipath_flags & IPATH_NODMA_RTAIL))
-               dd->ipath_rcvctrl |= (1ULL << dd->ipath_r_tailupd_shift);
-
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
-                        dd->ipath_rcvctrl);
-
-       /*
-        * now ready for use.  this should be cleared whenever we
-        * detect a reset, or initiate one.
-        */
-       dd->ipath_flags |= IPATH_INITTED;
-
-       /*
-        * Init our shadow copies of head from tail values,
-        * and write head values to match.
-        */
-       val = ipath_read_ureg32(dd, ur_rcvegrindextail, 0);
-       ipath_write_ureg(dd, ur_rcvegrindexhead, val, 0);
-
-       /* Initialize so we interrupt on next packet received */
-       ipath_write_ureg(dd, ur_rcvhdrhead,
-                        dd->ipath_rhdrhead_intr_off |
-                        dd->ipath_pd[0]->port_head, 0);
-
-       /*
-        * by now pioavail updates to memory should have occurred, so
-        * copy them into our working/shadow registers; this is in
-        * case something went wrong with abort, but mostly to get the
-        * initial values of the generation bit correct.
-        */
-       for (i = 0; i < dd->ipath_pioavregs; i++) {
-               __le64 pioavail;
-
-               /*
-                * Chip Errata bug 6641; even and odd qwords>3 are swapped.
-                */
-               if (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS))
-                       pioavail = dd->ipath_pioavailregs_dma[i ^ 1];
-               else
-                       pioavail = dd->ipath_pioavailregs_dma[i];
-               /*
-                * don't need to worry about ipath_pioavailkernel here
-                * because we will call ipath_chg_pioavailkernel() later
-                * in initialization, to busy out buffers as needed
-                */
-               dd->ipath_pioavailshadow[i] = le64_to_cpu(pioavail);
-       }
-       /* can get counters, stats, etc. */
-       dd->ipath_flags |= IPATH_PRESENT;
-}
-
-static int init_housekeeping(struct ipath_devdata *dd, int reinit)
-{
-       char boardn[40];
-       int ret = 0;
-
-       /*
-        * have to clear shadow copies of registers at init that are
-        * not otherwise set here, or all kinds of bizarre things
-        * happen with driver on chip reset
-        */
-       dd->ipath_rcvhdrsize = 0;
-
-       /*
-        * Don't clear ipath_flags as 8bit mode was set before
-        * entering this func. However, we do set the linkstate to
-        * unknown, so we can watch for a transition.
-        * PRESENT is set because we want register reads to work,
-        * and the kernel infrastructure saw it in config space;
-        * We clear it if we have failures.
-        */
-       dd->ipath_flags |= IPATH_LINKUNK | IPATH_PRESENT;
-       dd->ipath_flags &= ~(IPATH_LINKACTIVE | IPATH_LINKARMED |
-                            IPATH_LINKDOWN | IPATH_LINKINIT);
-
-       ipath_cdbg(VERBOSE, "Try to read spc chip revision\n");
-       dd->ipath_revision =
-               ipath_read_kreg64(dd, dd->ipath_kregs->kr_revision);
-
-       /*
-        * set up fundamental info we need to use the chip; we assume
-        * if the revision reg and these regs are OK, we don't need to
-        * special case the rest
-        */
-       dd->ipath_sregbase =
-               ipath_read_kreg32(dd, dd->ipath_kregs->kr_sendregbase);
-       dd->ipath_cregbase =
-               ipath_read_kreg32(dd, dd->ipath_kregs->kr_counterregbase);
-       dd->ipath_uregbase =
-               ipath_read_kreg32(dd, dd->ipath_kregs->kr_userregbase);
-       ipath_cdbg(VERBOSE, "ipath_kregbase %p, sendbase %x usrbase %x, "
-                  "cntrbase %x\n", dd->ipath_kregbase, dd->ipath_sregbase,
-                  dd->ipath_uregbase, dd->ipath_cregbase);
-       if ((dd->ipath_revision & 0xffffffff) == 0xffffffff
-           || (dd->ipath_sregbase & 0xffffffff) == 0xffffffff
-           || (dd->ipath_cregbase & 0xffffffff) == 0xffffffff
-           || (dd->ipath_uregbase & 0xffffffff) == 0xffffffff) {
-               ipath_dev_err(dd, "Register read failures from chip, "
-                             "giving up initialization\n");
-               dd->ipath_flags &= ~IPATH_PRESENT;
-               ret = -ENODEV;
-               goto done;
-       }
-
-
-       /* clear diagctrl register, in case diags were running and crashed */
-       ipath_write_kreg (dd, dd->ipath_kregs->kr_hwdiagctrl, 0);
-
-       /* clear the initial reset flag, in case first driver load */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear,
-                        INFINIPATH_E_RESET);
-
-       ipath_cdbg(VERBOSE, "Revision %llx (PCI %x)\n",
-                  (unsigned long long) dd->ipath_revision,
-                  dd->ipath_pcirev);
-
-       if (((dd->ipath_revision >> INFINIPATH_R_SOFTWARE_SHIFT) &
-            INFINIPATH_R_SOFTWARE_MASK) != IPATH_CHIP_SWVERSION) {
-               ipath_dev_err(dd, "Driver only handles version %d, "
-                             "chip swversion is %d (%llx), failng\n",
-                             IPATH_CHIP_SWVERSION,
-                             (int)(dd->ipath_revision >>
-                                   INFINIPATH_R_SOFTWARE_SHIFT) &
-                             INFINIPATH_R_SOFTWARE_MASK,
-                             (unsigned long long) dd->ipath_revision);
-               ret = -ENOSYS;
-               goto done;
-       }
-       dd->ipath_majrev = (u8) ((dd->ipath_revision >>
-                                 INFINIPATH_R_CHIPREVMAJOR_SHIFT) &
-                                INFINIPATH_R_CHIPREVMAJOR_MASK);
-       dd->ipath_minrev = (u8) ((dd->ipath_revision >>
-                                 INFINIPATH_R_CHIPREVMINOR_SHIFT) &
-                                INFINIPATH_R_CHIPREVMINOR_MASK);
-       dd->ipath_boardrev = (u8) ((dd->ipath_revision >>
-                                   INFINIPATH_R_BOARDID_SHIFT) &
-                                  INFINIPATH_R_BOARDID_MASK);
-
-       ret = dd->ipath_f_get_boardname(dd, boardn, sizeof boardn);
-
-       snprintf(dd->ipath_boardversion, sizeof(dd->ipath_boardversion),
-                "ChipABI %u.%u, %s, InfiniPath%u %u.%u, PCI %u, "
-                "SW Compat %u\n",
-                IPATH_CHIP_VERS_MAJ, IPATH_CHIP_VERS_MIN, boardn,
-                (unsigned)(dd->ipath_revision >> INFINIPATH_R_ARCH_SHIFT) &
-                INFINIPATH_R_ARCH_MASK,
-                dd->ipath_majrev, dd->ipath_minrev, dd->ipath_pcirev,
-                (unsigned)(dd->ipath_revision >>
-                           INFINIPATH_R_SOFTWARE_SHIFT) &
-                INFINIPATH_R_SOFTWARE_MASK);
-
-       ipath_dbg("%s", dd->ipath_boardversion);
-
-       if (ret)
-               goto done;
-
-       if (reinit)
-               ret = init_chip_reset(dd);
-       else
-               ret = init_chip_first(dd);
-
-done:
-       return ret;
-}
-
-static void verify_interrupt(unsigned long opaque)
-{
-       struct ipath_devdata *dd = (struct ipath_devdata *) opaque;
-
-       if (!dd)
-               return; /* being torn down */
-
-       /*
-        * If we don't have any interrupts, let the user know and
-        * don't bother checking again.
-        */
-       if (dd->ipath_int_counter == 0) {
-               if (!dd->ipath_f_intr_fallback(dd))
-                       dev_err(&dd->pcidev->dev, "No interrupts detected, "
-                               "not usable.\n");
-               else /* re-arm the timer to see if fallback works */
-                       mod_timer(&dd->ipath_intrchk_timer, jiffies + HZ/2);
-       } else
-               ipath_cdbg(VERBOSE, "%u interrupts at timer check\n",
-                       dd->ipath_int_counter);
-}
-
-/**
- * ipath_init_chip - do the actual initialization sequence on the chip
- * @dd: the infinipath device
- * @reinit: reinitializing, so don't allocate new memory
- *
- * Do the actual initialization sequence on the chip.  This is done
- * both from the init routine called from the PCI infrastructure, and
- * when we reset the chip, or detect that it was reset internally,
- * or it's administratively re-enabled.
- *
- * Memory allocation here and in called routines is only done in
- * the first case (reinit == 0).  We have to be careful, because even
- * without memory allocation, we need to re-write all the chip registers
- * TIDs, etc. after the reset or enable has completed.
- */
-int ipath_init_chip(struct ipath_devdata *dd, int reinit)
-{
-       int ret = 0;
-       u32 kpiobufs, defkbufs;
-       u32 piobufs, uports;
-       u64 val;
-       struct ipath_portdata *pd;
-       gfp_t gfp_flags = GFP_USER | __GFP_COMP;
-
-       ret = init_housekeeping(dd, reinit);
-       if (ret)
-               goto done;
-
-       /*
-        * We could bump this to allow for full rcvegrcnt + rcvtidcnt,
-        * but then it no longer nicely fits power of two, and since
-        * we now use routines that backend onto __get_free_pages, the
-        * rest would be wasted.
-        */
-       dd->ipath_rcvhdrcnt = max(dd->ipath_p0_rcvegrcnt, dd->ipath_rcvegrcnt);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrcnt,
-                        dd->ipath_rcvhdrcnt);
-
-       /*
-        * Set up the shadow copies of the piobufavail registers,
-        * which we compare against the chip registers for now, and
-        * the in memory DMA'ed copies of the registers.  This has to
-        * be done early, before we calculate lastport, etc.
-        */
-       piobufs = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
-       /*
-        * calc number of pioavail registers, and save it; we have 2
-        * bits per buffer.
-        */
-       dd->ipath_pioavregs = ALIGN(piobufs, sizeof(u64) * BITS_PER_BYTE / 2)
-               / (sizeof(u64) * BITS_PER_BYTE / 2);
-       uports = dd->ipath_cfgports ? dd->ipath_cfgports - 1 : 0;
-       if (piobufs > 144)
-               defkbufs = 32 + dd->ipath_pioreserved;
-       else
-               defkbufs = 16 + dd->ipath_pioreserved;
-
-       if (ipath_kpiobufs && (ipath_kpiobufs +
-               (uports * IPATH_MIN_USER_PORT_BUFCNT)) > piobufs) {
-               int i = (int) piobufs -
-                       (int) (uports * IPATH_MIN_USER_PORT_BUFCNT);
-               if (i < 1)
-                       i = 1;
-               dev_info(&dd->pcidev->dev, "Allocating %d PIO bufs of "
-                        "%d for kernel leaves too few for %d user ports "
-                        "(%d each); using %u\n", ipath_kpiobufs,
-                        piobufs, uports, IPATH_MIN_USER_PORT_BUFCNT, i);
-               /*
-                * shouldn't change ipath_kpiobufs, because could be
-                * different for different devices...
-                */
-               kpiobufs = i;
-       } else if (ipath_kpiobufs)
-               kpiobufs = ipath_kpiobufs;
-       else
-               kpiobufs = defkbufs;
-       dd->ipath_lastport_piobuf = piobufs - kpiobufs;
-       dd->ipath_pbufsport =
-               uports ? dd->ipath_lastport_piobuf / uports : 0;
-       /* if not an even divisor, some user ports get extra buffers */
-       dd->ipath_ports_extrabuf = dd->ipath_lastport_piobuf -
-               (dd->ipath_pbufsport * uports);
-       if (dd->ipath_ports_extrabuf)
-               ipath_dbg("%u pbufs/port leaves some unused, add 1 buffer to "
-                       "ports <= %u\n", dd->ipath_pbufsport,
-                       dd->ipath_ports_extrabuf);
-       dd->ipath_lastpioindex = 0;
-       dd->ipath_lastpioindexl = dd->ipath_piobcnt2k;
-       /* ipath_pioavailshadow initialized earlier */
-       ipath_cdbg(VERBOSE, "%d PIO bufs for kernel out of %d total %u "
-                  "each for %u user ports\n", kpiobufs,
-                  piobufs, dd->ipath_pbufsport, uports);
-       ret = dd->ipath_f_early_init(dd);
-       if (ret) {
-               ipath_dev_err(dd, "Early initialization failure\n");
-               goto done;
-       }
-
-       /*
-        * Early_init sets rcvhdrentsize and rcvhdrsize, so this must be
-        * done after early_init.
-        */
-       dd->ipath_hdrqlast =
-               dd->ipath_rcvhdrentsize * (dd->ipath_rcvhdrcnt - 1);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrentsize,
-                        dd->ipath_rcvhdrentsize);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrsize,
-                        dd->ipath_rcvhdrsize);
-
-       if (!reinit) {
-               ret = init_pioavailregs(dd);
-               init_shadow_tids(dd);
-               if (ret)
-                       goto done;
-       }
-
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_sendpioavailaddr,
-                        dd->ipath_pioavailregs_phys);
-
-       /*
-        * this is to detect s/w errors, which the h/w works around by
-        * ignoring the low 6 bits of address, if it wasn't aligned.
-        */
-       val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpioavailaddr);
-       if (val != dd->ipath_pioavailregs_phys) {
-               ipath_dev_err(dd, "Catastrophic software error, "
-                             "SendPIOAvailAddr written as %lx, "
-                             "read back as %llx\n",
-                             (unsigned long) dd->ipath_pioavailregs_phys,
-                             (unsigned long long) val);
-               ret = -EINVAL;
-               goto done;
-       }
-
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvbthqp, IPATH_KD_QP);
-
-       /*
-        * make sure we are not in freeze, and PIO send enabled, so
-        * writes to pbc happen
-        */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask, 0ULL);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
-                        ~0ULL&~INFINIPATH_HWE_MEMBISTFAILED);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 0ULL);
-
-       /*
-        * before error clears, since we expect serdes pll errors during
-        * this, the first time after reset
-        */
-       if (bringup_link(dd)) {
-               dev_info(&dd->pcidev->dev, "Failed to bringup IB link\n");
-               ret = -ENETDOWN;
-               goto done;
-       }
-
-       /*
-        * clear any "expected" hwerrs from reset and/or initialization
-        * clear any that aren't enabled (at least this once), and then
-        * set the enable mask
-        */
-       dd->ipath_f_init_hwerrors(dd);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
-                        ~0ULL&~INFINIPATH_HWE_MEMBISTFAILED);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
-                        dd->ipath_hwerrmask);
-
-       /* clear all */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL);
-       /* enable errors that are masked, at least this first time. */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
-                        ~dd->ipath_maskederrs);
-       dd->ipath_maskederrs = 0; /* don't re-enable ignored in timer */
-       dd->ipath_errormask =
-               ipath_read_kreg64(dd, dd->ipath_kregs->kr_errormask);
-       /* clear any interrupts up to this point (ints still not enabled) */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL);
-
-       dd->ipath_f_tidtemplate(dd);
-
-       /*
-        * Set up the port 0 (kernel) rcvhdr q and egr TIDs.  If doing
-        * re-init, the simplest way to handle this is to free
-        * existing, and re-allocate.
-        * Need to re-create rest of port 0 portdata as well.
-        */
-       pd = dd->ipath_pd[0];
-       if (reinit) {
-               struct ipath_portdata *npd;
-
-               /*
-                * Alloc and init new ipath_portdata for port0,
-                * Then free old pd. Could lead to fragmentation, but also
-                * makes later support for hot-swap easier.
-                */
-               npd = create_portdata0(dd);
-               if (npd) {
-                       ipath_free_pddata(dd, pd);
-                       dd->ipath_pd[0] = npd;
-                       pd = npd;
-               } else {
-                       ipath_dev_err(dd, "Unable to allocate portdata"
-                                     " for port 0, failing\n");
-                       ret = -ENOMEM;
-                       goto done;
-               }
-       }
-       ret = ipath_create_rcvhdrq(dd, pd);
-       if (!ret)
-               ret = create_port0_egr(dd);
-       if (ret) {
-               ipath_dev_err(dd, "failed to allocate kernel port's "
-                             "rcvhdrq and/or egr bufs\n");
-               goto done;
-       } else {
-               enable_chip(dd, reinit);
-       }
-
-       /* after enable_chip, so pioavailshadow setup */
-       ipath_chg_pioavailkernel(dd, 0, piobufs, 1);
-
-       /*
-        * Cancel any possible active sends from early driver load.
-        * Follows early_init because some chips have to initialize
-        * PIO buffers in early_init to avoid false parity errors.
-        * After enable and ipath_chg_pioavailkernel so we can safely
-        * enable pioavail updates and PIOENABLE; packets are now
-        * ready to go out.
-        */
-       ipath_cancel_sends(dd, 1);
-
-       if (!reinit) {
-               /*
-                * Used when we close a port, for DMA already in flight
-                * at close.
-                */
-               dd->ipath_dummy_hdrq = dma_alloc_coherent(
-                       &dd->pcidev->dev, dd->ipath_pd[0]->port_rcvhdrq_size,
-                       &dd->ipath_dummy_hdrq_phys,
-                       gfp_flags);
-               if (!dd->ipath_dummy_hdrq) {
-                       dev_info(&dd->pcidev->dev,
-                               "Couldn't allocate 0x%lx bytes for dummy hdrq\n",
-                               dd->ipath_pd[0]->port_rcvhdrq_size);
-                       /* fallback to just 0'ing */
-                       dd->ipath_dummy_hdrq_phys = 0UL;
-               }
-       }
-
-       /*
-        * cause retrigger of pending interrupts ignored during init,
-        * even if we had errors
-        */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL);
-
-       if (!dd->ipath_stats_timer_active) {
-               /*
-                * first init, or after an admin disable/enable
-                * set up stats retrieval timer, even if we had errors
-                * in last portion of setup
-                */
-               setup_timer(&dd->ipath_stats_timer, ipath_get_faststats,
-                               (unsigned long)dd);
-               /* every 5 seconds; */
-               dd->ipath_stats_timer.expires = jiffies + 5 * HZ;
-               /* takes ~16 seconds to overflow at full IB 4x bandwdith */
-               add_timer(&dd->ipath_stats_timer);
-               dd->ipath_stats_timer_active = 1;
-       }
-
-       /* Set up SendDMA if chip supports it */
-       if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
-               ret = setup_sdma(dd);
-
-       /* Set up HoL state */
-       setup_timer(&dd->ipath_hol_timer, ipath_hol_event, (unsigned long)dd);
-
-       dd->ipath_hol_state = IPATH_HOL_UP;
-
-done:
-       if (!ret) {
-               *dd->ipath_statusp |= IPATH_STATUS_CHIP_PRESENT;
-               if (!dd->ipath_f_intrsetup(dd)) {
-                       /* now we can enable all interrupts from the chip */
-                       ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask,
-                                        -1LL);
-                       /* force re-interrupt of any pending interrupts. */
-                       ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear,
-                                        0ULL);
-                       /* chip is usable; mark it as initialized */
-                       *dd->ipath_statusp |= IPATH_STATUS_INITTED;
-
-                       /*
-                        * setup to verify we get an interrupt, and fallback
-                        * to an alternate if necessary and possible
-                        */
-                       if (!reinit) {
-                               setup_timer(&dd->ipath_intrchk_timer,
-                                               verify_interrupt,
-                                               (unsigned long)dd);
-                       }
-                       dd->ipath_intrchk_timer.expires = jiffies + HZ/2;
-                       add_timer(&dd->ipath_intrchk_timer);
-               } else
-                       ipath_dev_err(dd, "No interrupts enabled, couldn't "
-                                     "setup interrupt address\n");
-
-               if (dd->ipath_cfgports > ipath_stats.sps_nports)
-                       /*
-                        * sps_nports is a global, so, we set it to
-                        * the highest number of ports of any of the
-                        * chips we find; we never decrement it, at
-                        * least for now.  Since this might have changed
-                        * over disable/enable or prior to reset, always
-                        * do the check and potentially adjust.
-                        */
-                       ipath_stats.sps_nports = dd->ipath_cfgports;
-       } else
-               ipath_dbg("Failed (%d) to initialize chip\n", ret);
-
-       /* if ret is non-zero, we probably should do some cleanup
-          here... */
-       return ret;
-}
-
-static int ipath_set_kpiobufs(const char *str, struct kernel_param *kp)
-{
-       struct ipath_devdata *dd;
-       unsigned long flags;
-       unsigned short val;
-       int ret;
-
-       ret = ipath_parse_ushort(str, &val);
-
-       spin_lock_irqsave(&ipath_devs_lock, flags);
-
-       if (ret < 0)
-               goto bail;
-
-       if (val == 0) {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       list_for_each_entry(dd, &ipath_dev_list, ipath_list) {
-               if (dd->ipath_kregbase)
-                       continue;
-               if (val > (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k -
-                          (dd->ipath_cfgports *
-                           IPATH_MIN_USER_PORT_BUFCNT)))
-               {
-                       ipath_dev_err(
-                               dd,
-                               "Allocating %d PIO bufs for kernel leaves "
-                               "too few for %d user ports (%d each)\n",
-                               val, dd->ipath_cfgports - 1,
-                               IPATH_MIN_USER_PORT_BUFCNT);
-                       ret = -EINVAL;
-                       goto bail;
-               }
-               dd->ipath_lastport_piobuf =
-                       dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - val;
-       }
-
-       ipath_kpiobufs = val;
-       ret = 0;
-bail:
-       spin_unlock_irqrestore(&ipath_devs_lock, flags);
-
-       return ret;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_intr.c b/drivers/staging/rdma/ipath/ipath_intr.c
deleted file mode 100644 (file)
index 0403fa2..0000000
+++ /dev/null
@@ -1,1271 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/pci.h>
-#include <linux/delay.h>
-
-#include "ipath_kernel.h"
-#include "ipath_verbs.h"
-#include "ipath_common.h"
-
-
-/*
- * Called when we might have an error that is specific to a particular
- * PIO buffer, and may need to cancel that buffer, so it can be re-used.
- */
-void ipath_disarm_senderrbufs(struct ipath_devdata *dd)
-{
-       u32 piobcnt;
-       unsigned long sbuf[4];
-       /*
-        * it's possible that sendbuffererror could have bits set; might
-        * have already done this as a result of hardware error handling
-        */
-       piobcnt = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
-       /* read these before writing errorclear */
-       sbuf[0] = ipath_read_kreg64(
-               dd, dd->ipath_kregs->kr_sendbuffererror);
-       sbuf[1] = ipath_read_kreg64(
-               dd, dd->ipath_kregs->kr_sendbuffererror + 1);
-       if (piobcnt > 128)
-               sbuf[2] = ipath_read_kreg64(
-                       dd, dd->ipath_kregs->kr_sendbuffererror + 2);
-       if (piobcnt > 192)
-               sbuf[3] = ipath_read_kreg64(
-                       dd, dd->ipath_kregs->kr_sendbuffererror + 3);
-       else
-               sbuf[3] = 0;
-
-       if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) {
-               int i;
-               if (ipath_debug & (__IPATH_PKTDBG|__IPATH_DBG) &&
-                       time_after(dd->ipath_lastcancel, jiffies)) {
-                       __IPATH_DBG_WHICH(__IPATH_PKTDBG|__IPATH_DBG,
-                                         "SendbufErrs %lx %lx", sbuf[0],
-                                         sbuf[1]);
-                       if (ipath_debug & __IPATH_PKTDBG && piobcnt > 128)
-                               printk(" %lx %lx ", sbuf[2], sbuf[3]);
-                       printk("\n");
-               }
-
-               for (i = 0; i < piobcnt; i++)
-                       if (test_bit(i, sbuf))
-                               ipath_disarm_piobufs(dd, i, 1);
-               /* ignore armlaunch errs for a bit */
-               dd->ipath_lastcancel = jiffies+3;
-       }
-}
-
-
-/* These are all rcv-related errors which we want to count for stats */
-#define E_SUM_PKTERRS \
-       (INFINIPATH_E_RHDRLEN | INFINIPATH_E_RBADTID | \
-        INFINIPATH_E_RBADVERSION | INFINIPATH_E_RHDR | \
-        INFINIPATH_E_RLONGPKTLEN | INFINIPATH_E_RSHORTPKTLEN | \
-        INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RMINPKTLEN | \
-        INFINIPATH_E_RFORMATERR | INFINIPATH_E_RUNSUPVL | \
-        INFINIPATH_E_RUNEXPCHAR | INFINIPATH_E_REBP)
-
-/* These are all send-related errors which we want to count for stats */
-#define E_SUM_ERRS \
-       (INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SUNEXPERRPKTNUM | \
-        INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \
-        INFINIPATH_E_SMAXPKTLEN | INFINIPATH_E_SUNSUPVL | \
-        INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SPKTLEN | \
-        INFINIPATH_E_INVALIDADDR)
-
-/*
- * this is similar to E_SUM_ERRS, but can't ignore armlaunch, don't ignore
- * errors not related to freeze and cancelling buffers.  Can't ignore
- * armlaunch because could get more while still cleaning up, and need
- * to cancel those as they happen.
- */
-#define E_SPKT_ERRS_IGNORE \
-        (INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \
-        INFINIPATH_E_SMAXPKTLEN | INFINIPATH_E_SMINPKTLEN | \
-        INFINIPATH_E_SPKTLEN)
-
-/*
- * these are errors that can occur when the link changes state while
- * a packet is being sent or received.  This doesn't cover things
- * like EBP or VCRC that can be the result of a sending having the
- * link change state, so we receive a "known bad" packet.
- */
-#define E_SUM_LINK_PKTERRS \
-       (INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \
-        INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SPKTLEN | \
-        INFINIPATH_E_RSHORTPKTLEN | INFINIPATH_E_RMINPKTLEN | \
-        INFINIPATH_E_RUNEXPCHAR)
-
-static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs)
-{
-       u64 ignore_this_time = 0;
-
-       ipath_disarm_senderrbufs(dd);
-       if ((errs & E_SUM_LINK_PKTERRS) &&
-           !(dd->ipath_flags & IPATH_LINKACTIVE)) {
-               /*
-                * This can happen when SMA is trying to bring the link
-                * up, but the IB link changes state at the "wrong" time.
-                * The IB logic then complains that the packet isn't
-                * valid.  We don't want to confuse people, so we just
-                * don't print them, except at debug
-                */
-               ipath_dbg("Ignoring packet errors %llx, because link not "
-                         "ACTIVE\n", (unsigned long long) errs);
-               ignore_this_time = errs & E_SUM_LINK_PKTERRS;
-       }
-
-       return ignore_this_time;
-}
-
-/* generic hw error messages... */
-#define INFINIPATH_HWE_TXEMEMPARITYERR_MSG(a) \
-       { \
-               .mask = ( INFINIPATH_HWE_TXEMEMPARITYERR_##a <<    \
-                         INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT ),   \
-               .msg = "TXE " #a " Memory Parity"            \
-       }
-#define INFINIPATH_HWE_RXEMEMPARITYERR_MSG(a) \
-       { \
-               .mask = ( INFINIPATH_HWE_RXEMEMPARITYERR_##a <<    \
-                         INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT ),   \
-               .msg = "RXE " #a " Memory Parity"            \
-       }
-
-static const struct ipath_hwerror_msgs ipath_generic_hwerror_msgs[] = {
-       INFINIPATH_HWE_MSG(IBCBUSFRSPCPARITYERR, "IPATH2IB Parity"),
-       INFINIPATH_HWE_MSG(IBCBUSTOSPCPARITYERR, "IB2IPATH Parity"),
-
-       INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOBUF),
-       INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOPBC),
-       INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOLAUNCHFIFO),
-
-       INFINIPATH_HWE_RXEMEMPARITYERR_MSG(RCVBUF),
-       INFINIPATH_HWE_RXEMEMPARITYERR_MSG(LOOKUPQ),
-       INFINIPATH_HWE_RXEMEMPARITYERR_MSG(EAGERTID),
-       INFINIPATH_HWE_RXEMEMPARITYERR_MSG(EXPTID),
-       INFINIPATH_HWE_RXEMEMPARITYERR_MSG(FLAGBUF),
-       INFINIPATH_HWE_RXEMEMPARITYERR_MSG(DATAINFO),
-       INFINIPATH_HWE_RXEMEMPARITYERR_MSG(HDRINFO),
-};
-
-/**
- * ipath_format_hwmsg - format a single hwerror message
- * @msg message buffer
- * @msgl length of message buffer
- * @hwmsg message to add to message buffer
- */
-static void ipath_format_hwmsg(char *msg, size_t msgl, const char *hwmsg)
-{
-       strlcat(msg, "[", msgl);
-       strlcat(msg, hwmsg, msgl);
-       strlcat(msg, "]", msgl);
-}
-
-/**
- * ipath_format_hwerrors - format hardware error messages for display
- * @hwerrs hardware errors bit vector
- * @hwerrmsgs hardware error descriptions
- * @nhwerrmsgs number of hwerrmsgs
- * @msg message buffer
- * @msgl message buffer length
- */
-void ipath_format_hwerrors(u64 hwerrs,
-                          const struct ipath_hwerror_msgs *hwerrmsgs,
-                          size_t nhwerrmsgs,
-                          char *msg, size_t msgl)
-{
-       int i;
-       const int glen =
-           ARRAY_SIZE(ipath_generic_hwerror_msgs);
-
-       for (i=0; i<glen; i++) {
-               if (hwerrs & ipath_generic_hwerror_msgs[i].mask) {
-                       ipath_format_hwmsg(msg, msgl,
-                                          ipath_generic_hwerror_msgs[i].msg);
-               }
-       }
-
-       for (i=0; i<nhwerrmsgs; i++) {
-               if (hwerrs & hwerrmsgs[i].mask) {
-                       ipath_format_hwmsg(msg, msgl, hwerrmsgs[i].msg);
-               }
-       }
-}
-
-/* return the strings for the most common link states */
-static char *ib_linkstate(struct ipath_devdata *dd, u64 ibcs)
-{
-       char *ret;
-       u32 state;
-
-       state = ipath_ib_state(dd, ibcs);
-       if (state == dd->ib_init)
-               ret = "Init";
-       else if (state == dd->ib_arm)
-               ret = "Arm";
-       else if (state == dd->ib_active)
-               ret = "Active";
-       else
-               ret = "Down";
-       return ret;
-}
-
-void signal_ib_event(struct ipath_devdata *dd, enum ib_event_type ev)
-{
-       struct ib_event event;
-
-       event.device = &dd->verbs_dev->ibdev;
-       event.element.port_num = 1;
-       event.event = ev;
-       ib_dispatch_event(&event);
-}
-
-static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
-                                    ipath_err_t errs)
-{
-       u32 ltstate, lstate, ibstate, lastlstate;
-       u32 init = dd->ib_init;
-       u32 arm = dd->ib_arm;
-       u32 active = dd->ib_active;
-       const u64 ibcs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
-
-       lstate = ipath_ib_linkstate(dd, ibcs); /* linkstate */
-       ibstate = ipath_ib_state(dd, ibcs);
-       /* linkstate at last interrupt */
-       lastlstate = ipath_ib_linkstate(dd, dd->ipath_lastibcstat);
-       ltstate = ipath_ib_linktrstate(dd, ibcs); /* linktrainingtate */
-
-       /*
-        * Since going into a recovery state causes the link state to go
-        * down and since recovery is transitory, it is better if we "miss"
-        * ever seeing the link training state go into recovery (i.e.,
-        * ignore this transition for link state special handling purposes)
-        * without even updating ipath_lastibcstat.
-        */
-       if ((ltstate == INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN) ||
-           (ltstate == INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT) ||
-           (ltstate == INFINIPATH_IBCS_LT_STATE_RECOVERIDLE))
-               goto done;
-
-       /*
-        * if linkstate transitions into INIT from any of the various down
-        * states, or if it transitions from any of the up (INIT or better)
-        * states into any of the down states (except link recovery), then
-        * call the chip-specific code to take appropriate actions.
-        */
-       if (lstate >= INFINIPATH_IBCS_L_STATE_INIT &&
-               lastlstate == INFINIPATH_IBCS_L_STATE_DOWN) {
-               /* transitioned to UP */
-               if (dd->ipath_f_ib_updown(dd, 1, ibcs)) {
-                       /* link came up, so we must no longer be disabled */
-                       dd->ipath_flags &= ~IPATH_IB_LINK_DISABLED;
-                       ipath_cdbg(LINKVERB, "LinkUp handled, skipped\n");
-                       goto skip_ibchange; /* chip-code handled */
-               }
-       } else if ((lastlstate >= INFINIPATH_IBCS_L_STATE_INIT ||
-               (dd->ipath_flags & IPATH_IB_FORCE_NOTIFY)) &&
-               ltstate <= INFINIPATH_IBCS_LT_STATE_CFGWAITRMT &&
-               ltstate != INFINIPATH_IBCS_LT_STATE_LINKUP) {
-               int handled;
-               handled = dd->ipath_f_ib_updown(dd, 0, ibcs);
-               dd->ipath_flags &= ~IPATH_IB_FORCE_NOTIFY;
-               if (handled) {
-                       ipath_cdbg(LINKVERB, "LinkDown handled, skipped\n");
-                       goto skip_ibchange; /* chip-code handled */
-               }
-       }
-
-       /*
-        * Significant enough to always print and get into logs, if it was
-        * unexpected.  If it was a requested state change, we'll have
-        * already cleared the flags, so we won't print this warning
-        */
-       if ((ibstate != arm && ibstate != active) &&
-           (dd->ipath_flags & (IPATH_LINKARMED | IPATH_LINKACTIVE))) {
-               dev_info(&dd->pcidev->dev, "Link state changed from %s "
-                        "to %s\n", (dd->ipath_flags & IPATH_LINKARMED) ?
-                        "ARM" : "ACTIVE", ib_linkstate(dd, ibcs));
-       }
-
-       if (ltstate == INFINIPATH_IBCS_LT_STATE_POLLACTIVE ||
-           ltstate == INFINIPATH_IBCS_LT_STATE_POLLQUIET) {
-               u32 lastlts;
-               lastlts = ipath_ib_linktrstate(dd, dd->ipath_lastibcstat);
-               /*
-                * Ignore cycling back and forth from Polling.Active to
-                * Polling.Quiet while waiting for the other end of the link
-                * to come up, except to try and decide if we are connected
-                * to a live IB device or not.  We will cycle back and
-                * forth between them if no cable is plugged in, the other
-                * device is powered off or disabled, etc.
-                */
-               if (lastlts == INFINIPATH_IBCS_LT_STATE_POLLACTIVE ||
-                   lastlts == INFINIPATH_IBCS_LT_STATE_POLLQUIET) {
-                       if (!(dd->ipath_flags & IPATH_IB_AUTONEG_INPROG) &&
-                            (++dd->ipath_ibpollcnt == 40)) {
-                               dd->ipath_flags |= IPATH_NOCABLE;
-                               *dd->ipath_statusp |=
-                                       IPATH_STATUS_IB_NOCABLE;
-                               ipath_cdbg(LINKVERB, "Set NOCABLE\n");
-                       }
-                       ipath_cdbg(LINKVERB, "POLL change to %s (%x)\n",
-                               ipath_ibcstatus_str[ltstate], ibstate);
-                       goto skip_ibchange;
-               }
-       }
-
-       dd->ipath_ibpollcnt = 0; /* not poll*, now */
-       ipath_stats.sps_iblink++;
-
-       if (ibstate != init && dd->ipath_lastlinkrecov && ipath_linkrecovery) {
-               u64 linkrecov;
-               linkrecov = ipath_snap_cntr(dd,
-                       dd->ipath_cregs->cr_iblinkerrrecovcnt);
-               if (linkrecov != dd->ipath_lastlinkrecov) {
-                       ipath_dbg("IB linkrecov up %Lx (%s %s) recov %Lu\n",
-                               (unsigned long long) ibcs,
-                               ib_linkstate(dd, ibcs),
-                               ipath_ibcstatus_str[ltstate],
-                               (unsigned long long) linkrecov);
-                       /* and no more until active again */
-                       dd->ipath_lastlinkrecov = 0;
-                       ipath_set_linkstate(dd, IPATH_IB_LINKDOWN);
-                       goto skip_ibchange;
-               }
-       }
-
-       if (ibstate == init || ibstate == arm || ibstate == active) {
-               *dd->ipath_statusp &= ~IPATH_STATUS_IB_NOCABLE;
-               if (ibstate == init || ibstate == arm) {
-                       *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
-                       if (dd->ipath_flags & IPATH_LINKACTIVE)
-                               signal_ib_event(dd, IB_EVENT_PORT_ERR);
-               }
-               if (ibstate == arm) {
-                       dd->ipath_flags |= IPATH_LINKARMED;
-                       dd->ipath_flags &= ~(IPATH_LINKUNK |
-                               IPATH_LINKINIT | IPATH_LINKDOWN |
-                               IPATH_LINKACTIVE | IPATH_NOCABLE);
-                       ipath_hol_down(dd);
-               } else  if (ibstate == init) {
-                       /*
-                        * set INIT and DOWN.  Down is checked by
-                        * most of the other code, but INIT is
-                        * useful to know in a few places.
-                        */
-                       dd->ipath_flags |= IPATH_LINKINIT |
-                               IPATH_LINKDOWN;
-                       dd->ipath_flags &= ~(IPATH_LINKUNK |
-                               IPATH_LINKARMED | IPATH_LINKACTIVE |
-                               IPATH_NOCABLE);
-                       ipath_hol_down(dd);
-               } else {  /* active */
-                       dd->ipath_lastlinkrecov = ipath_snap_cntr(dd,
-                               dd->ipath_cregs->cr_iblinkerrrecovcnt);
-                       *dd->ipath_statusp |=
-                               IPATH_STATUS_IB_READY | IPATH_STATUS_IB_CONF;
-                       dd->ipath_flags |= IPATH_LINKACTIVE;
-                       dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT
-                               | IPATH_LINKDOWN | IPATH_LINKARMED |
-                               IPATH_NOCABLE);
-                       if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
-                               ipath_restart_sdma(dd);
-                       signal_ib_event(dd, IB_EVENT_PORT_ACTIVE);
-                       /* LED active not handled in chip _f_updown */
-                       dd->ipath_f_setextled(dd, lstate, ltstate);
-                       ipath_hol_up(dd);
-               }
-
-               /*
-                * print after we've already done the work, so as not to
-                * delay the state changes and notifications, for debugging
-                */
-               if (lstate == lastlstate)
-                       ipath_cdbg(LINKVERB, "Unchanged from last: %s "
-                               "(%x)\n", ib_linkstate(dd, ibcs), ibstate);
-               else
-                       ipath_cdbg(VERBOSE, "Unit %u: link up to %s %s (%x)\n",
-                                 dd->ipath_unit, ib_linkstate(dd, ibcs),
-                                 ipath_ibcstatus_str[ltstate],  ibstate);
-       } else { /* down */
-               if (dd->ipath_flags & IPATH_LINKACTIVE)
-                       signal_ib_event(dd, IB_EVENT_PORT_ERR);
-               dd->ipath_flags |= IPATH_LINKDOWN;
-               dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT
-                                    | IPATH_LINKACTIVE |
-                                    IPATH_LINKARMED);
-               *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
-               dd->ipath_lli_counter = 0;
-
-               if (lastlstate != INFINIPATH_IBCS_L_STATE_DOWN)
-                       ipath_cdbg(VERBOSE, "Unit %u link state down "
-                                  "(state 0x%x), from %s\n",
-                                  dd->ipath_unit, lstate,
-                                  ib_linkstate(dd, dd->ipath_lastibcstat));
-               else
-                       ipath_cdbg(LINKVERB, "Unit %u link state changed "
-                                  "to %s (0x%x) from down (%x)\n",
-                                  dd->ipath_unit,
-                                  ipath_ibcstatus_str[ltstate],
-                                  ibstate, lastlstate);
-       }
-
-skip_ibchange:
-       dd->ipath_lastibcstat = ibcs;
-done:
-       return;
-}
-
-static void handle_supp_msgs(struct ipath_devdata *dd,
-                            unsigned supp_msgs, char *msg, u32 msgsz)
-{
-       /*
-        * Print the message unless it's ibc status change only, which
-        * happens so often we never want to count it.
-        */
-       if (dd->ipath_lasterror & ~INFINIPATH_E_IBSTATUSCHANGED) {
-               int iserr;
-               ipath_err_t mask;
-               iserr = ipath_decode_err(dd, msg, msgsz,
-                                        dd->ipath_lasterror &
-                                        ~INFINIPATH_E_IBSTATUSCHANGED);
-
-               mask = INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
-                       INFINIPATH_E_PKTERRS | INFINIPATH_E_SDMADISABLED;
-
-               /* if we're in debug, then don't mask SDMADISABLED msgs */
-               if (ipath_debug & __IPATH_DBG)
-                       mask &= ~INFINIPATH_E_SDMADISABLED;
-
-               if (dd->ipath_lasterror & ~mask)
-                       ipath_dev_err(dd, "Suppressed %u messages for "
-                                     "fast-repeating errors (%s) (%llx)\n",
-                                     supp_msgs, msg,
-                                     (unsigned long long)
-                                     dd->ipath_lasterror);
-               else {
-                       /*
-                        * rcvegrfull and rcvhdrqfull are "normal", for some
-                        * types of processes (mostly benchmarks) that send
-                        * huge numbers of messages, while not processing
-                        * them. So only complain about these at debug
-                        * level.
-                        */
-                       if (iserr)
-                               ipath_dbg("Suppressed %u messages for %s\n",
-                                         supp_msgs, msg);
-                       else
-                               ipath_cdbg(ERRPKT,
-                                       "Suppressed %u messages for %s\n",
-                                         supp_msgs, msg);
-               }
-       }
-}
-
-static unsigned handle_frequent_errors(struct ipath_devdata *dd,
-                                      ipath_err_t errs, char *msg,
-                                      u32 msgsz, int *noprint)
-{
-       unsigned long nc;
-       static unsigned long nextmsg_time;
-       static unsigned nmsgs, supp_msgs;
-
-       /*
-        * Throttle back "fast" messages to no more than 10 per 5 seconds.
-        * This isn't perfect, but it's a reasonable heuristic. If we get
-        * more than 10, give a 6x longer delay.
-        */
-       nc = jiffies;
-       if (nmsgs > 10) {
-               if (time_before(nc, nextmsg_time)) {
-                       *noprint = 1;
-                       if (!supp_msgs++)
-                               nextmsg_time = nc + HZ * 3;
-               } else if (supp_msgs) {
-                       handle_supp_msgs(dd, supp_msgs, msg, msgsz);
-                       supp_msgs = 0;
-                       nmsgs = 0;
-               }
-       } else if (!nmsgs++ || time_after(nc, nextmsg_time)) {
-               nextmsg_time = nc + HZ / 2;
-       }
-
-       return supp_msgs;
-}
-
-static void handle_sdma_errors(struct ipath_devdata *dd, ipath_err_t errs)
-{
-       unsigned long flags;
-       int expected;
-
-       if (ipath_debug & __IPATH_DBG) {
-               char msg[128];
-               ipath_decode_err(dd, msg, sizeof msg, errs &
-                       INFINIPATH_E_SDMAERRS);
-               ipath_dbg("errors %lx (%s)\n", (unsigned long)errs, msg);
-       }
-       if (ipath_debug & __IPATH_VERBDBG) {
-               unsigned long tl, hd, status, lengen;
-               tl = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmatail);
-               hd = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmahead);
-               status = ipath_read_kreg64(dd
-                       , dd->ipath_kregs->kr_senddmastatus);
-               lengen = ipath_read_kreg64(dd,
-                       dd->ipath_kregs->kr_senddmalengen);
-               ipath_cdbg(VERBOSE, "sdma tl 0x%lx hd 0x%lx status 0x%lx "
-                       "lengen 0x%lx\n", tl, hd, status, lengen);
-       }
-
-       spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-       __set_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status);
-       expected = test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status);
-       spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-       if (!expected)
-               ipath_cancel_sends(dd, 1);
-}
-
-static void handle_sdma_intr(struct ipath_devdata *dd, u64 istat)
-{
-       unsigned long flags;
-       int expected;
-
-       if ((istat & INFINIPATH_I_SDMAINT) &&
-           !test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
-               ipath_sdma_intr(dd);
-
-       if (istat & INFINIPATH_I_SDMADISABLED) {
-               expected = test_bit(IPATH_SDMA_ABORTING,
-                       &dd->ipath_sdma_status);
-               ipath_dbg("%s SDmaDisabled intr\n",
-                       expected ? "expected" : "unexpected");
-               spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-               __set_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status);
-               spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-               if (!expected)
-                       ipath_cancel_sends(dd, 1);
-               if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
-                       tasklet_hi_schedule(&dd->ipath_sdma_abort_task);
-       }
-}
-
-static int handle_hdrq_full(struct ipath_devdata *dd)
-{
-       int chkerrpkts = 0;
-       u32 hd, tl;
-       u32 i;
-
-       ipath_stats.sps_hdrqfull++;
-       for (i = 0; i < dd->ipath_cfgports; i++) {
-               struct ipath_portdata *pd = dd->ipath_pd[i];
-
-               if (i == 0) {
-                       /*
-                        * For kernel receive queues, we just want to know
-                        * if there are packets in the queue that we can
-                        * process.
-                        */
-                       if (pd->port_head != ipath_get_hdrqtail(pd))
-                               chkerrpkts |= 1 << i;
-                       continue;
-               }
-
-               /* Skip if user context is not open */
-               if (!pd || !pd->port_cnt)
-                       continue;
-
-               /* Don't report the same point multiple times. */
-               if (dd->ipath_flags & IPATH_NODMA_RTAIL)
-                       tl = ipath_read_ureg32(dd, ur_rcvhdrtail, i);
-               else
-                       tl = ipath_get_rcvhdrtail(pd);
-               if (tl == pd->port_lastrcvhdrqtail)
-                       continue;
-
-               hd = ipath_read_ureg32(dd, ur_rcvhdrhead, i);
-               if (hd == (tl + 1) || (!hd && tl == dd->ipath_hdrqlast)) {
-                       pd->port_lastrcvhdrqtail = tl;
-                       pd->port_hdrqfull++;
-                       /* flush hdrqfull so that poll() sees it */
-                       wmb();
-                       wake_up_interruptible(&pd->port_wait);
-               }
-       }
-
-       return chkerrpkts;
-}
-
-static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
-{
-       char msg[128];
-       u64 ignore_this_time = 0;
-       u64 iserr = 0;
-       int chkerrpkts = 0, noprint = 0;
-       unsigned supp_msgs;
-       int log_idx;
-
-       /*
-        * don't report errors that are masked, either at init
-        * (not set in ipath_errormask), or temporarily (set in
-        * ipath_maskederrs)
-        */
-       errs &= dd->ipath_errormask & ~dd->ipath_maskederrs;
-
-       supp_msgs = handle_frequent_errors(dd, errs, msg, (u32)sizeof msg,
-               &noprint);
-
-       /* do these first, they are most important */
-       if (errs & INFINIPATH_E_HARDWARE) {
-               /* reuse same msg buf */
-               dd->ipath_f_handle_hwerrors(dd, msg, sizeof msg);
-       } else {
-               u64 mask;
-               for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx) {
-                       mask = dd->ipath_eep_st_masks[log_idx].errs_to_log;
-                       if (errs & mask)
-                               ipath_inc_eeprom_err(dd, log_idx, 1);
-               }
-       }
-
-       if (errs & INFINIPATH_E_SDMAERRS)
-               handle_sdma_errors(dd, errs);
-
-       if (!noprint && (errs & ~dd->ipath_e_bitsextant))
-               ipath_dev_err(dd, "error interrupt with unknown errors "
-                             "%llx set\n", (unsigned long long)
-                             (errs & ~dd->ipath_e_bitsextant));
-
-       if (errs & E_SUM_ERRS)
-               ignore_this_time = handle_e_sum_errs(dd, errs);
-       else if ((errs & E_SUM_LINK_PKTERRS) &&
-           !(dd->ipath_flags & IPATH_LINKACTIVE)) {
-               /*
-                * This can happen when SMA is trying to bring the link
-                * up, but the IB link changes state at the "wrong" time.
-                * The IB logic then complains that the packet isn't
-                * valid.  We don't want to confuse people, so we just
-                * don't print them, except at debug
-                */
-               ipath_dbg("Ignoring packet errors %llx, because link not "
-                         "ACTIVE\n", (unsigned long long) errs);
-               ignore_this_time = errs & E_SUM_LINK_PKTERRS;
-       }
-
-       if (supp_msgs == 250000) {
-               int s_iserr;
-               /*
-                * It's not entirely reasonable assuming that the errors set
-                * in the last clear period are all responsible for the
-                * problem, but the alternative is to assume it's the only
-                * ones on this particular interrupt, which also isn't great
-                */
-               dd->ipath_maskederrs |= dd->ipath_lasterror | errs;
-
-               dd->ipath_errormask &= ~dd->ipath_maskederrs;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
-                                dd->ipath_errormask);
-               s_iserr = ipath_decode_err(dd, msg, sizeof msg,
-                                          dd->ipath_maskederrs);
-
-               if (dd->ipath_maskederrs &
-                   ~(INFINIPATH_E_RRCVEGRFULL |
-                     INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS))
-                       ipath_dev_err(dd, "Temporarily disabling "
-                           "error(s) %llx reporting; too frequent (%s)\n",
-                               (unsigned long long) dd->ipath_maskederrs,
-                               msg);
-               else {
-                       /*
-                        * rcvegrfull and rcvhdrqfull are "normal",
-                        * for some types of processes (mostly benchmarks)
-                        * that send huge numbers of messages, while not
-                        * processing them.  So only complain about
-                        * these at debug level.
-                        */
-                       if (s_iserr)
-                               ipath_dbg("Temporarily disabling reporting "
-                                   "too frequent queue full errors (%s)\n",
-                                   msg);
-                       else
-                               ipath_cdbg(ERRPKT,
-                                   "Temporarily disabling reporting too"
-                                   " frequent packet errors (%s)\n",
-                                   msg);
-               }
-
-               /*
-                * Re-enable the masked errors after around 3 minutes.  in
-                * ipath_get_faststats().  If we have a series of fast
-                * repeating but different errors, the interval will keep
-                * stretching out, but that's OK, as that's pretty
-                * catastrophic.
-                */
-               dd->ipath_unmasktime = jiffies + HZ * 180;
-       }
-
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, errs);
-       if (ignore_this_time)
-               errs &= ~ignore_this_time;
-       if (errs & ~dd->ipath_lasterror) {
-               errs &= ~dd->ipath_lasterror;
-               /* never suppress duplicate hwerrors or ibstatuschange */
-               dd->ipath_lasterror |= errs &
-                       ~(INFINIPATH_E_HARDWARE |
-                         INFINIPATH_E_IBSTATUSCHANGED);
-       }
-
-       if (errs & INFINIPATH_E_SENDSPECIALTRIGGER) {
-               dd->ipath_spectriggerhit++;
-               ipath_dbg("%lu special trigger hits\n",
-                       dd->ipath_spectriggerhit);
-       }
-
-       /* likely due to cancel; so suppress message unless verbose */
-       if ((errs & (INFINIPATH_E_SPKTLEN | INFINIPATH_E_SPIOARMLAUNCH)) &&
-               time_after(dd->ipath_lastcancel, jiffies)) {
-               /* armlaunch takes precedence; it often causes both. */
-               ipath_cdbg(VERBOSE,
-                       "Suppressed %s error (%llx) after sendbuf cancel\n",
-                       (errs &  INFINIPATH_E_SPIOARMLAUNCH) ?
-                       "armlaunch" : "sendpktlen", (unsigned long long)errs);
-               errs &= ~(INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SPKTLEN);
-       }
-
-       if (!errs)
-               return 0;
-
-       if (!noprint) {
-               ipath_err_t mask;
-               /*
-                * The ones we mask off are handled specially below
-                * or above.  Also mask SDMADISABLED by default as it
-                * is too chatty.
-                */
-               mask = INFINIPATH_E_IBSTATUSCHANGED |
-                       INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
-                       INFINIPATH_E_HARDWARE | INFINIPATH_E_SDMADISABLED;
-
-               /* if we're in debug, then don't mask SDMADISABLED msgs */
-               if (ipath_debug & __IPATH_DBG)
-                       mask &= ~INFINIPATH_E_SDMADISABLED;
-
-               ipath_decode_err(dd, msg, sizeof msg, errs & ~mask);
-       } else
-               /* so we don't need if (!noprint) at strlcat's below */
-               *msg = 0;
-
-       if (errs & E_SUM_PKTERRS) {
-               ipath_stats.sps_pkterrs++;
-               chkerrpkts = 1;
-       }
-       if (errs & E_SUM_ERRS)
-               ipath_stats.sps_errs++;
-
-       if (errs & (INFINIPATH_E_RICRC | INFINIPATH_E_RVCRC)) {
-               ipath_stats.sps_crcerrs++;
-               chkerrpkts = 1;
-       }
-       iserr = errs & ~(E_SUM_PKTERRS | INFINIPATH_E_PKTERRS);
-
-
-       /*
-        * We don't want to print these two as they happen, or we can make
-        * the situation even worse, because it takes so long to print
-        * messages to serial consoles.  Kernel ports get printed from
-        * fast_stats, no more than every 5 seconds, user ports get printed
-        * on close
-        */
-       if (errs & INFINIPATH_E_RRCVHDRFULL)
-               chkerrpkts |= handle_hdrq_full(dd);
-       if (errs & INFINIPATH_E_RRCVEGRFULL) {
-               struct ipath_portdata *pd = dd->ipath_pd[0];
-
-               /*
-                * since this is of less importance and not likely to
-                * happen without also getting hdrfull, only count
-                * occurrences; don't check each port (or even the kernel
-                * vs user)
-                */
-               ipath_stats.sps_etidfull++;
-               if (pd->port_head != ipath_get_hdrqtail(pd))
-                       chkerrpkts |= 1;
-       }
-
-       /*
-        * do this before IBSTATUSCHANGED, in case both bits set in a single
-        * interrupt; we want the STATUSCHANGE to "win", so we do our
-        * internal copy of state machine correctly
-        */
-       if (errs & INFINIPATH_E_RIBLOSTLINK) {
-               /*
-                * force through block below
-                */
-               errs |= INFINIPATH_E_IBSTATUSCHANGED;
-               ipath_stats.sps_iblink++;
-               dd->ipath_flags |= IPATH_LINKDOWN;
-               dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT
-                                    | IPATH_LINKARMED | IPATH_LINKACTIVE);
-               *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
-
-               ipath_dbg("Lost link, link now down (%s)\n",
-                       ipath_ibcstatus_str[ipath_read_kreg64(dd,
-                       dd->ipath_kregs->kr_ibcstatus) & 0xf]);
-       }
-       if (errs & INFINIPATH_E_IBSTATUSCHANGED)
-               handle_e_ibstatuschanged(dd, errs);
-
-       if (errs & INFINIPATH_E_RESET) {
-               if (!noprint)
-                       ipath_dev_err(dd, "Got reset, requires re-init "
-                                     "(unload and reload driver)\n");
-               dd->ipath_flags &= ~IPATH_INITTED;      /* needs re-init */
-               /* mark as having had error */
-               *dd->ipath_statusp |= IPATH_STATUS_HWERROR;
-               *dd->ipath_statusp &= ~IPATH_STATUS_IB_CONF;
-       }
-
-       if (!noprint && *msg) {
-               if (iserr)
-                       ipath_dev_err(dd, "%s error\n", msg);
-       }
-       if (dd->ipath_state_wanted & dd->ipath_flags) {
-               ipath_cdbg(VERBOSE, "driver wanted state %x, iflags now %x, "
-                          "waking\n", dd->ipath_state_wanted,
-                          dd->ipath_flags);
-               wake_up_interruptible(&ipath_state_wait);
-       }
-
-       return chkerrpkts;
-}
-
-/*
- * try to cleanup as much as possible for anything that might have gone
- * wrong while in freeze mode, such as pio buffers being written by user
- * processes (causing armlaunch), send errors due to going into freeze mode,
- * etc., and try to avoid causing extra interrupts while doing so.
- * Forcibly update the in-memory pioavail register copies after cleanup
- * because the chip won't do it while in freeze mode (the register values
- * themselves are kept correct).
- * Make sure that we don't lose any important interrupts by using the chip
- * feature that says that writing 0 to a bit in *clear that is set in
- * *status will cause an interrupt to be generated again (if allowed by
- * the *mask value).
- */
-void ipath_clear_freeze(struct ipath_devdata *dd)
-{
-       /* disable error interrupts, to avoid confusion */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 0ULL);
-
-       /* also disable interrupts; errormask is sometimes overwriten */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
-
-       ipath_cancel_sends(dd, 1);
-
-       /* clear the freeze, and be sure chip saw it */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
-                        dd->ipath_control);
-       ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-
-       /* force in-memory update now we are out of freeze */
-       ipath_force_pio_avail_update(dd);
-
-       /*
-        * force new interrupt if any hwerr, error or interrupt bits are
-        * still set, and clear "safe" send packet errors related to freeze
-        * and cancelling sends.  Re-enable error interrupts before possible
-        * force of re-interrupt on pending interrupts.
-        */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, 0ULL);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear,
-               E_SPKT_ERRS_IGNORE);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
-               dd->ipath_errormask);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, -1LL);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL);
-}
-
-
-/* this is separate to allow for better optimization of ipath_intr() */
-
-static noinline void ipath_bad_intr(struct ipath_devdata *dd, u32 *unexpectp)
-{
-       /*
-        * sometimes happen during driver init and unload, don't want
-        * to process any interrupts at that point
-        */
-
-       /* this is just a bandaid, not a fix, if something goes badly
-        * wrong */
-       if (++*unexpectp > 100) {
-               if (++*unexpectp > 105) {
-                       /*
-                        * ok, we must be taking somebody else's interrupts,
-                        * due to a messed up mptable and/or PIRQ table, so
-                        * unregister the interrupt.  We've seen this during
-                        * linuxbios development work, and it may happen in
-                        * the future again.
-                        */
-                       if (dd->pcidev && dd->ipath_irq) {
-                               ipath_dev_err(dd, "Now %u unexpected "
-                                             "interrupts, unregistering "
-                                             "interrupt handler\n",
-                                             *unexpectp);
-                               ipath_dbg("free_irq of irq %d\n",
-                                         dd->ipath_irq);
-                               dd->ipath_f_free_irq(dd);
-                       }
-               }
-               if (ipath_read_ireg(dd, dd->ipath_kregs->kr_intmask)) {
-                       ipath_dev_err(dd, "%u unexpected interrupts, "
-                                     "disabling interrupts completely\n",
-                                     *unexpectp);
-                       /*
-                        * disable all interrupts, something is very wrong
-                        */
-                       ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask,
-                                        0ULL);
-               }
-       } else if (*unexpectp > 1)
-               ipath_dbg("Interrupt when not ready, should not happen, "
-                         "ignoring\n");
-}
-
-static noinline void ipath_bad_regread(struct ipath_devdata *dd)
-{
-       static int allbits;
-
-       /* separate routine, for better optimization of ipath_intr() */
-
-       /*
-        * We print the message and disable interrupts, in hope of
-        * having a better chance of debugging the problem.
-        */
-       ipath_dev_err(dd,
-                     "Read of interrupt status failed (all bits set)\n");
-       if (allbits++) {
-               /* disable all interrupts, something is very wrong */
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
-               if (allbits == 2) {
-                       ipath_dev_err(dd, "Still bad interrupt status, "
-                                     "unregistering interrupt\n");
-                       dd->ipath_f_free_irq(dd);
-               } else if (allbits > 2) {
-                       if ((allbits % 10000) == 0)
-                               printk(".");
-               } else
-                       ipath_dev_err(dd, "Disabling interrupts, "
-                                     "multiple errors\n");
-       }
-}
-
-static void handle_layer_pioavail(struct ipath_devdata *dd)
-{
-       unsigned long flags;
-       int ret;
-
-       ret = ipath_ib_piobufavail(dd->verbs_dev);
-       if (ret > 0)
-               goto set;
-
-       return;
-set:
-       spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-       dd->ipath_sendctrl |= INFINIPATH_S_PIOINTBUFAVAIL;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-                        dd->ipath_sendctrl);
-       ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-       spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-}
-
-/*
- * Handle receive interrupts for user ports; this means a user
- * process was waiting for a packet to arrive, and didn't want
- * to poll
- */
-static void handle_urcv(struct ipath_devdata *dd, u64 istat)
-{
-       u64 portr;
-       int i;
-       int rcvdint = 0;
-
-       /*
-        * test_and_clear_bit(IPATH_PORT_WAITING_RCV) and
-        * test_and_clear_bit(IPATH_PORT_WAITING_URG) below
-        * would both like timely updates of the bits so that
-        * we don't pass them by unnecessarily.  the rmb()
-        * here ensures that we see them promptly -- the
-        * corresponding wmb()'s are in ipath_poll_urgent()
-        * and ipath_poll_next()...
-        */
-       rmb();
-       portr = ((istat >> dd->ipath_i_rcvavail_shift) &
-                dd->ipath_i_rcvavail_mask) |
-               ((istat >> dd->ipath_i_rcvurg_shift) &
-                dd->ipath_i_rcvurg_mask);
-       for (i = 1; i < dd->ipath_cfgports; i++) {
-               struct ipath_portdata *pd = dd->ipath_pd[i];
-
-               if (portr & (1 << i) && pd && pd->port_cnt) {
-                       if (test_and_clear_bit(IPATH_PORT_WAITING_RCV,
-                                              &pd->port_flag)) {
-                               clear_bit(i + dd->ipath_r_intravail_shift,
-                                         &dd->ipath_rcvctrl);
-                               wake_up_interruptible(&pd->port_wait);
-                               rcvdint = 1;
-                       } else if (test_and_clear_bit(IPATH_PORT_WAITING_URG,
-                                                     &pd->port_flag)) {
-                               pd->port_urgent++;
-                               wake_up_interruptible(&pd->port_wait);
-                       }
-               }
-       }
-       if (rcvdint) {
-               /* only want to take one interrupt, so turn off the rcv
-                * interrupt for all the ports that we set the rcv_waiting
-                * (but never for kernel port)
-                */
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
-                                dd->ipath_rcvctrl);
-       }
-}
-
-irqreturn_t ipath_intr(int irq, void *data)
-{
-       struct ipath_devdata *dd = data;
-       u64 istat, chk0rcv = 0;
-       ipath_err_t estat = 0;
-       irqreturn_t ret;
-       static unsigned unexpected = 0;
-       u64 kportrbits;
-
-       ipath_stats.sps_ints++;
-
-       if (dd->ipath_int_counter != (u32) -1)
-               dd->ipath_int_counter++;
-
-       if (!(dd->ipath_flags & IPATH_PRESENT)) {
-               /*
-                * This return value is not great, but we do not want the
-                * interrupt core code to remove our interrupt handler
-                * because we don't appear to be handling an interrupt
-                * during a chip reset.
-                */
-               return IRQ_HANDLED;
-       }
-
-       /*
-        * this needs to be flags&initted, not statusp, so we keep
-        * taking interrupts even after link goes down, etc.
-        * Also, we *must* clear the interrupt at some point, or we won't
-        * take it again, which can be real bad for errors, etc...
-        */
-
-       if (!(dd->ipath_flags & IPATH_INITTED)) {
-               ipath_bad_intr(dd, &unexpected);
-               ret = IRQ_NONE;
-               goto bail;
-       }
-
-       istat = ipath_read_ireg(dd, dd->ipath_kregs->kr_intstatus);
-
-       if (unlikely(!istat)) {
-               ipath_stats.sps_nullintr++;
-               ret = IRQ_NONE; /* not our interrupt, or already handled */
-               goto bail;
-       }
-       if (unlikely(istat == -1)) {
-               ipath_bad_regread(dd);
-               /* don't know if it was our interrupt or not */
-               ret = IRQ_NONE;
-               goto bail;
-       }
-
-       if (unexpected)
-               unexpected = 0;
-
-       if (unlikely(istat & ~dd->ipath_i_bitsextant))
-               ipath_dev_err(dd,
-                             "interrupt with unknown interrupts %Lx set\n",
-                             (unsigned long long)
-                             istat & ~dd->ipath_i_bitsextant);
-       else if (istat & ~INFINIPATH_I_ERROR) /* errors do own printing */
-               ipath_cdbg(VERBOSE, "intr stat=0x%Lx\n",
-                       (unsigned long long) istat);
-
-       if (istat & INFINIPATH_I_ERROR) {
-               ipath_stats.sps_errints++;
-               estat = ipath_read_kreg64(dd,
-                                         dd->ipath_kregs->kr_errorstatus);
-               if (!estat)
-                       dev_info(&dd->pcidev->dev, "error interrupt (%Lx), "
-                                "but no error bits set!\n",
-                                (unsigned long long) istat);
-               else if (estat == -1LL)
-                       /*
-                        * should we try clearing all, or hope next read
-                        * works?
-                        */
-                       ipath_dev_err(dd, "Read of error status failed "
-                                     "(all bits set); ignoring\n");
-               else
-                       chk0rcv |= handle_errors(dd, estat);
-       }
-
-       if (istat & INFINIPATH_I_GPIO) {
-               /*
-                * GPIO interrupts fall in two broad classes:
-                * GPIO_2 indicates (on some HT4xx boards) that a packet
-                *        has arrived for Port 0. Checking for this
-                *        is controlled by flag IPATH_GPIO_INTR.
-                * GPIO_3..5 on IBA6120 Rev2 and IBA6110 Rev4 chips indicate
-                *        errors that we need to count. Checking for this
-                *        is controlled by flag IPATH_GPIO_ERRINTRS.
-                */
-               u32 gpiostatus;
-               u32 to_clear = 0;
-
-               gpiostatus = ipath_read_kreg32(
-                       dd, dd->ipath_kregs->kr_gpio_status);
-               /* First the error-counter case. */
-               if ((gpiostatus & IPATH_GPIO_ERRINTR_MASK) &&
-                   (dd->ipath_flags & IPATH_GPIO_ERRINTRS)) {
-                       /* want to clear the bits we see asserted. */
-                       to_clear |= (gpiostatus & IPATH_GPIO_ERRINTR_MASK);
-
-                       /*
-                        * Count appropriately, clear bits out of our copy,
-                        * as they have been "handled".
-                        */
-                       if (gpiostatus & (1 << IPATH_GPIO_RXUVL_BIT)) {
-                               ipath_dbg("FlowCtl on UnsupVL\n");
-                               dd->ipath_rxfc_unsupvl_errs++;
-                       }
-                       if (gpiostatus & (1 << IPATH_GPIO_OVRUN_BIT)) {
-                               ipath_dbg("Overrun Threshold exceeded\n");
-                               dd->ipath_overrun_thresh_errs++;
-                       }
-                       if (gpiostatus & (1 << IPATH_GPIO_LLI_BIT)) {
-                               ipath_dbg("Local Link Integrity error\n");
-                               dd->ipath_lli_errs++;
-                       }
-                       gpiostatus &= ~IPATH_GPIO_ERRINTR_MASK;
-               }
-               /* Now the Port0 Receive case */
-               if ((gpiostatus & (1 << IPATH_GPIO_PORT0_BIT)) &&
-                   (dd->ipath_flags & IPATH_GPIO_INTR)) {
-                       /*
-                        * GPIO status bit 2 is set, and we expected it.
-                        * clear it and indicate in p0bits.
-                        * This probably only happens if a Port0 pkt
-                        * arrives at _just_ the wrong time, and we
-                        * handle that by seting chk0rcv;
-                        */
-                       to_clear |= (1 << IPATH_GPIO_PORT0_BIT);
-                       gpiostatus &= ~(1 << IPATH_GPIO_PORT0_BIT);
-                       chk0rcv = 1;
-               }
-               if (gpiostatus) {
-                       /*
-                        * Some unexpected bits remain. If they could have
-                        * caused the interrupt, complain and clear.
-                        * To avoid repetition of this condition, also clear
-                        * the mask. It is almost certainly due to error.
-                        */
-                       const u32 mask = (u32) dd->ipath_gpio_mask;
-
-                       if (mask & gpiostatus) {
-                               ipath_dbg("Unexpected GPIO IRQ bits %x\n",
-                                 gpiostatus & mask);
-                               to_clear |= (gpiostatus & mask);
-                               dd->ipath_gpio_mask &= ~(gpiostatus & mask);
-                               ipath_write_kreg(dd,
-                                       dd->ipath_kregs->kr_gpio_mask,
-                                       dd->ipath_gpio_mask);
-                       }
-               }
-               if (to_clear) {
-                       ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear,
-                                       (u64) to_clear);
-               }
-       }
-
-       /*
-        * Clear the interrupt bits we found set, unless they are receive
-        * related, in which case we already cleared them above, and don't
-        * want to clear them again, because we might lose an interrupt.
-        * Clear it early, so we "know" know the chip will have seen this by
-        * the time we process the queue, and will re-interrupt if necessary.
-        * The processor itself won't take the interrupt again until we return.
-        */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, istat);
-
-       /*
-        * Handle kernel receive queues before checking for pio buffers
-        * available since receives can overflow; piobuf waiters can afford
-        * a few extra cycles, since they were waiting anyway, and user's
-        * waiting for receive are at the bottom.
-        */
-       kportrbits = (1ULL << dd->ipath_i_rcvavail_shift) |
-               (1ULL << dd->ipath_i_rcvurg_shift);
-       if (chk0rcv || (istat & kportrbits)) {
-               istat &= ~kportrbits;
-               ipath_kreceive(dd->ipath_pd[0]);
-       }
-
-       if (istat & ((dd->ipath_i_rcvavail_mask << dd->ipath_i_rcvavail_shift) |
-                    (dd->ipath_i_rcvurg_mask << dd->ipath_i_rcvurg_shift)))
-               handle_urcv(dd, istat);
-
-       if (istat & (INFINIPATH_I_SDMAINT | INFINIPATH_I_SDMADISABLED))
-               handle_sdma_intr(dd, istat);
-
-       if (istat & INFINIPATH_I_SPIOBUFAVAIL) {
-               unsigned long flags;
-
-               spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-               dd->ipath_sendctrl &= ~INFINIPATH_S_PIOINTBUFAVAIL;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-                                dd->ipath_sendctrl);
-               ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-               spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-
-               /* always process; sdma verbs uses PIO for acks and VL15  */
-               handle_layer_pioavail(dd);
-       }
-
-       ret = IRQ_HANDLED;
-
-bail:
-       return ret;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_kernel.h b/drivers/staging/rdma/ipath/ipath_kernel.h
deleted file mode 100644 (file)
index 66c934a..0000000
+++ /dev/null
@@ -1,1374 +0,0 @@
-#ifndef _IPATH_KERNEL_H
-#define _IPATH_KERNEL_H
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/*
- * This header file is the base header file for infinipath kernel code
- * ipath_user.h serves a similar purpose for user code.
- */
-
-#include <linux/interrupt.h>
-#include <linux/pci.h>
-#include <linux/dma-mapping.h>
-#include <linux/mutex.h>
-#include <linux/list.h>
-#include <linux/scatterlist.h>
-#include <linux/sched.h>
-#include <asm/io.h>
-#include <rdma/ib_verbs.h>
-
-#include "ipath_common.h"
-#include "ipath_debug.h"
-#include "ipath_registers.h"
-
-/* only s/w major version of InfiniPath we can handle */
-#define IPATH_CHIP_VERS_MAJ 2U
-
-/* don't care about this except printing */
-#define IPATH_CHIP_VERS_MIN 0U
-
-/* temporary, maybe always */
-extern struct infinipath_stats ipath_stats;
-
-#define IPATH_CHIP_SWVERSION IPATH_CHIP_VERS_MAJ
-/*
- * First-cut critierion for "device is active" is
- * two thousand dwords combined Tx, Rx traffic per
- * 5-second interval. SMA packets are 64 dwords,
- * and occur "a few per second", presumably each way.
- */
-#define IPATH_TRAFFIC_ACTIVE_THRESHOLD (2000)
-/*
- * Struct used to indicate which errors are logged in each of the
- * error-counters that are logged to EEPROM. A counter is incremented
- * _once_ (saturating at 255) for each event with any bits set in
- * the error or hwerror register masks below.
- */
-#define IPATH_EEP_LOG_CNT (4)
-struct ipath_eep_log_mask {
-       u64 errs_to_log;
-       u64 hwerrs_to_log;
-};
-
-struct ipath_portdata {
-       void **port_rcvegrbuf;
-       dma_addr_t *port_rcvegrbuf_phys;
-       /* rcvhdrq base, needs mmap before useful */
-       void *port_rcvhdrq;
-       /* kernel virtual address where hdrqtail is updated */
-       void *port_rcvhdrtail_kvaddr;
-       /*
-        * temp buffer for expected send setup, allocated at open, instead
-        * of each setup call
-        */
-       void *port_tid_pg_list;
-       /* when waiting for rcv or pioavail */
-       wait_queue_head_t port_wait;
-       /*
-        * rcvegr bufs base, physical, must fit
-        * in 44 bits so 32 bit programs mmap64 44 bit works)
-        */
-       dma_addr_t port_rcvegr_phys;
-       /* mmap of hdrq, must fit in 44 bits */
-       dma_addr_t port_rcvhdrq_phys;
-       dma_addr_t port_rcvhdrqtailaddr_phys;
-       /*
-        * number of opens (including slave subports) on this instance
-        * (ignoring forks, dup, etc. for now)
-        */
-       int port_cnt;
-       /*
-        * how much space to leave at start of eager TID entries for
-        * protocol use, on each TID
-        */
-       /* instead of calculating it */
-       unsigned port_port;
-       /* non-zero if port is being shared. */
-       u16 port_subport_cnt;
-       /* non-zero if port is being shared. */
-       u16 port_subport_id;
-       /* number of pio bufs for this port (all procs, if shared) */
-       u32 port_piocnt;
-       /* first pio buffer for this port */
-       u32 port_pio_base;
-       /* chip offset of PIO buffers for this port */
-       u32 port_piobufs;
-       /* how many alloc_pages() chunks in port_rcvegrbuf_pages */
-       u32 port_rcvegrbuf_chunks;
-       /* how many egrbufs per chunk */
-       u32 port_rcvegrbufs_perchunk;
-       /* order for port_rcvegrbuf_pages */
-       size_t port_rcvegrbuf_size;
-       /* rcvhdrq size (for freeing) */
-       size_t port_rcvhdrq_size;
-       /* next expected TID to check when looking for free */
-       u32 port_tidcursor;
-       /* next expected TID to check */
-       unsigned long port_flag;
-       /* what happened */
-       unsigned long int_flag;
-       /* WAIT_RCV that timed out, no interrupt */
-       u32 port_rcvwait_to;
-       /* WAIT_PIO that timed out, no interrupt */
-       u32 port_piowait_to;
-       /* WAIT_RCV already happened, no wait */
-       u32 port_rcvnowait;
-       /* WAIT_PIO already happened, no wait */
-       u32 port_pionowait;
-       /* total number of rcvhdrqfull errors */
-       u32 port_hdrqfull;
-       /*
-        * Used to suppress multiple instances of same
-        * port staying stuck at same point.
-        */
-       u32 port_lastrcvhdrqtail;
-       /* saved total number of rcvhdrqfull errors for poll edge trigger */
-       u32 port_hdrqfull_poll;
-       /* total number of polled urgent packets */
-       u32 port_urgent;
-       /* saved total number of polled urgent packets for poll edge trigger */
-       u32 port_urgent_poll;
-       /* pid of process using this port */
-       struct pid *port_pid;
-       struct pid *port_subpid[INFINIPATH_MAX_SUBPORT];
-       /* same size as task_struct .comm[] */
-       char port_comm[TASK_COMM_LEN];
-       /* pkeys set by this use of this port */
-       u16 port_pkeys[4];
-       /* so file ops can get at unit */
-       struct ipath_devdata *port_dd;
-       /* A page of memory for rcvhdrhead, rcvegrhead, rcvegrtail * N */
-       void *subport_uregbase;
-       /* An array of pages for the eager receive buffers * N */
-       void *subport_rcvegrbuf;
-       /* An array of pages for the eager header queue entries * N */
-       void *subport_rcvhdr_base;
-       /* The version of the library which opened this port */
-       u32 userversion;
-       /* Bitmask of active slaves */
-       u32 active_slaves;
-       /* Type of packets or conditions we want to poll for */
-       u16 poll_type;
-       /* port rcvhdrq head offset */
-       u32 port_head;
-       /* receive packet sequence counter */
-       u32 port_seq_cnt;
-};
-
-struct sk_buff;
-struct ipath_sge_state;
-struct ipath_verbs_txreq;
-
-/*
- * control information for layered drivers
- */
-struct _ipath_layer {
-       void *l_arg;
-};
-
-struct ipath_skbinfo {
-       struct sk_buff *skb;
-       dma_addr_t phys;
-};
-
-struct ipath_sdma_txreq {
-       int                 flags;
-       int                 sg_count;
-       union {
-               struct scatterlist *sg;
-               void *map_addr;
-       };
-       void              (*callback)(void *, int);
-       void               *callback_cookie;
-       int                 callback_status;
-       u16                 start_idx;  /* sdma private */
-       u16                 next_descq_idx;  /* sdma private */
-       struct list_head    list;       /* sdma private */
-};
-
-struct ipath_sdma_desc {
-       __le64 qw[2];
-};
-
-#define IPATH_SDMA_TXREQ_F_USELARGEBUF  0x1
-#define IPATH_SDMA_TXREQ_F_HEADTOHOST   0x2
-#define IPATH_SDMA_TXREQ_F_INTREQ       0x4
-#define IPATH_SDMA_TXREQ_F_FREEBUF      0x8
-#define IPATH_SDMA_TXREQ_F_FREEDESC     0x10
-#define IPATH_SDMA_TXREQ_F_VL15         0x20
-
-#define IPATH_SDMA_TXREQ_S_OK        0
-#define IPATH_SDMA_TXREQ_S_SENDERROR 1
-#define IPATH_SDMA_TXREQ_S_ABORTED   2
-#define IPATH_SDMA_TXREQ_S_SHUTDOWN  3
-
-#define IPATH_SDMA_STATUS_SCORE_BOARD_DRAIN_IN_PROG    (1ull << 63)
-#define IPATH_SDMA_STATUS_ABORT_IN_PROG                        (1ull << 62)
-#define IPATH_SDMA_STATUS_INTERNAL_SDMA_ENABLE         (1ull << 61)
-#define IPATH_SDMA_STATUS_SCB_EMPTY                    (1ull << 30)
-
-/* max dwords in small buffer packet */
-#define IPATH_SMALLBUF_DWORDS (dd->ipath_piosize2k >> 2)
-
-/*
- * Possible IB config parameters for ipath_f_get/set_ib_cfg()
- */
-#define IPATH_IB_CFG_LIDLMC 0 /* Get/set LID (LS16b) and Mask (MS16b) */
-#define IPATH_IB_CFG_HRTBT 1 /* Get/set Heartbeat off/enable/auto */
-#define IPATH_IB_HRTBT_ON 3 /* Heartbeat enabled, sent every 100msec */
-#define IPATH_IB_HRTBT_OFF 0 /* Heartbeat off */
-#define IPATH_IB_CFG_LWID_ENB 2 /* Get/set allowed Link-width */
-#define IPATH_IB_CFG_LWID 3 /* Get currently active Link-width */
-#define IPATH_IB_CFG_SPD_ENB 4 /* Get/set allowed Link speeds */
-#define IPATH_IB_CFG_SPD 5 /* Get current Link spd */
-#define IPATH_IB_CFG_RXPOL_ENB 6 /* Get/set Auto-RX-polarity enable */
-#define IPATH_IB_CFG_LREV_ENB 7 /* Get/set Auto-Lane-reversal enable */
-#define IPATH_IB_CFG_LINKLATENCY 8 /* Get Auto-Lane-reversal enable */
-
-
-struct ipath_devdata {
-       struct list_head ipath_list;
-
-       struct ipath_kregs const *ipath_kregs;
-       struct ipath_cregs const *ipath_cregs;
-
-       /* mem-mapped pointer to base of chip regs */
-       u64 __iomem *ipath_kregbase;
-       /* end of mem-mapped chip space; range checking */
-       u64 __iomem *ipath_kregend;
-       /* physical address of chip for io_remap, etc. */
-       unsigned long ipath_physaddr;
-       /* base of memory alloced for ipath_kregbase, for free */
-       u64 *ipath_kregalloc;
-       /* ipath_cfgports pointers */
-       struct ipath_portdata **ipath_pd;
-       /* sk_buffs used by port 0 eager receive queue */
-       struct ipath_skbinfo *ipath_port0_skbinfo;
-       /* kvirt address of 1st 2k pio buffer */
-       void __iomem *ipath_pio2kbase;
-       /* kvirt address of 1st 4k pio buffer */
-       void __iomem *ipath_pio4kbase;
-       /*
-        * points to area where PIOavail registers will be DMA'ed.
-        * Has to be on a page of it's own, because the page will be
-        * mapped into user program space.  This copy is *ONLY* ever
-        * written by DMA, not by the driver!  Need a copy per device
-        * when we get to multiple devices
-        */
-       volatile __le64 *ipath_pioavailregs_dma;
-       /* physical address where updates occur */
-       dma_addr_t ipath_pioavailregs_phys;
-       struct _ipath_layer ipath_layer;
-       /* setup intr */
-       int (*ipath_f_intrsetup)(struct ipath_devdata *);
-       /* fallback to alternate interrupt type if possible */
-       int (*ipath_f_intr_fallback)(struct ipath_devdata *);
-       /* setup on-chip bus config */
-       int (*ipath_f_bus)(struct ipath_devdata *, struct pci_dev *);
-       /* hard reset chip */
-       int (*ipath_f_reset)(struct ipath_devdata *);
-       int (*ipath_f_get_boardname)(struct ipath_devdata *, char *,
-                                    size_t);
-       void (*ipath_f_init_hwerrors)(struct ipath_devdata *);
-       void (*ipath_f_handle_hwerrors)(struct ipath_devdata *, char *,
-                                       size_t);
-       void (*ipath_f_quiet_serdes)(struct ipath_devdata *);
-       int (*ipath_f_bringup_serdes)(struct ipath_devdata *);
-       int (*ipath_f_early_init)(struct ipath_devdata *);
-       void (*ipath_f_clear_tids)(struct ipath_devdata *, unsigned);
-       void (*ipath_f_put_tid)(struct ipath_devdata *, u64 __iomem*,
-                               u32, unsigned long);
-       void (*ipath_f_tidtemplate)(struct ipath_devdata *);
-       void (*ipath_f_cleanup)(struct ipath_devdata *);
-       void (*ipath_f_setextled)(struct ipath_devdata *, u64, u64);
-       /* fill out chip-specific fields */
-       int (*ipath_f_get_base_info)(struct ipath_portdata *, void *);
-       /* free irq */
-       void (*ipath_f_free_irq)(struct ipath_devdata *);
-       struct ipath_message_header *(*ipath_f_get_msgheader)
-                                       (struct ipath_devdata *, __le32 *);
-       void (*ipath_f_config_ports)(struct ipath_devdata *, ushort);
-       int (*ipath_f_get_ib_cfg)(struct ipath_devdata *, int);
-       int (*ipath_f_set_ib_cfg)(struct ipath_devdata *, int, u32);
-       void (*ipath_f_config_jint)(struct ipath_devdata *, u16 , u16);
-       void (*ipath_f_read_counters)(struct ipath_devdata *,
-                                       struct infinipath_counters *);
-       void (*ipath_f_xgxs_reset)(struct ipath_devdata *);
-       /* per chip actions needed for IB Link up/down changes */
-       int (*ipath_f_ib_updown)(struct ipath_devdata *, int, u64);
-
-       unsigned ipath_lastegr_idx;
-       struct ipath_ibdev *verbs_dev;
-       struct timer_list verbs_timer;
-       /* total dwords sent (summed from counter) */
-       u64 ipath_sword;
-       /* total dwords rcvd (summed from counter) */
-       u64 ipath_rword;
-       /* total packets sent (summed from counter) */
-       u64 ipath_spkts;
-       /* total packets rcvd (summed from counter) */
-       u64 ipath_rpkts;
-       /* ipath_statusp initially points to this. */
-       u64 _ipath_status;
-       /* GUID for this interface, in network order */
-       __be64 ipath_guid;
-       /*
-        * aggregrate of error bits reported since last cleared, for
-        * limiting of error reporting
-        */
-       ipath_err_t ipath_lasterror;
-       /*
-        * aggregrate of error bits reported since last cleared, for
-        * limiting of hwerror reporting
-        */
-       ipath_err_t ipath_lasthwerror;
-       /* errors masked because they occur too fast */
-       ipath_err_t ipath_maskederrs;
-       u64 ipath_lastlinkrecov; /* link recoveries at last ACTIVE */
-       /* these 5 fields are used to establish deltas for IB Symbol
-        * errors and linkrecovery errors. They can be reported on
-        * some chips during link negotiation prior to INIT, and with
-        * DDR when faking DDR negotiations with non-IBTA switches.
-        * The chip counters are adjusted at driver unload if there is
-        * a non-zero delta.
-        */
-       u64 ibdeltainprog;
-       u64 ibsymdelta;
-       u64 ibsymsnap;
-       u64 iblnkerrdelta;
-       u64 iblnkerrsnap;
-
-       /* time in jiffies at which to re-enable maskederrs */
-       unsigned long ipath_unmasktime;
-       /* count of egrfull errors, combined for all ports */
-       u64 ipath_last_tidfull;
-       /* for ipath_qcheck() */
-       u64 ipath_lastport0rcv_cnt;
-       /* template for writing TIDs  */
-       u64 ipath_tidtemplate;
-       /* value to write to free TIDs */
-       u64 ipath_tidinvalid;
-       /* IBA6120 rcv interrupt setup */
-       u64 ipath_rhdrhead_intr_off;
-
-       /* size of memory at ipath_kregbase */
-       u32 ipath_kregsize;
-       /* number of registers used for pioavail */
-       u32 ipath_pioavregs;
-       /* IPATH_POLL, etc. */
-       u32 ipath_flags;
-       /* ipath_flags driver is waiting for */
-       u32 ipath_state_wanted;
-       /* last buffer for user use, first buf for kernel use is this
-        * index. */
-       u32 ipath_lastport_piobuf;
-       /* is a stats timer active */
-       u32 ipath_stats_timer_active;
-       /* number of interrupts for this device -- saturates... */
-       u32 ipath_int_counter;
-       /* dwords sent read from counter */
-       u32 ipath_lastsword;
-       /* dwords received read from counter */
-       u32 ipath_lastrword;
-       /* sent packets read from counter */
-       u32 ipath_lastspkts;
-       /* received packets read from counter */
-       u32 ipath_lastrpkts;
-       /* pio bufs allocated per port */
-       u32 ipath_pbufsport;
-       /* if remainder on bufs/port, ports < extrabuf get 1 extra */
-       u32 ipath_ports_extrabuf;
-       u32 ipath_pioupd_thresh; /* update threshold, some chips */
-       /*
-        * number of ports configured as max; zero is set to number chip
-        * supports, less gives more pio bufs/port, etc.
-        */
-       u32 ipath_cfgports;
-       /* count of port 0 hdrqfull errors */
-       u32 ipath_p0_hdrqfull;
-       /* port 0 number of receive eager buffers */
-       u32 ipath_p0_rcvegrcnt;
-
-       /*
-        * index of last piobuffer we used.  Speeds up searching, by
-        * starting at this point.  Doesn't matter if multiple cpu's use and
-        * update, last updater is only write that matters.  Whenever it
-        * wraps, we update shadow copies.  Need a copy per device when we
-        * get to multiple devices
-        */
-       u32 ipath_lastpioindex;
-       u32 ipath_lastpioindexl;
-       /* max length of freezemsg */
-       u32 ipath_freezelen;
-       /*
-        * consecutive times we wanted a PIO buffer but were unable to
-        * get one
-        */
-       u32 ipath_consec_nopiobuf;
-       /*
-        * hint that we should update ipath_pioavailshadow before
-        * looking for a PIO buffer
-        */
-       u32 ipath_upd_pio_shadow;
-       /* so we can rewrite it after a chip reset */
-       u32 ipath_pcibar0;
-       /* so we can rewrite it after a chip reset */
-       u32 ipath_pcibar1;
-       u32 ipath_x1_fix_tries;
-       u32 ipath_autoneg_tries;
-       u32 serdes_first_init_done;
-
-       struct ipath_relock {
-               atomic_t ipath_relock_timer_active;
-               struct timer_list ipath_relock_timer;
-               unsigned int ipath_relock_interval; /* in jiffies */
-       } ipath_relock_singleton;
-
-       /* interrupt number */
-       int ipath_irq;
-       /* HT/PCI Vendor ID (here for NodeInfo) */
-       u16 ipath_vendorid;
-       /* HT/PCI Device ID (here for NodeInfo) */
-       u16 ipath_deviceid;
-       /* offset in HT config space of slave/primary interface block */
-       u8 ipath_ht_slave_off;
-       /* for write combining settings */
-       int wc_cookie;
-       /* ref count for each pkey */
-       atomic_t ipath_pkeyrefs[4];
-       /* shadow copy of struct page *'s for exp tid pages */
-       struct page **ipath_pageshadow;
-       /* shadow copy of dma handles for exp tid pages */
-       dma_addr_t *ipath_physshadow;
-       u64 __iomem *ipath_egrtidbase;
-       /* lock to workaround chip bug 9437 and others */
-       spinlock_t ipath_kernel_tid_lock;
-       spinlock_t ipath_user_tid_lock;
-       spinlock_t ipath_sendctrl_lock;
-       /* around ipath_pd and (user ports) port_cnt use (intr vs free) */
-       spinlock_t ipath_uctxt_lock;
-
-       /*
-        * IPATH_STATUS_*,
-        * this address is mapped readonly into user processes so they can
-        * get status cheaply, whenever they want.
-        */
-       u64 *ipath_statusp;
-       /* freeze msg if hw error put chip in freeze */
-       char *ipath_freezemsg;
-       /* pci access data structure */
-       struct pci_dev *pcidev;
-       struct cdev *user_cdev;
-       struct cdev *diag_cdev;
-       struct device *user_dev;
-       struct device *diag_dev;
-       /* timer used to prevent stats overflow, error throttling, etc. */
-       struct timer_list ipath_stats_timer;
-       /* timer to verify interrupts work, and fallback if possible */
-       struct timer_list ipath_intrchk_timer;
-       void *ipath_dummy_hdrq; /* used after port close */
-       dma_addr_t ipath_dummy_hdrq_phys;
-
-       /* SendDMA related entries */
-       spinlock_t            ipath_sdma_lock;
-       unsigned long         ipath_sdma_status;
-       unsigned long         ipath_sdma_abort_jiffies;
-       unsigned long         ipath_sdma_abort_intr_timeout;
-       unsigned long         ipath_sdma_buf_jiffies;
-       struct ipath_sdma_desc *ipath_sdma_descq;
-       u64                   ipath_sdma_descq_added;
-       u64                   ipath_sdma_descq_removed;
-       int                   ipath_sdma_desc_nreserved;
-       u16                   ipath_sdma_descq_cnt;
-       u16                   ipath_sdma_descq_tail;
-       u16                   ipath_sdma_descq_head;
-       u16                   ipath_sdma_next_intr;
-       u16                   ipath_sdma_reset_wait;
-       u8                    ipath_sdma_generation;
-       struct tasklet_struct ipath_sdma_abort_task;
-       struct tasklet_struct ipath_sdma_notify_task;
-       struct list_head      ipath_sdma_activelist;
-       struct list_head      ipath_sdma_notifylist;
-       atomic_t              ipath_sdma_vl15_count;
-       struct timer_list     ipath_sdma_vl15_timer;
-
-       dma_addr_t       ipath_sdma_descq_phys;
-       volatile __le64 *ipath_sdma_head_dma;
-       dma_addr_t       ipath_sdma_head_phys;
-
-       unsigned long ipath_ureg_align; /* user register alignment */
-
-       struct delayed_work ipath_autoneg_work;
-       wait_queue_head_t ipath_autoneg_wait;
-
-       /* HoL blocking / user app forward-progress state */
-       unsigned          ipath_hol_state;
-       unsigned          ipath_hol_next;
-       struct timer_list ipath_hol_timer;
-
-       /*
-        * Shadow copies of registers; size indicates read access size.
-        * Most of them are readonly, but some are write-only register,
-        * where we manipulate the bits in the shadow copy, and then write
-        * the shadow copy to infinipath.
-        *
-        * We deliberately make most of these 32 bits, since they have
-        * restricted range.  For any that we read, we won't to generate 32
-        * bit accesses, since Opteron will generate 2 separate 32 bit HT
-        * transactions for a 64 bit read, and we want to avoid unnecessary
-        * HT transactions.
-        */
-
-       /* This is the 64 bit group */
-
-       /*
-        * shadow of pioavail, check to be sure it's large enough at
-        * init time.
-        */
-       unsigned long ipath_pioavailshadow[8];
-       /* bitmap of send buffers available for the kernel to use with PIO. */
-       unsigned long ipath_pioavailkernel[8];
-       /* shadow of kr_gpio_out, for rmw ops */
-       u64 ipath_gpio_out;
-       /* shadow the gpio mask register */
-       u64 ipath_gpio_mask;
-       /* shadow the gpio output enable, etc... */
-       u64 ipath_extctrl;
-       /* kr_revision shadow */
-       u64 ipath_revision;
-       /*
-        * shadow of ibcctrl, for interrupt handling of link changes,
-        * etc.
-        */
-       u64 ipath_ibcctrl;
-       /*
-        * last ibcstatus, to suppress "duplicate" status change messages,
-        * mostly from 2 to 3
-        */
-       u64 ipath_lastibcstat;
-       /* hwerrmask shadow */
-       ipath_err_t ipath_hwerrmask;
-       ipath_err_t ipath_errormask; /* errormask shadow */
-       /* interrupt config reg shadow */
-       u64 ipath_intconfig;
-       /* kr_sendpiobufbase value */
-       u64 ipath_piobufbase;
-       /* kr_ibcddrctrl shadow */
-       u64 ipath_ibcddrctrl;
-
-       /* these are the "32 bit" regs */
-
-       /*
-        * number of GUIDs in the flash for this interface; may need some
-        * rethinking for setting on other ifaces
-        */
-       u32 ipath_nguid;
-       /*
-        * the following two are 32-bit bitmasks, but {test,clear,set}_bit
-        * all expect bit fields to be "unsigned long"
-        */
-       /* shadow kr_rcvctrl */
-       unsigned long ipath_rcvctrl;
-       /* shadow kr_sendctrl */
-       unsigned long ipath_sendctrl;
-       /* to not count armlaunch after cancel */
-       unsigned long ipath_lastcancel;
-       /* count cases where special trigger was needed (double write) */
-       unsigned long ipath_spectriggerhit;
-
-       /* value we put in kr_rcvhdrcnt */
-       u32 ipath_rcvhdrcnt;
-       /* value we put in kr_rcvhdrsize */
-       u32 ipath_rcvhdrsize;
-       /* value we put in kr_rcvhdrentsize */
-       u32 ipath_rcvhdrentsize;
-       /* offset of last entry in rcvhdrq */
-       u32 ipath_hdrqlast;
-       /* kr_portcnt value */
-       u32 ipath_portcnt;
-       /* kr_pagealign value */
-       u32 ipath_palign;
-       /* number of "2KB" PIO buffers */
-       u32 ipath_piobcnt2k;
-       /* size in bytes of "2KB" PIO buffers */
-       u32 ipath_piosize2k;
-       /* number of "4KB" PIO buffers */
-       u32 ipath_piobcnt4k;
-       /* size in bytes of "4KB" PIO buffers */
-       u32 ipath_piosize4k;
-       u32 ipath_pioreserved; /* reserved special-inkernel; */
-       /* kr_rcvegrbase value */
-       u32 ipath_rcvegrbase;
-       /* kr_rcvegrcnt value */
-       u32 ipath_rcvegrcnt;
-       /* kr_rcvtidbase value */
-       u32 ipath_rcvtidbase;
-       /* kr_rcvtidcnt value */
-       u32 ipath_rcvtidcnt;
-       /* kr_sendregbase */
-       u32 ipath_sregbase;
-       /* kr_userregbase */
-       u32 ipath_uregbase;
-       /* kr_counterregbase */
-       u32 ipath_cregbase;
-       /* shadow the control register contents */
-       u32 ipath_control;
-       /* PCI revision register (HTC rev on FPGA) */
-       u32 ipath_pcirev;
-
-       /* chip address space used by 4k pio buffers */
-       u32 ipath_4kalign;
-       /* The MTU programmed for this unit */
-       u32 ipath_ibmtu;
-       /*
-        * The max size IB packet, included IB headers that we can send.
-        * Starts same as ipath_piosize, but is affected when ibmtu is
-        * changed, or by size of eager buffers
-        */
-       u32 ipath_ibmaxlen;
-       /*
-        * ibmaxlen at init time, limited by chip and by receive buffer
-        * size.  Not changed after init.
-        */
-       u32 ipath_init_ibmaxlen;
-       /* size of each rcvegrbuffer */
-       u32 ipath_rcvegrbufsize;
-       /* localbus width (1, 2,4,8,16,32) from config space  */
-       u32 ipath_lbus_width;
-       /* localbus speed (HT: 200,400,800,1000; PCIe 2500) */
-       u32 ipath_lbus_speed;
-       /*
-        * number of sequential ibcstatus change for polling active/quiet
-        * (i.e., link not coming up).
-        */
-       u32 ipath_ibpollcnt;
-       /* low and high portions of MSI capability/vector */
-       u32 ipath_msi_lo;
-       /* saved after PCIe init for restore after reset */
-       u32 ipath_msi_hi;
-       /* MSI data (vector) saved for restore */
-       u16 ipath_msi_data;
-       /* MLID programmed for this instance */
-       u16 ipath_mlid;
-       /* LID programmed for this instance */
-       u16 ipath_lid;
-       /* list of pkeys programmed; 0 if not set */
-       u16 ipath_pkeys[4];
-       /*
-        * ASCII serial number, from flash, large enough for original
-        * all digit strings, and longer QLogic serial number format
-        */
-       u8 ipath_serial[16];
-       /* human readable board version */
-       u8 ipath_boardversion[96];
-       u8 ipath_lbus_info[32]; /* human readable localbus info */
-       /* chip major rev, from ipath_revision */
-       u8 ipath_majrev;
-       /* chip minor rev, from ipath_revision */
-       u8 ipath_minrev;
-       /* board rev, from ipath_revision */
-       u8 ipath_boardrev;
-       /* saved for restore after reset */
-       u8 ipath_pci_cacheline;
-       /* LID mask control */
-       u8 ipath_lmc;
-       /* link width supported */
-       u8 ipath_link_width_supported;
-       /* link speed supported */
-       u8 ipath_link_speed_supported;
-       u8 ipath_link_width_enabled;
-       u8 ipath_link_speed_enabled;
-       u8 ipath_link_width_active;
-       u8 ipath_link_speed_active;
-       /* Rx Polarity inversion (compensate for ~tx on partner) */
-       u8 ipath_rx_pol_inv;
-
-       u8 ipath_r_portenable_shift;
-       u8 ipath_r_intravail_shift;
-       u8 ipath_r_tailupd_shift;
-       u8 ipath_r_portcfg_shift;
-
-       /* unit # of this chip, if present */
-       int ipath_unit;
-
-       /* local link integrity counter */
-       u32 ipath_lli_counter;
-       /* local link integrity errors */
-       u32 ipath_lli_errors;
-       /*
-        * Above counts only cases where _successive_ LocalLinkIntegrity
-        * errors were seen in the receive headers of kern-packets.
-        * Below are the three (monotonically increasing) counters
-        * maintained via GPIO interrupts on iba6120-rev2.
-        */
-       u32 ipath_rxfc_unsupvl_errs;
-       u32 ipath_overrun_thresh_errs;
-       u32 ipath_lli_errs;
-
-       /*
-        * Not all devices managed by a driver instance are the same
-        * type, so these fields must be per-device.
-        */
-       u64 ipath_i_bitsextant;
-       ipath_err_t ipath_e_bitsextant;
-       ipath_err_t ipath_hwe_bitsextant;
-
-       /*
-        * Below should be computable from number of ports,
-        * since they are never modified.
-        */
-       u64 ipath_i_rcvavail_mask;
-       u64 ipath_i_rcvurg_mask;
-       u16 ipath_i_rcvurg_shift;
-       u16 ipath_i_rcvavail_shift;
-
-       /*
-        * Register bits for selecting i2c direction and values, used for
-        * I2C serial flash.
-        */
-       u8 ipath_gpio_sda_num;
-       u8 ipath_gpio_scl_num;
-       u8 ipath_i2c_chain_type;
-       u64 ipath_gpio_sda;
-       u64 ipath_gpio_scl;
-
-       /* lock for doing RMW of shadows/regs for ExtCtrl and GPIO */
-       spinlock_t ipath_gpio_lock;
-
-       /*
-        * IB link and linktraining states and masks that vary per chip in
-        * some way.  Set at init, to avoid each IB status change interrupt
-        */
-       u8 ibcs_ls_shift;
-       u8 ibcs_lts_mask;
-       u32 ibcs_mask;
-       u32 ib_init;
-       u32 ib_arm;
-       u32 ib_active;
-
-       u16 ipath_rhf_offset; /* offset of RHF within receive header entry */
-
-       /*
-        * shift/mask for linkcmd, linkinitcmd, maxpktlen in ibccontol
-        * reg. Changes for IBA7220
-        */
-       u8 ibcc_lic_mask; /* LinkInitCmd */
-       u8 ibcc_lc_shift; /* LinkCmd */
-       u8 ibcc_mpl_shift; /* Maxpktlen */
-
-       u8 delay_mult;
-
-       /* used to override LED behavior */
-       u8 ipath_led_override;  /* Substituted for normal value, if non-zero */
-       u16 ipath_led_override_timeoff; /* delta to next timer event */
-       u8 ipath_led_override_vals[2]; /* Alternates per blink-frame */
-       u8 ipath_led_override_phase; /* Just counts, LSB picks from vals[] */
-       atomic_t ipath_led_override_timer_active;
-       /* Used to flash LEDs in override mode */
-       struct timer_list ipath_led_override_timer;
-
-       /* Support (including locks) for EEPROM logging of errors and time */
-       /* control access to actual counters, timer */
-       spinlock_t ipath_eep_st_lock;
-       /* control high-level access to EEPROM */
-       struct mutex ipath_eep_lock;
-       /* Below inc'd by ipath_snap_cntrs(), locked by ipath_eep_st_lock */
-       uint64_t ipath_traffic_wds;
-       /* active time is kept in seconds, but logged in hours */
-       atomic_t ipath_active_time;
-       /* Below are nominal shadow of EEPROM, new since last EEPROM update */
-       uint8_t ipath_eep_st_errs[IPATH_EEP_LOG_CNT];
-       uint8_t ipath_eep_st_new_errs[IPATH_EEP_LOG_CNT];
-       uint16_t ipath_eep_hrs;
-       /*
-        * masks for which bits of errs, hwerrs that cause
-        * each of the counters to increment.
-        */
-       struct ipath_eep_log_mask ipath_eep_st_masks[IPATH_EEP_LOG_CNT];
-
-       /* interrupt mitigation reload register info */
-       u16 ipath_jint_idle_ticks;      /* idle clock ticks */
-       u16 ipath_jint_max_packets;     /* max packets across all ports */
-
-       /*
-        * lock for access to SerDes, and flags to sequence preset
-        * versus steady-state. 7220-only at the moment.
-        */
-       spinlock_t ipath_sdepb_lock;
-       u8 ipath_presets_needed; /* Set if presets to be restored next DOWN */
-};
-
-/* ipath_hol_state values (stopping/starting user proc, send flushing) */
-#define IPATH_HOL_UP       0
-#define IPATH_HOL_DOWN     1
-/* ipath_hol_next toggle values, used when hol_state IPATH_HOL_DOWN */
-#define IPATH_HOL_DOWNSTOP 0
-#define IPATH_HOL_DOWNCONT 1
-
-/* bit positions for sdma_status */
-#define IPATH_SDMA_ABORTING  0
-#define IPATH_SDMA_DISARMED  1
-#define IPATH_SDMA_DISABLED  2
-#define IPATH_SDMA_LAYERBUF  3
-#define IPATH_SDMA_RUNNING  30
-#define IPATH_SDMA_SHUTDOWN 31
-
-/* bit combinations that correspond to abort states */
-#define IPATH_SDMA_ABORT_NONE 0
-#define IPATH_SDMA_ABORT_ABORTING (1UL << IPATH_SDMA_ABORTING)
-#define IPATH_SDMA_ABORT_DISARMED ((1UL << IPATH_SDMA_ABORTING) | \
-       (1UL << IPATH_SDMA_DISARMED))
-#define IPATH_SDMA_ABORT_DISABLED ((1UL << IPATH_SDMA_ABORTING) | \
-       (1UL << IPATH_SDMA_DISABLED))
-#define IPATH_SDMA_ABORT_ABORTED ((1UL << IPATH_SDMA_ABORTING) | \
-       (1UL << IPATH_SDMA_DISARMED) | (1UL << IPATH_SDMA_DISABLED))
-#define IPATH_SDMA_ABORT_MASK ((1UL<<IPATH_SDMA_ABORTING) | \
-       (1UL << IPATH_SDMA_DISARMED) | (1UL << IPATH_SDMA_DISABLED))
-
-#define IPATH_SDMA_BUF_NONE 0
-#define IPATH_SDMA_BUF_MASK (1UL<<IPATH_SDMA_LAYERBUF)
-
-/* Private data for file operations */
-struct ipath_filedata {
-       struct ipath_portdata *pd;
-       unsigned subport;
-       unsigned tidcursor;
-       struct ipath_user_sdma_queue *pq;
-};
-extern struct list_head ipath_dev_list;
-extern spinlock_t ipath_devs_lock;
-extern struct ipath_devdata *ipath_lookup(int unit);
-
-int ipath_init_chip(struct ipath_devdata *, int);
-int ipath_enable_wc(struct ipath_devdata *dd);
-void ipath_disable_wc(struct ipath_devdata *dd);
-int ipath_count_units(int *npresentp, int *nupp, int *maxportsp);
-void ipath_shutdown_device(struct ipath_devdata *);
-void ipath_clear_freeze(struct ipath_devdata *);
-
-struct file_operations;
-int ipath_cdev_init(int minor, char *name, const struct file_operations *fops,
-                   struct cdev **cdevp, struct device **devp);
-void ipath_cdev_cleanup(struct cdev **cdevp,
-                       struct device **devp);
-
-int ipath_diag_add(struct ipath_devdata *);
-void ipath_diag_remove(struct ipath_devdata *);
-
-extern wait_queue_head_t ipath_state_wait;
-
-int ipath_user_add(struct ipath_devdata *dd);
-void ipath_user_remove(struct ipath_devdata *dd);
-
-struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd, gfp_t);
-
-extern int ipath_diag_inuse;
-
-irqreturn_t ipath_intr(int irq, void *devid);
-int ipath_decode_err(struct ipath_devdata *dd, char *buf, size_t blen,
-                    ipath_err_t err);
-#if __IPATH_INFO || __IPATH_DBG
-extern const char *ipath_ibcstatus_str[];
-#endif
-
-/* clean up any per-chip chip-specific stuff */
-void ipath_chip_cleanup(struct ipath_devdata *);
-/* clean up any chip type-specific stuff */
-void ipath_chip_done(void);
-
-void ipath_disarm_piobufs(struct ipath_devdata *, unsigned first,
-                         unsigned cnt);
-void ipath_cancel_sends(struct ipath_devdata *, int);
-
-int ipath_create_rcvhdrq(struct ipath_devdata *, struct ipath_portdata *);
-void ipath_free_pddata(struct ipath_devdata *, struct ipath_portdata *);
-
-int ipath_parse_ushort(const char *str, unsigned short *valp);
-
-void ipath_kreceive(struct ipath_portdata *);
-int ipath_setrcvhdrsize(struct ipath_devdata *, unsigned);
-int ipath_reset_device(int);
-void ipath_get_faststats(unsigned long);
-int ipath_wait_linkstate(struct ipath_devdata *, u32, int);
-int ipath_set_linkstate(struct ipath_devdata *, u8);
-int ipath_set_mtu(struct ipath_devdata *, u16);
-int ipath_set_lid(struct ipath_devdata *, u32, u8);
-int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv);
-void ipath_enable_armlaunch(struct ipath_devdata *);
-void ipath_disable_armlaunch(struct ipath_devdata *);
-void ipath_hol_down(struct ipath_devdata *);
-void ipath_hol_up(struct ipath_devdata *);
-void ipath_hol_event(unsigned long);
-void ipath_toggle_rclkrls(struct ipath_devdata *);
-void ipath_sd7220_clr_ibpar(struct ipath_devdata *);
-void ipath_set_relock_poll(struct ipath_devdata *, int);
-void ipath_shutdown_relock_poll(struct ipath_devdata *);
-
-/* for use in system calls, where we want to know device type, etc. */
-#define port_fp(fp) ((struct ipath_filedata *)(fp)->private_data)->pd
-#define subport_fp(fp) \
-       ((struct ipath_filedata *)(fp)->private_data)->subport
-#define tidcursor_fp(fp) \
-       ((struct ipath_filedata *)(fp)->private_data)->tidcursor
-#define user_sdma_queue_fp(fp) \
-       ((struct ipath_filedata *)(fp)->private_data)->pq
-
-/*
- * values for ipath_flags
- */
-               /* chip can report link latency (IB 1.2) */
-#define IPATH_HAS_LINK_LATENCY 0x1
-               /* The chip is up and initted */
-#define IPATH_INITTED       0x2
-               /* set if any user code has set kr_rcvhdrsize */
-#define IPATH_RCVHDRSZ_SET  0x4
-               /* The chip is present and valid for accesses */
-#define IPATH_PRESENT       0x8
-               /* HT link0 is only 8 bits wide, ignore upper byte crc
-                * errors, etc. */
-#define IPATH_8BIT_IN_HT0   0x10
-               /* HT link1 is only 8 bits wide, ignore upper byte crc
-                * errors, etc. */
-#define IPATH_8BIT_IN_HT1   0x20
-               /* The link is down */
-#define IPATH_LINKDOWN      0x40
-               /* The link level is up (0x11) */
-#define IPATH_LINKINIT      0x80
-               /* The link is in the armed (0x21) state */
-#define IPATH_LINKARMED     0x100
-               /* The link is in the active (0x31) state */
-#define IPATH_LINKACTIVE    0x200
-               /* link current state is unknown */
-#define IPATH_LINKUNK       0x400
-               /* Write combining flush needed for PIO */
-#define IPATH_PIO_FLUSH_WC  0x1000
-               /* DMA Receive tail pointer */
-#define IPATH_NODMA_RTAIL   0x2000
-               /* no IB cable, or no device on IB cable */
-#define IPATH_NOCABLE       0x4000
-               /* Supports port zero per packet receive interrupts via
-                * GPIO */
-#define IPATH_GPIO_INTR     0x8000
-               /* uses the coded 4byte TID, not 8 byte */
-#define IPATH_4BYTE_TID     0x10000
-               /* packet/word counters are 32 bit, else those 4 counters
-                * are 64bit */
-#define IPATH_32BITCOUNTERS 0x20000
-               /* Interrupt register is 64 bits */
-#define IPATH_INTREG_64     0x40000
-               /* can miss port0 rx interrupts */
-#define IPATH_DISABLED      0x80000 /* administratively disabled */
-               /* Use GPIO interrupts for new counters */
-#define IPATH_GPIO_ERRINTRS 0x100000
-#define IPATH_SWAP_PIOBUFS  0x200000
-               /* Supports Send DMA */
-#define IPATH_HAS_SEND_DMA  0x400000
-               /* Supports Send Count (not just word count) in PBC */
-#define IPATH_HAS_PBC_CNT   0x800000
-               /* Suppress heartbeat, even if turning off loopback */
-#define IPATH_NO_HRTBT      0x1000000
-#define IPATH_HAS_THRESH_UPDATE 0x4000000
-#define IPATH_HAS_MULT_IB_SPEED 0x8000000
-#define IPATH_IB_AUTONEG_INPROG 0x10000000
-#define IPATH_IB_AUTONEG_FAILED 0x20000000
-               /* Linkdown-disable intentionally, Do not attempt to bring up */
-#define IPATH_IB_LINK_DISABLED 0x40000000
-#define IPATH_IB_FORCE_NOTIFY 0x80000000 /* force notify on next ib change */
-
-/* Bits in GPIO for the added interrupts */
-#define IPATH_GPIO_PORT0_BIT 2
-#define IPATH_GPIO_RXUVL_BIT 3
-#define IPATH_GPIO_OVRUN_BIT 4
-#define IPATH_GPIO_LLI_BIT 5
-#define IPATH_GPIO_ERRINTR_MASK 0x38
-
-/* portdata flag bit offsets */
-               /* waiting for a packet to arrive */
-#define IPATH_PORT_WAITING_RCV   2
-               /* master has not finished initializing */
-#define IPATH_PORT_MASTER_UNINIT 4
-               /* waiting for an urgent packet to arrive */
-#define IPATH_PORT_WAITING_URG 5
-
-/* free up any allocated data at closes */
-void ipath_free_data(struct ipath_portdata *dd);
-u32 __iomem *ipath_getpiobuf(struct ipath_devdata *, u32, u32 *);
-void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,
-                               unsigned len, int avail);
-void ipath_init_iba6110_funcs(struct ipath_devdata *);
-void ipath_get_eeprom_info(struct ipath_devdata *);
-int ipath_update_eeprom_log(struct ipath_devdata *dd);
-void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr);
-u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg);
-void ipath_disarm_senderrbufs(struct ipath_devdata *);
-void ipath_force_pio_avail_update(struct ipath_devdata *);
-void signal_ib_event(struct ipath_devdata *dd, enum ib_event_type ev);
-
-/*
- * Set LED override, only the two LSBs have "public" meaning, but
- * any non-zero value substitutes them for the Link and LinkTrain
- * LED states.
- */
-#define IPATH_LED_PHYS 1 /* Physical (linktraining) GREEN LED */
-#define IPATH_LED_LOG 2  /* Logical (link) YELLOW LED */
-void ipath_set_led_override(struct ipath_devdata *dd, unsigned int val);
-
-/* send dma routines */
-int setup_sdma(struct ipath_devdata *);
-void teardown_sdma(struct ipath_devdata *);
-void ipath_restart_sdma(struct ipath_devdata *);
-void ipath_sdma_intr(struct ipath_devdata *);
-int ipath_sdma_verbs_send(struct ipath_devdata *, struct ipath_sge_state *,
-                         u32, struct ipath_verbs_txreq *);
-/* ipath_sdma_lock should be locked before calling this. */
-int ipath_sdma_make_progress(struct ipath_devdata *dd);
-
-/* must be called under ipath_sdma_lock */
-static inline u16 ipath_sdma_descq_freecnt(const struct ipath_devdata *dd)
-{
-       return dd->ipath_sdma_descq_cnt -
-               (dd->ipath_sdma_descq_added - dd->ipath_sdma_descq_removed) -
-               1 - dd->ipath_sdma_desc_nreserved;
-}
-
-static inline void ipath_sdma_desc_reserve(struct ipath_devdata *dd, u16 cnt)
-{
-       dd->ipath_sdma_desc_nreserved += cnt;
-}
-
-static inline void ipath_sdma_desc_unreserve(struct ipath_devdata *dd, u16 cnt)
-{
-       dd->ipath_sdma_desc_nreserved -= cnt;
-}
-
-/*
- * number of words used for protocol header if not set by ipath_userinit();
- */
-#define IPATH_DFLT_RCVHDRSIZE 9
-
-int ipath_get_user_pages(unsigned long, size_t, struct page **);
-void ipath_release_user_pages(struct page **, size_t);
-void ipath_release_user_pages_on_close(struct page **, size_t);
-int ipath_eeprom_read(struct ipath_devdata *, u8, void *, int);
-int ipath_eeprom_write(struct ipath_devdata *, u8, const void *, int);
-int ipath_tempsense_read(struct ipath_devdata *, u8 regnum);
-int ipath_tempsense_write(struct ipath_devdata *, u8 regnum, u8 data);
-
-/* these are used for the registers that vary with port */
-void ipath_write_kreg_port(const struct ipath_devdata *, ipath_kreg,
-                          unsigned, u64);
-
-/*
- * We could have a single register get/put routine, that takes a group type,
- * but this is somewhat clearer and cleaner.  It also gives us some error
- * checking.  64 bit register reads should always work, but are inefficient
- * on opteron (the northbridge always generates 2 separate HT 32 bit reads),
- * so we use kreg32 wherever possible.  User register and counter register
- * reads are always 32 bit reads, so only one form of those routines.
- */
-
-/*
- * At the moment, none of the s-registers are writable, so no
- * ipath_write_sreg().
- */
-
-/**
- * ipath_read_ureg32 - read 32-bit virtualized per-port register
- * @dd: device
- * @regno: register number
- * @port: port number
- *
- * Return the contents of a register that is virtualized to be per port.
- * Returns -1 on errors (not distinguishable from valid contents at
- * runtime; we may add a separate error variable at some point).
- */
-static inline u32 ipath_read_ureg32(const struct ipath_devdata *dd,
-                                   ipath_ureg regno, int port)
-{
-       if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT))
-               return 0;
-
-       return readl(regno + (u64 __iomem *)
-                    (dd->ipath_uregbase +
-                     (char __iomem *)dd->ipath_kregbase +
-                     dd->ipath_ureg_align * port));
-}
-
-/**
- * ipath_write_ureg - write 32-bit virtualized per-port register
- * @dd: device
- * @regno: register number
- * @value: value
- * @port: port
- *
- * Write the contents of a register that is virtualized to be per port.
- */
-static inline void ipath_write_ureg(const struct ipath_devdata *dd,
-                                   ipath_ureg regno, u64 value, int port)
-{
-       u64 __iomem *ubase = (u64 __iomem *)
-               (dd->ipath_uregbase + (char __iomem *) dd->ipath_kregbase +
-                dd->ipath_ureg_align * port);
-       if (dd->ipath_kregbase)
-               writeq(value, &ubase[regno]);
-}
-
-static inline u32 ipath_read_kreg32(const struct ipath_devdata *dd,
-                                   ipath_kreg regno)
-{
-       if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT))
-               return -1;
-       return readl((u32 __iomem *) & dd->ipath_kregbase[regno]);
-}
-
-static inline u64 ipath_read_kreg64(const struct ipath_devdata *dd,
-                                   ipath_kreg regno)
-{
-       if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT))
-               return -1;
-
-       return readq(&dd->ipath_kregbase[regno]);
-}
-
-static inline void ipath_write_kreg(const struct ipath_devdata *dd,
-                                   ipath_kreg regno, u64 value)
-{
-       if (dd->ipath_kregbase)
-               writeq(value, &dd->ipath_kregbase[regno]);
-}
-
-static inline u64 ipath_read_creg(const struct ipath_devdata *dd,
-                                 ipath_sreg regno)
-{
-       if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT))
-               return 0;
-
-       return readq(regno + (u64 __iomem *)
-                    (dd->ipath_cregbase +
-                     (char __iomem *)dd->ipath_kregbase));
-}
-
-static inline u32 ipath_read_creg32(const struct ipath_devdata *dd,
-                                        ipath_sreg regno)
-{
-       if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT))
-               return 0;
-       return readl(regno + (u64 __iomem *)
-                    (dd->ipath_cregbase +
-                     (char __iomem *)dd->ipath_kregbase));
-}
-
-static inline void ipath_write_creg(const struct ipath_devdata *dd,
-                                   ipath_creg regno, u64 value)
-{
-       if (dd->ipath_kregbase)
-               writeq(value, regno + (u64 __iomem *)
-                      (dd->ipath_cregbase +
-                       (char __iomem *)dd->ipath_kregbase));
-}
-
-static inline void ipath_clear_rcvhdrtail(const struct ipath_portdata *pd)
-{
-       *((u64 *) pd->port_rcvhdrtail_kvaddr) = 0ULL;
-}
-
-static inline u32 ipath_get_rcvhdrtail(const struct ipath_portdata *pd)
-{
-       return (u32) le64_to_cpu(*((volatile __le64 *)
-                               pd->port_rcvhdrtail_kvaddr));
-}
-
-static inline u32 ipath_get_hdrqtail(const struct ipath_portdata *pd)
-{
-       const struct ipath_devdata *dd = pd->port_dd;
-       u32 hdrqtail;
-
-       if (dd->ipath_flags & IPATH_NODMA_RTAIL) {
-               __le32 *rhf_addr;
-               u32 seq;
-
-               rhf_addr = (__le32 *) pd->port_rcvhdrq +
-                       pd->port_head + dd->ipath_rhf_offset;
-               seq = ipath_hdrget_seq(rhf_addr);
-               hdrqtail = pd->port_head;
-               if (seq == pd->port_seq_cnt)
-                       hdrqtail++;
-       } else
-               hdrqtail = ipath_get_rcvhdrtail(pd);
-
-       return hdrqtail;
-}
-
-static inline u64 ipath_read_ireg(const struct ipath_devdata *dd, ipath_kreg r)
-{
-       return (dd->ipath_flags & IPATH_INTREG_64) ?
-               ipath_read_kreg64(dd, r) : ipath_read_kreg32(dd, r);
-}
-
-/*
- * from contents of IBCStatus (or a saved copy), return linkstate
- * Report ACTIVE_DEFER as ACTIVE, because we treat them the same
- * everywhere, anyway (and should be, for almost all purposes).
- */
-static inline u32 ipath_ib_linkstate(struct ipath_devdata *dd, u64 ibcs)
-{
-       u32 state = (u32)(ibcs >> dd->ibcs_ls_shift) &
-               INFINIPATH_IBCS_LINKSTATE_MASK;
-       if (state == INFINIPATH_IBCS_L_STATE_ACT_DEFER)
-               state = INFINIPATH_IBCS_L_STATE_ACTIVE;
-       return state;
-}
-
-/* from contents of IBCStatus (or a saved copy), return linktrainingstate */
-static inline u32 ipath_ib_linktrstate(struct ipath_devdata *dd, u64 ibcs)
-{
-       return (u32)(ibcs >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
-               dd->ibcs_lts_mask;
-}
-
-/*
- * from contents of IBCStatus (or a saved copy), return logical link state
- * combination of link state and linktraining state (down, active, init,
- * arm, etc.
- */
-static inline u32 ipath_ib_state(struct ipath_devdata *dd, u64 ibcs)
-{
-       u32 ibs;
-       ibs = (u32)(ibcs >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
-               dd->ibcs_lts_mask;
-       ibs |= (u32)(ibcs &
-               (INFINIPATH_IBCS_LINKSTATE_MASK << dd->ibcs_ls_shift));
-       return ibs;
-}
-
-/*
- * sysfs interface.
- */
-
-struct device_driver;
-
-extern const char ib_ipath_version[];
-
-extern const struct attribute_group *ipath_driver_attr_groups[];
-
-int ipath_device_create_group(struct device *, struct ipath_devdata *);
-void ipath_device_remove_group(struct device *, struct ipath_devdata *);
-int ipath_expose_reset(struct device *);
-
-int ipath_init_ipathfs(void);
-void ipath_exit_ipathfs(void);
-int ipathfs_add_device(struct ipath_devdata *);
-int ipathfs_remove_device(struct ipath_devdata *);
-
-/*
- * dma_addr wrappers - all 0's invalid for hw
- */
-dma_addr_t ipath_map_page(struct pci_dev *, struct page *, unsigned long,
-                         size_t, int);
-dma_addr_t ipath_map_single(struct pci_dev *, void *, size_t, int);
-const char *ipath_get_unit_name(int unit);
-
-/*
- * Flush write combining store buffers (if present) and perform a write
- * barrier.
- */
-#if defined(CONFIG_X86_64)
-#define ipath_flush_wc() asm volatile("sfence" ::: "memory")
-#else
-#define ipath_flush_wc() wmb()
-#endif
-
-extern unsigned ipath_debug; /* debugging bit mask */
-extern unsigned ipath_linkrecovery;
-extern unsigned ipath_mtu4096;
-extern struct mutex ipath_mutex;
-
-#define IPATH_DRV_NAME         "ib_ipath"
-#define IPATH_MAJOR            233
-#define IPATH_USER_MINOR_BASE  0
-#define IPATH_DIAGPKT_MINOR    127
-#define IPATH_DIAG_MINOR_BASE  129
-#define IPATH_NMINORS          255
-
-#define ipath_dev_err(dd,fmt,...) \
-       do { \
-               const struct ipath_devdata *__dd = (dd); \
-               if (__dd->pcidev) \
-                       dev_err(&__dd->pcidev->dev, "%s: " fmt, \
-                               ipath_get_unit_name(__dd->ipath_unit), \
-                               ##__VA_ARGS__); \
-               else \
-                       printk(KERN_ERR IPATH_DRV_NAME ": %s: " fmt, \
-                              ipath_get_unit_name(__dd->ipath_unit), \
-                              ##__VA_ARGS__); \
-       } while (0)
-
-#if _IPATH_DEBUGGING
-
-# define __IPATH_DBG_WHICH(which,fmt,...) \
-       do { \
-               if (unlikely(ipath_debug & (which))) \
-                       printk(KERN_DEBUG IPATH_DRV_NAME ": %s: " fmt, \
-                              __func__,##__VA_ARGS__); \
-       } while(0)
-
-# define ipath_dbg(fmt,...) \
-       __IPATH_DBG_WHICH(__IPATH_DBG,fmt,##__VA_ARGS__)
-# define ipath_cdbg(which,fmt,...) \
-       __IPATH_DBG_WHICH(__IPATH_##which##DBG,fmt,##__VA_ARGS__)
-
-#else /* ! _IPATH_DEBUGGING */
-
-# define ipath_dbg(fmt,...)
-# define ipath_cdbg(which,fmt,...)
-
-#endif /* _IPATH_DEBUGGING */
-
-/*
- * this is used for formatting hw error messages...
- */
-struct ipath_hwerror_msgs {
-       u64 mask;
-       const char *msg;
-};
-
-#define INFINIPATH_HWE_MSG(a, b) { .mask = INFINIPATH_HWE_##a, .msg = b }
-
-/* in ipath_intr.c... */
-void ipath_format_hwerrors(u64 hwerrs,
-                          const struct ipath_hwerror_msgs *hwerrmsgs,
-                          size_t nhwerrmsgs,
-                          char *msg, size_t lmsg);
-
-#endif                         /* _IPATH_KERNEL_H */
diff --git a/drivers/staging/rdma/ipath/ipath_keys.c b/drivers/staging/rdma/ipath/ipath_keys.c
deleted file mode 100644 (file)
index c0e933f..0000000
+++ /dev/null
@@ -1,270 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <asm/io.h>
-
-#include "ipath_verbs.h"
-#include "ipath_kernel.h"
-
-/**
- * ipath_alloc_lkey - allocate an lkey
- * @rkt: lkey table in which to allocate the lkey
- * @mr: memory region that this lkey protects
- *
- * Returns 1 if successful, otherwise returns 0.
- */
-
-int ipath_alloc_lkey(struct ipath_lkey_table *rkt, struct ipath_mregion *mr)
-{
-       unsigned long flags;
-       u32 r;
-       u32 n;
-       int ret;
-
-       spin_lock_irqsave(&rkt->lock, flags);
-
-       /* Find the next available LKEY */
-       r = n = rkt->next;
-       for (;;) {
-               if (rkt->table[r] == NULL)
-                       break;
-               r = (r + 1) & (rkt->max - 1);
-               if (r == n) {
-                       spin_unlock_irqrestore(&rkt->lock, flags);
-                       ipath_dbg("LKEY table full\n");
-                       ret = 0;
-                       goto bail;
-               }
-       }
-       rkt->next = (r + 1) & (rkt->max - 1);
-       /*
-        * Make sure lkey is never zero which is reserved to indicate an
-        * unrestricted LKEY.
-        */
-       rkt->gen++;
-       mr->lkey = (r << (32 - ib_ipath_lkey_table_size)) |
-               ((((1 << (24 - ib_ipath_lkey_table_size)) - 1) & rkt->gen)
-                << 8);
-       if (mr->lkey == 0) {
-               mr->lkey |= 1 << 8;
-               rkt->gen++;
-       }
-       rkt->table[r] = mr;
-       spin_unlock_irqrestore(&rkt->lock, flags);
-
-       ret = 1;
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_free_lkey - free an lkey
- * @rkt: table from which to free the lkey
- * @lkey: lkey id to free
- */
-void ipath_free_lkey(struct ipath_lkey_table *rkt, u32 lkey)
-{
-       unsigned long flags;
-       u32 r;
-
-       if (lkey == 0)
-               return;
-       r = lkey >> (32 - ib_ipath_lkey_table_size);
-       spin_lock_irqsave(&rkt->lock, flags);
-       rkt->table[r] = NULL;
-       spin_unlock_irqrestore(&rkt->lock, flags);
-}
-
-/**
- * ipath_lkey_ok - check IB SGE for validity and initialize
- * @rkt: table containing lkey to check SGE against
- * @isge: outgoing internal SGE
- * @sge: SGE to check
- * @acc: access flags
- *
- * Return 1 if valid and successful, otherwise returns 0.
- *
- * Check the IB SGE for validity and initialize our internal version
- * of it.
- */
-int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge,
-                 struct ib_sge *sge, int acc)
-{
-       struct ipath_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table;
-       struct ipath_mregion *mr;
-       unsigned n, m;
-       size_t off;
-       int ret;
-
-       /*
-        * We use LKEY == zero for kernel virtual addresses
-        * (see ipath_get_dma_mr and ipath_dma.c).
-        */
-       if (sge->lkey == 0) {
-               /* always a kernel port, no locking needed */
-               struct ipath_pd *pd = to_ipd(qp->ibqp.pd);
-
-               if (pd->user) {
-                       ret = 0;
-                       goto bail;
-               }
-               isge->mr = NULL;
-               isge->vaddr = (void *) sge->addr;
-               isge->length = sge->length;
-               isge->sge_length = sge->length;
-               ret = 1;
-               goto bail;
-       }
-       mr = rkt->table[(sge->lkey >> (32 - ib_ipath_lkey_table_size))];
-       if (unlikely(mr == NULL || mr->lkey != sge->lkey ||
-                    qp->ibqp.pd != mr->pd)) {
-               ret = 0;
-               goto bail;
-       }
-
-       off = sge->addr - mr->user_base;
-       if (unlikely(sge->addr < mr->user_base ||
-                    off + sge->length > mr->length ||
-                    (mr->access_flags & acc) != acc)) {
-               ret = 0;
-               goto bail;
-       }
-
-       off += mr->offset;
-       m = 0;
-       n = 0;
-       while (off >= mr->map[m]->segs[n].length) {
-               off -= mr->map[m]->segs[n].length;
-               n++;
-               if (n >= IPATH_SEGSZ) {
-                       m++;
-                       n = 0;
-               }
-       }
-       isge->mr = mr;
-       isge->vaddr = mr->map[m]->segs[n].vaddr + off;
-       isge->length = mr->map[m]->segs[n].length - off;
-       isge->sge_length = sge->length;
-       isge->m = m;
-       isge->n = n;
-
-       ret = 1;
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_rkey_ok - check the IB virtual address, length, and RKEY
- * @dev: infiniband device
- * @ss: SGE state
- * @len: length of data
- * @vaddr: virtual address to place data
- * @rkey: rkey to check
- * @acc: access flags
- *
- * Return 1 if successful, otherwise 0.
- */
-int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss,
-                 u32 len, u64 vaddr, u32 rkey, int acc)
-{
-       struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-       struct ipath_lkey_table *rkt = &dev->lk_table;
-       struct ipath_sge *sge = &ss->sge;
-       struct ipath_mregion *mr;
-       unsigned n, m;
-       size_t off;
-       int ret;
-
-       /*
-        * We use RKEY == zero for kernel virtual addresses
-        * (see ipath_get_dma_mr and ipath_dma.c).
-        */
-       if (rkey == 0) {
-               /* always a kernel port, no locking needed */
-               struct ipath_pd *pd = to_ipd(qp->ibqp.pd);
-
-               if (pd->user) {
-                       ret = 0;
-                       goto bail;
-               }
-               sge->mr = NULL;
-               sge->vaddr = (void *) vaddr;
-               sge->length = len;
-               sge->sge_length = len;
-               ss->sg_list = NULL;
-               ss->num_sge = 1;
-               ret = 1;
-               goto bail;
-       }
-
-       mr = rkt->table[(rkey >> (32 - ib_ipath_lkey_table_size))];
-       if (unlikely(mr == NULL || mr->lkey != rkey ||
-                    qp->ibqp.pd != mr->pd)) {
-               ret = 0;
-               goto bail;
-       }
-
-       off = vaddr - mr->iova;
-       if (unlikely(vaddr < mr->iova || off + len > mr->length ||
-                    (mr->access_flags & acc) == 0)) {
-               ret = 0;
-               goto bail;
-       }
-
-       off += mr->offset;
-       m = 0;
-       n = 0;
-       while (off >= mr->map[m]->segs[n].length) {
-               off -= mr->map[m]->segs[n].length;
-               n++;
-               if (n >= IPATH_SEGSZ) {
-                       m++;
-                       n = 0;
-               }
-       }
-       sge->mr = mr;
-       sge->vaddr = mr->map[m]->segs[n].vaddr + off;
-       sge->length = mr->map[m]->segs[n].length - off;
-       sge->sge_length = len;
-       sge->m = m;
-       sge->n = n;
-       ss->sg_list = NULL;
-       ss->num_sge = 1;
-
-       ret = 1;
-
-bail:
-       return ret;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_mad.c b/drivers/staging/rdma/ipath/ipath_mad.c
deleted file mode 100644 (file)
index ad3a926..0000000
+++ /dev/null
@@ -1,1521 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <rdma/ib_smi.h>
-#include <rdma/ib_pma.h>
-
-#include "ipath_kernel.h"
-#include "ipath_verbs.h"
-#include "ipath_common.h"
-
-#define IB_SMP_UNSUP_VERSION   cpu_to_be16(0x0004)
-#define IB_SMP_UNSUP_METHOD    cpu_to_be16(0x0008)
-#define IB_SMP_UNSUP_METH_ATTR cpu_to_be16(0x000C)
-#define IB_SMP_INVALID_FIELD   cpu_to_be16(0x001C)
-
-static int reply(struct ib_smp *smp)
-{
-       /*
-        * The verbs framework will handle the directed/LID route
-        * packet changes.
-        */
-       smp->method = IB_MGMT_METHOD_GET_RESP;
-       if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
-               smp->status |= IB_SMP_DIRECTION;
-       return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
-}
-
-static int recv_subn_get_nodedescription(struct ib_smp *smp,
-                                        struct ib_device *ibdev)
-{
-       if (smp->attr_mod)
-               smp->status |= IB_SMP_INVALID_FIELD;
-
-       memcpy(smp->data, ibdev->node_desc, sizeof(smp->data));
-
-       return reply(smp);
-}
-
-struct nodeinfo {
-       u8 base_version;
-       u8 class_version;
-       u8 node_type;
-       u8 num_ports;
-       __be64 sys_guid;
-       __be64 node_guid;
-       __be64 port_guid;
-       __be16 partition_cap;
-       __be16 device_id;
-       __be32 revision;
-       u8 local_port_num;
-       u8 vendor_id[3];
-} __attribute__ ((packed));
-
-static int recv_subn_get_nodeinfo(struct ib_smp *smp,
-                                 struct ib_device *ibdev, u8 port)
-{
-       struct nodeinfo *nip = (struct nodeinfo *)&smp->data;
-       struct ipath_devdata *dd = to_idev(ibdev)->dd;
-       u32 vendor, majrev, minrev;
-
-       /* GUID 0 is illegal */
-       if (smp->attr_mod || (dd->ipath_guid == 0))
-               smp->status |= IB_SMP_INVALID_FIELD;
-
-       nip->base_version = 1;
-       nip->class_version = 1;
-       nip->node_type = 1;     /* channel adapter */
-       /*
-        * XXX The num_ports value will need a layer function to get
-        * the value if we ever have more than one IB port on a chip.
-        * We will also need to get the GUID for the port.
-        */
-       nip->num_ports = ibdev->phys_port_cnt;
-       /* This is already in network order */
-       nip->sys_guid = to_idev(ibdev)->sys_image_guid;
-       nip->node_guid = dd->ipath_guid;
-       nip->port_guid = dd->ipath_guid;
-       nip->partition_cap = cpu_to_be16(ipath_get_npkeys(dd));
-       nip->device_id = cpu_to_be16(dd->ipath_deviceid);
-       majrev = dd->ipath_majrev;
-       minrev = dd->ipath_minrev;
-       nip->revision = cpu_to_be32((majrev << 16) | minrev);
-       nip->local_port_num = port;
-       vendor = dd->ipath_vendorid;
-       nip->vendor_id[0] = IPATH_SRC_OUI_1;
-       nip->vendor_id[1] = IPATH_SRC_OUI_2;
-       nip->vendor_id[2] = IPATH_SRC_OUI_3;
-
-       return reply(smp);
-}
-
-static int recv_subn_get_guidinfo(struct ib_smp *smp,
-                                 struct ib_device *ibdev)
-{
-       u32 startgx = 8 * be32_to_cpu(smp->attr_mod);
-       __be64 *p = (__be64 *) smp->data;
-
-       /* 32 blocks of 8 64-bit GUIDs per block */
-
-       memset(smp->data, 0, sizeof(smp->data));
-
-       /*
-        * We only support one GUID for now.  If this changes, the
-        * portinfo.guid_cap field needs to be updated too.
-        */
-       if (startgx == 0) {
-               __be64 g = to_idev(ibdev)->dd->ipath_guid;
-               if (g == 0)
-                       /* GUID 0 is illegal */
-                       smp->status |= IB_SMP_INVALID_FIELD;
-               else
-                       /* The first is a copy of the read-only HW GUID. */
-                       *p = g;
-       } else
-               smp->status |= IB_SMP_INVALID_FIELD;
-
-       return reply(smp);
-}
-
-static void set_link_width_enabled(struct ipath_devdata *dd, u32 w)
-{
-       (void) dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LWID_ENB, w);
-}
-
-static void set_link_speed_enabled(struct ipath_devdata *dd, u32 s)
-{
-       (void) dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_SPD_ENB, s);
-}
-
-static int get_overrunthreshold(struct ipath_devdata *dd)
-{
-       return (dd->ipath_ibcctrl >>
-               INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) &
-               INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK;
-}
-
-/**
- * set_overrunthreshold - set the overrun threshold
- * @dd: the infinipath device
- * @n: the new threshold
- *
- * Note that this will only take effect when the link state changes.
- */
-static int set_overrunthreshold(struct ipath_devdata *dd, unsigned n)
-{
-       unsigned v;
-
-       v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) &
-               INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK;
-       if (v != n) {
-               dd->ipath_ibcctrl &=
-                       ~(INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK <<
-                         INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT);
-               dd->ipath_ibcctrl |=
-                       (u64) n << INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
-                                dd->ipath_ibcctrl);
-       }
-       return 0;
-}
-
-static int get_phyerrthreshold(struct ipath_devdata *dd)
-{
-       return (dd->ipath_ibcctrl >>
-               INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
-               INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
-}
-
-/**
- * set_phyerrthreshold - set the physical error threshold
- * @dd: the infinipath device
- * @n: the new threshold
- *
- * Note that this will only take effect when the link state changes.
- */
-static int set_phyerrthreshold(struct ipath_devdata *dd, unsigned n)
-{
-       unsigned v;
-
-       v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
-               INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
-       if (v != n) {
-               dd->ipath_ibcctrl &=
-                       ~(INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK <<
-                         INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT);
-               dd->ipath_ibcctrl |=
-                       (u64) n << INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
-                                dd->ipath_ibcctrl);
-       }
-       return 0;
-}
-
-/**
- * get_linkdowndefaultstate - get the default linkdown state
- * @dd: the infinipath device
- *
- * Returns zero if the default is POLL, 1 if the default is SLEEP.
- */
-static int get_linkdowndefaultstate(struct ipath_devdata *dd)
-{
-       return !!(dd->ipath_ibcctrl & INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE);
-}
-
-static int recv_subn_get_portinfo(struct ib_smp *smp,
-                                 struct ib_device *ibdev, u8 port)
-{
-       struct ipath_ibdev *dev;
-       struct ipath_devdata *dd;
-       struct ib_port_info *pip = (struct ib_port_info *)smp->data;
-       u16 lid;
-       u8 ibcstat;
-       u8 mtu;
-       int ret;
-
-       if (be32_to_cpu(smp->attr_mod) > ibdev->phys_port_cnt) {
-               smp->status |= IB_SMP_INVALID_FIELD;
-               ret = reply(smp);
-               goto bail;
-       }
-
-       dev = to_idev(ibdev);
-       dd = dev->dd;
-
-       /* Clear all fields.  Only set the non-zero fields. */
-       memset(smp->data, 0, sizeof(smp->data));
-
-       /* Only return the mkey if the protection field allows it. */
-       if (smp->method == IB_MGMT_METHOD_SET || dev->mkey == smp->mkey ||
-           dev->mkeyprot == 0)
-               pip->mkey = dev->mkey;
-       pip->gid_prefix = dev->gid_prefix;
-       lid = dd->ipath_lid;
-       pip->lid = lid ? cpu_to_be16(lid) : IB_LID_PERMISSIVE;
-       pip->sm_lid = cpu_to_be16(dev->sm_lid);
-       pip->cap_mask = cpu_to_be32(dev->port_cap_flags);
-       /* pip->diag_code; */
-       pip->mkey_lease_period = cpu_to_be16(dev->mkey_lease_period);
-       pip->local_port_num = port;
-       pip->link_width_enabled = dd->ipath_link_width_enabled;
-       pip->link_width_supported = dd->ipath_link_width_supported;
-       pip->link_width_active = dd->ipath_link_width_active;
-       pip->linkspeed_portstate = dd->ipath_link_speed_supported << 4;
-       ibcstat = dd->ipath_lastibcstat;
-       /* map LinkState to IB portinfo values.  */
-       pip->linkspeed_portstate |= ipath_ib_linkstate(dd, ibcstat) + 1;
-
-       pip->portphysstate_linkdown =
-               (ipath_cvt_physportstate[ibcstat & dd->ibcs_lts_mask] << 4) |
-               (get_linkdowndefaultstate(dd) ? 1 : 2);
-       pip->mkeyprot_resv_lmc = (dev->mkeyprot << 6) | dd->ipath_lmc;
-       pip->linkspeedactive_enabled = (dd->ipath_link_speed_active << 4) |
-               dd->ipath_link_speed_enabled;
-       switch (dd->ipath_ibmtu) {
-       case 4096:
-               mtu = IB_MTU_4096;
-               break;
-       case 2048:
-               mtu = IB_MTU_2048;
-               break;
-       case 1024:
-               mtu = IB_MTU_1024;
-               break;
-       case 512:
-               mtu = IB_MTU_512;
-               break;
-       case 256:
-               mtu = IB_MTU_256;
-               break;
-       default:                /* oops, something is wrong */
-               mtu = IB_MTU_2048;
-               break;
-       }
-       pip->neighbormtu_mastersmsl = (mtu << 4) | dev->sm_sl;
-       pip->vlcap_inittype = 0x10;     /* VLCap = VL0, InitType = 0 */
-       pip->vl_high_limit = dev->vl_high_limit;
-       /* pip->vl_arb_high_cap; // only one VL */
-       /* pip->vl_arb_low_cap; // only one VL */
-       /* InitTypeReply = 0 */
-       /* our mtu cap depends on whether 4K MTU enabled or not */
-       pip->inittypereply_mtucap = ipath_mtu4096 ? IB_MTU_4096 : IB_MTU_2048;
-       /* HCAs ignore VLStallCount and HOQLife */
-       /* pip->vlstallcnt_hoqlife; */
-       pip->operationalvl_pei_peo_fpi_fpo = 0x10;      /* OVLs = 1 */
-       pip->mkey_violations = cpu_to_be16(dev->mkey_violations);
-       /* P_KeyViolations are counted by hardware. */
-       pip->pkey_violations =
-               cpu_to_be16((ipath_get_cr_errpkey(dd) -
-                            dev->z_pkey_violations) & 0xFFFF);
-       pip->qkey_violations = cpu_to_be16(dev->qkey_violations);
-       /* Only the hardware GUID is supported for now */
-       pip->guid_cap = 1;
-       pip->clientrereg_resv_subnetto = dev->subnet_timeout;
-       /* 32.768 usec. response time (guessing) */
-       pip->resv_resptimevalue = 3;
-       pip->localphyerrors_overrunerrors =
-               (get_phyerrthreshold(dd) << 4) |
-               get_overrunthreshold(dd);
-       /* pip->max_credit_hint; */
-       if (dev->port_cap_flags & IB_PORT_LINK_LATENCY_SUP) {
-               u32 v;
-
-               v = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_LINKLATENCY);
-               pip->link_roundtrip_latency[0] = v >> 16;
-               pip->link_roundtrip_latency[1] = v >> 8;
-               pip->link_roundtrip_latency[2] = v;
-       }
-
-       ret = reply(smp);
-
-bail:
-       return ret;
-}
-
-/**
- * get_pkeys - return the PKEY table for port 0
- * @dd: the infinipath device
- * @pkeys: the pkey table is placed here
- */
-static int get_pkeys(struct ipath_devdata *dd, u16 * pkeys)
-{
-       /* always a kernel port, no locking needed */
-       struct ipath_portdata *pd = dd->ipath_pd[0];
-
-       memcpy(pkeys, pd->port_pkeys, sizeof(pd->port_pkeys));
-
-       return 0;
-}
-
-static int recv_subn_get_pkeytable(struct ib_smp *smp,
-                                  struct ib_device *ibdev)
-{
-       u32 startpx = 32 * (be32_to_cpu(smp->attr_mod) & 0xffff);
-       u16 *p = (u16 *) smp->data;
-       __be16 *q = (__be16 *) smp->data;
-
-       /* 64 blocks of 32 16-bit P_Key entries */
-
-       memset(smp->data, 0, sizeof(smp->data));
-       if (startpx == 0) {
-               struct ipath_ibdev *dev = to_idev(ibdev);
-               unsigned i, n = ipath_get_npkeys(dev->dd);
-
-               get_pkeys(dev->dd, p);
-
-               for (i = 0; i < n; i++)
-                       q[i] = cpu_to_be16(p[i]);
-       } else
-               smp->status |= IB_SMP_INVALID_FIELD;
-
-       return reply(smp);
-}
-
-static int recv_subn_set_guidinfo(struct ib_smp *smp,
-                                 struct ib_device *ibdev)
-{
-       /* The only GUID we support is the first read-only entry. */
-       return recv_subn_get_guidinfo(smp, ibdev);
-}
-
-/**
- * set_linkdowndefaultstate - set the default linkdown state
- * @dd: the infinipath device
- * @sleep: the new state
- *
- * Note that this will only take effect when the link state changes.
- */
-static int set_linkdowndefaultstate(struct ipath_devdata *dd, int sleep)
-{
-       if (sleep)
-               dd->ipath_ibcctrl |= INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE;
-       else
-               dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
-                        dd->ipath_ibcctrl);
-       return 0;
-}
-
-/**
- * recv_subn_set_portinfo - set port information
- * @smp: the incoming SM packet
- * @ibdev: the infiniband device
- * @port: the port on the device
- *
- * Set Portinfo (see ch. 14.2.5.6).
- */
-static int recv_subn_set_portinfo(struct ib_smp *smp,
-                                 struct ib_device *ibdev, u8 port)
-{
-       struct ib_port_info *pip = (struct ib_port_info *)smp->data;
-       struct ib_event event;
-       struct ipath_ibdev *dev;
-       struct ipath_devdata *dd;
-       char clientrereg = 0;
-       u16 lid, smlid;
-       u8 lwe;
-       u8 lse;
-       u8 state;
-       u16 lstate;
-       u32 mtu;
-       int ret, ore;
-
-       if (be32_to_cpu(smp->attr_mod) > ibdev->phys_port_cnt)
-               goto err;
-
-       dev = to_idev(ibdev);
-       dd = dev->dd;
-       event.device = ibdev;
-       event.element.port_num = port;
-
-       dev->mkey = pip->mkey;
-       dev->gid_prefix = pip->gid_prefix;
-       dev->mkey_lease_period = be16_to_cpu(pip->mkey_lease_period);
-
-       lid = be16_to_cpu(pip->lid);
-       if (dd->ipath_lid != lid ||
-           dd->ipath_lmc != (pip->mkeyprot_resv_lmc & 7)) {
-               /* Must be a valid unicast LID address. */
-               if (lid == 0 || lid >= IPATH_MULTICAST_LID_BASE)
-                       goto err;
-               ipath_set_lid(dd, lid, pip->mkeyprot_resv_lmc & 7);
-               event.event = IB_EVENT_LID_CHANGE;
-               ib_dispatch_event(&event);
-       }
-
-       smlid = be16_to_cpu(pip->sm_lid);
-       if (smlid != dev->sm_lid) {
-               /* Must be a valid unicast LID address. */
-               if (smlid == 0 || smlid >= IPATH_MULTICAST_LID_BASE)
-                       goto err;
-               dev->sm_lid = smlid;
-               event.event = IB_EVENT_SM_CHANGE;
-               ib_dispatch_event(&event);
-       }
-
-       /* Allow 1x or 4x to be set (see 14.2.6.6). */
-       lwe = pip->link_width_enabled;
-       if (lwe) {
-               if (lwe == 0xFF)
-                       lwe = dd->ipath_link_width_supported;
-               else if (lwe >= 16 || (lwe & ~dd->ipath_link_width_supported))
-                       goto err;
-               set_link_width_enabled(dd, lwe);
-       }
-
-       /* Allow 2.5 or 5.0 Gbs. */
-       lse = pip->linkspeedactive_enabled & 0xF;
-       if (lse) {
-               if (lse == 15)
-                       lse = dd->ipath_link_speed_supported;
-               else if (lse >= 8 || (lse & ~dd->ipath_link_speed_supported))
-                       goto err;
-               set_link_speed_enabled(dd, lse);
-       }
-
-       /* Set link down default state. */
-       switch (pip->portphysstate_linkdown & 0xF) {
-       case 0: /* NOP */
-               break;
-       case 1: /* SLEEP */
-               if (set_linkdowndefaultstate(dd, 1))
-                       goto err;
-               break;
-       case 2: /* POLL */
-               if (set_linkdowndefaultstate(dd, 0))
-                       goto err;
-               break;
-       default:
-               goto err;
-       }
-
-       dev->mkeyprot = pip->mkeyprot_resv_lmc >> 6;
-       dev->vl_high_limit = pip->vl_high_limit;
-
-       switch ((pip->neighbormtu_mastersmsl >> 4) & 0xF) {
-       case IB_MTU_256:
-               mtu = 256;
-               break;
-       case IB_MTU_512:
-               mtu = 512;
-               break;
-       case IB_MTU_1024:
-               mtu = 1024;
-               break;
-       case IB_MTU_2048:
-               mtu = 2048;
-               break;
-       case IB_MTU_4096:
-               if (!ipath_mtu4096)
-                       goto err;
-               mtu = 4096;
-               break;
-       default:
-               /* XXX We have already partially updated our state! */
-               goto err;
-       }
-       ipath_set_mtu(dd, mtu);
-
-       dev->sm_sl = pip->neighbormtu_mastersmsl & 0xF;
-
-       /* We only support VL0 */
-       if (((pip->operationalvl_pei_peo_fpi_fpo >> 4) & 0xF) > 1)
-               goto err;
-
-       if (pip->mkey_violations == 0)
-               dev->mkey_violations = 0;
-
-       /*
-        * Hardware counter can't be reset so snapshot and subtract
-        * later.
-        */
-       if (pip->pkey_violations == 0)
-               dev->z_pkey_violations = ipath_get_cr_errpkey(dd);
-
-       if (pip->qkey_violations == 0)
-               dev->qkey_violations = 0;
-
-       ore = pip->localphyerrors_overrunerrors;
-       if (set_phyerrthreshold(dd, (ore >> 4) & 0xF))
-               goto err;
-
-       if (set_overrunthreshold(dd, (ore & 0xF)))
-               goto err;
-
-       dev->subnet_timeout = pip->clientrereg_resv_subnetto & 0x1F;
-
-       if (pip->clientrereg_resv_subnetto & 0x80) {
-               clientrereg = 1;
-               event.event = IB_EVENT_CLIENT_REREGISTER;
-               ib_dispatch_event(&event);
-       }
-
-       /*
-        * Do the port state change now that the other link parameters
-        * have been set.
-        * Changing the port physical state only makes sense if the link
-        * is down or is being set to down.
-        */
-       state = pip->linkspeed_portstate & 0xF;
-       lstate = (pip->portphysstate_linkdown >> 4) & 0xF;
-       if (lstate && !(state == IB_PORT_DOWN || state == IB_PORT_NOP))
-               goto err;
-
-       /*
-        * Only state changes of DOWN, ARM, and ACTIVE are valid
-        * and must be in the correct state to take effect (see 7.2.6).
-        */
-       switch (state) {
-       case IB_PORT_NOP:
-               if (lstate == 0)
-                       break;
-               /* FALLTHROUGH */
-       case IB_PORT_DOWN:
-               if (lstate == 0)
-                       lstate = IPATH_IB_LINKDOWN_ONLY;
-               else if (lstate == 1)
-                       lstate = IPATH_IB_LINKDOWN_SLEEP;
-               else if (lstate == 2)
-                       lstate = IPATH_IB_LINKDOWN;
-               else if (lstate == 3)
-                       lstate = IPATH_IB_LINKDOWN_DISABLE;
-               else
-                       goto err;
-               ipath_set_linkstate(dd, lstate);
-               if (lstate == IPATH_IB_LINKDOWN_DISABLE) {
-                       ret = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
-                       goto done;
-               }
-               ipath_wait_linkstate(dd, IPATH_LINKINIT | IPATH_LINKARMED |
-                               IPATH_LINKACTIVE, 1000);
-               break;
-       case IB_PORT_ARMED:
-               ipath_set_linkstate(dd, IPATH_IB_LINKARM);
-               break;
-       case IB_PORT_ACTIVE:
-               ipath_set_linkstate(dd, IPATH_IB_LINKACTIVE);
-               break;
-       default:
-               /* XXX We have already partially updated our state! */
-               goto err;
-       }
-
-       ret = recv_subn_get_portinfo(smp, ibdev, port);
-
-       if (clientrereg)
-               pip->clientrereg_resv_subnetto |= 0x80;
-
-       goto done;
-
-err:
-       smp->status |= IB_SMP_INVALID_FIELD;
-       ret = recv_subn_get_portinfo(smp, ibdev, port);
-
-done:
-       return ret;
-}
-
-/**
- * rm_pkey - decrecment the reference count for the given PKEY
- * @dd: the infinipath device
- * @key: the PKEY index
- *
- * Return true if this was the last reference and the hardware table entry
- * needs to be changed.
- */
-static int rm_pkey(struct ipath_devdata *dd, u16 key)
-{
-       int i;
-       int ret;
-
-       for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
-               if (dd->ipath_pkeys[i] != key)
-                       continue;
-               if (atomic_dec_and_test(&dd->ipath_pkeyrefs[i])) {
-                       dd->ipath_pkeys[i] = 0;
-                       ret = 1;
-                       goto bail;
-               }
-               break;
-       }
-
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-/**
- * add_pkey - add the given PKEY to the hardware table
- * @dd: the infinipath device
- * @key: the PKEY
- *
- * Return an error code if unable to add the entry, zero if no change,
- * or 1 if the hardware PKEY register needs to be updated.
- */
-static int add_pkey(struct ipath_devdata *dd, u16 key)
-{
-       int i;
-       u16 lkey = key & 0x7FFF;
-       int any = 0;
-       int ret;
-
-       if (lkey == 0x7FFF) {
-               ret = 0;
-               goto bail;
-       }
-
-       /* Look for an empty slot or a matching PKEY. */
-       for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
-               if (!dd->ipath_pkeys[i]) {
-                       any++;
-                       continue;
-               }
-               /* If it matches exactly, try to increment the ref count */
-               if (dd->ipath_pkeys[i] == key) {
-                       if (atomic_inc_return(&dd->ipath_pkeyrefs[i]) > 1) {
-                               ret = 0;
-                               goto bail;
-                       }
-                       /* Lost the race. Look for an empty slot below. */
-                       atomic_dec(&dd->ipath_pkeyrefs[i]);
-                       any++;
-               }
-               /*
-                * It makes no sense to have both the limited and unlimited
-                * PKEY set at the same time since the unlimited one will
-                * disable the limited one.
-                */
-               if ((dd->ipath_pkeys[i] & 0x7FFF) == lkey) {
-                       ret = -EEXIST;
-                       goto bail;
-               }
-       }
-       if (!any) {
-               ret = -EBUSY;
-               goto bail;
-       }
-       for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
-               if (!dd->ipath_pkeys[i] &&
-                   atomic_inc_return(&dd->ipath_pkeyrefs[i]) == 1) {
-                       /* for ipathstats, etc. */
-                       ipath_stats.sps_pkeys[i] = lkey;
-                       dd->ipath_pkeys[i] = key;
-                       ret = 1;
-                       goto bail;
-               }
-       }
-       ret = -EBUSY;
-
-bail:
-       return ret;
-}
-
-/**
- * set_pkeys - set the PKEY table for port 0
- * @dd: the infinipath device
- * @pkeys: the PKEY table
- */
-static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys, u8 port)
-{
-       struct ipath_portdata *pd;
-       int i;
-       int changed = 0;
-
-       /* always a kernel port, no locking needed */
-       pd = dd->ipath_pd[0];
-
-       for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
-               u16 key = pkeys[i];
-               u16 okey = pd->port_pkeys[i];
-
-               if (key == okey)
-                       continue;
-               /*
-                * The value of this PKEY table entry is changing.
-                * Remove the old entry in the hardware's array of PKEYs.
-                */
-               if (okey & 0x7FFF)
-                       changed |= rm_pkey(dd, okey);
-               if (key & 0x7FFF) {
-                       int ret = add_pkey(dd, key);
-
-                       if (ret < 0)
-                               key = 0;
-                       else
-                               changed |= ret;
-               }
-               pd->port_pkeys[i] = key;
-       }
-       if (changed) {
-               u64 pkey;
-               struct ib_event event;
-
-               pkey = (u64) dd->ipath_pkeys[0] |
-                       ((u64) dd->ipath_pkeys[1] << 16) |
-                       ((u64) dd->ipath_pkeys[2] << 32) |
-                       ((u64) dd->ipath_pkeys[3] << 48);
-               ipath_cdbg(VERBOSE, "p0 new pkey reg %llx\n",
-                          (unsigned long long) pkey);
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey,
-                                pkey);
-
-               event.event = IB_EVENT_PKEY_CHANGE;
-               event.device = &dd->verbs_dev->ibdev;
-               event.element.port_num = port;
-               ib_dispatch_event(&event);
-       }
-       return 0;
-}
-
-static int recv_subn_set_pkeytable(struct ib_smp *smp,
-                                  struct ib_device *ibdev, u8 port)
-{
-       u32 startpx = 32 * (be32_to_cpu(smp->attr_mod) & 0xffff);
-       __be16 *p = (__be16 *) smp->data;
-       u16 *q = (u16 *) smp->data;
-       struct ipath_ibdev *dev = to_idev(ibdev);
-       unsigned i, n = ipath_get_npkeys(dev->dd);
-
-       for (i = 0; i < n; i++)
-               q[i] = be16_to_cpu(p[i]);
-
-       if (startpx != 0 || set_pkeys(dev->dd, q, port) != 0)
-               smp->status |= IB_SMP_INVALID_FIELD;
-
-       return recv_subn_get_pkeytable(smp, ibdev);
-}
-
-static int recv_pma_get_classportinfo(struct ib_pma_mad *pmp)
-{
-       struct ib_class_port_info *p =
-               (struct ib_class_port_info *)pmp->data;
-
-       memset(pmp->data, 0, sizeof(pmp->data));
-
-       if (pmp->mad_hdr.attr_mod != 0)
-               pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
-
-       /* Indicate AllPortSelect is valid (only one port anyway) */
-       p->capability_mask = cpu_to_be16(1 << 8);
-       p->base_version = 1;
-       p->class_version = 1;
-       /*
-        * Expected response time is 4.096 usec. * 2^18 == 1.073741824
-        * sec.
-        */
-       p->resp_time_value = 18;
-
-       return reply((struct ib_smp *) pmp);
-}
-
-/*
- * The PortSamplesControl.CounterMasks field is an array of 3 bit fields
- * which specify the N'th counter's capabilities. See ch. 16.1.3.2.
- * We support 5 counters which only count the mandatory quantities.
- */
-#define COUNTER_MASK(q, n) (q << ((9 - n) * 3))
-#define COUNTER_MASK0_9 cpu_to_be32(COUNTER_MASK(1, 0) | \
-                                   COUNTER_MASK(1, 1) | \
-                                   COUNTER_MASK(1, 2) | \
-                                   COUNTER_MASK(1, 3) | \
-                                   COUNTER_MASK(1, 4))
-
-static int recv_pma_get_portsamplescontrol(struct ib_pma_mad *pmp,
-                                          struct ib_device *ibdev, u8 port)
-{
-       struct ib_pma_portsamplescontrol *p =
-               (struct ib_pma_portsamplescontrol *)pmp->data;
-       struct ipath_ibdev *dev = to_idev(ibdev);
-       struct ipath_cregs const *crp = dev->dd->ipath_cregs;
-       unsigned long flags;
-       u8 port_select = p->port_select;
-
-       memset(pmp->data, 0, sizeof(pmp->data));
-
-       p->port_select = port_select;
-       if (pmp->mad_hdr.attr_mod != 0 ||
-           (port_select != port && port_select != 0xFF))
-               pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
-       /*
-        * Ticks are 10x the link transfer period which for 2.5Gbs is 4
-        * nsec.  0 == 4 nsec., 1 == 8 nsec., ..., 255 == 1020 nsec.  Sample
-        * intervals are counted in ticks.  Since we use Linux timers, that
-        * count in jiffies, we can't sample for less than 1000 ticks if HZ
-        * == 1000 (4000 ticks if HZ is 250).  link_speed_active returns 2 for
-        * DDR, 1 for SDR, set the tick to 1 for DDR, 0 for SDR on chips that
-        * have hardware support for delaying packets.
-        */
-       if (crp->cr_psstat)
-               p->tick = dev->dd->ipath_link_speed_active - 1;
-       else
-               p->tick = 250;          /* 1 usec. */
-       p->counter_width = 4;   /* 32 bit counters */
-       p->counter_mask0_9 = COUNTER_MASK0_9;
-       spin_lock_irqsave(&dev->pending_lock, flags);
-       if (crp->cr_psstat)
-               p->sample_status = ipath_read_creg32(dev->dd, crp->cr_psstat);
-       else
-               p->sample_status = dev->pma_sample_status;
-       p->sample_start = cpu_to_be32(dev->pma_sample_start);
-       p->sample_interval = cpu_to_be32(dev->pma_sample_interval);
-       p->tag = cpu_to_be16(dev->pma_tag);
-       p->counter_select[0] = dev->pma_counter_select[0];
-       p->counter_select[1] = dev->pma_counter_select[1];
-       p->counter_select[2] = dev->pma_counter_select[2];
-       p->counter_select[3] = dev->pma_counter_select[3];
-       p->counter_select[4] = dev->pma_counter_select[4];
-       spin_unlock_irqrestore(&dev->pending_lock, flags);
-
-       return reply((struct ib_smp *) pmp);
-}
-
-static int recv_pma_set_portsamplescontrol(struct ib_pma_mad *pmp,
-                                          struct ib_device *ibdev, u8 port)
-{
-       struct ib_pma_portsamplescontrol *p =
-               (struct ib_pma_portsamplescontrol *)pmp->data;
-       struct ipath_ibdev *dev = to_idev(ibdev);
-       struct ipath_cregs const *crp = dev->dd->ipath_cregs;
-       unsigned long flags;
-       u8 status;
-       int ret;
-
-       if (pmp->mad_hdr.attr_mod != 0 ||
-           (p->port_select != port && p->port_select != 0xFF)) {
-               pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
-               ret = reply((struct ib_smp *) pmp);
-               goto bail;
-       }
-
-       spin_lock_irqsave(&dev->pending_lock, flags);
-       if (crp->cr_psstat)
-               status = ipath_read_creg32(dev->dd, crp->cr_psstat);
-       else
-               status = dev->pma_sample_status;
-       if (status == IB_PMA_SAMPLE_STATUS_DONE) {
-               dev->pma_sample_start = be32_to_cpu(p->sample_start);
-               dev->pma_sample_interval = be32_to_cpu(p->sample_interval);
-               dev->pma_tag = be16_to_cpu(p->tag);
-               dev->pma_counter_select[0] = p->counter_select[0];
-               dev->pma_counter_select[1] = p->counter_select[1];
-               dev->pma_counter_select[2] = p->counter_select[2];
-               dev->pma_counter_select[3] = p->counter_select[3];
-               dev->pma_counter_select[4] = p->counter_select[4];
-               if (crp->cr_psstat) {
-                       ipath_write_creg(dev->dd, crp->cr_psinterval,
-                                        dev->pma_sample_interval);
-                       ipath_write_creg(dev->dd, crp->cr_psstart,
-                                        dev->pma_sample_start);
-               } else
-                       dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_STARTED;
-       }
-       spin_unlock_irqrestore(&dev->pending_lock, flags);
-
-       ret = recv_pma_get_portsamplescontrol(pmp, ibdev, port);
-
-bail:
-       return ret;
-}
-
-static u64 get_counter(struct ipath_ibdev *dev,
-                      struct ipath_cregs const *crp,
-                      __be16 sel)
-{
-       u64 ret;
-
-       switch (sel) {
-       case IB_PMA_PORT_XMIT_DATA:
-               ret = (crp->cr_psxmitdatacount) ?
-                       ipath_read_creg32(dev->dd, crp->cr_psxmitdatacount) :
-                       dev->ipath_sword;
-               break;
-       case IB_PMA_PORT_RCV_DATA:
-               ret = (crp->cr_psrcvdatacount) ?
-                       ipath_read_creg32(dev->dd, crp->cr_psrcvdatacount) :
-                       dev->ipath_rword;
-               break;
-       case IB_PMA_PORT_XMIT_PKTS:
-               ret = (crp->cr_psxmitpktscount) ?
-                       ipath_read_creg32(dev->dd, crp->cr_psxmitpktscount) :
-                       dev->ipath_spkts;
-               break;
-       case IB_PMA_PORT_RCV_PKTS:
-               ret = (crp->cr_psrcvpktscount) ?
-                       ipath_read_creg32(dev->dd, crp->cr_psrcvpktscount) :
-                       dev->ipath_rpkts;
-               break;
-       case IB_PMA_PORT_XMIT_WAIT:
-               ret = (crp->cr_psxmitwaitcount) ?
-                       ipath_read_creg32(dev->dd, crp->cr_psxmitwaitcount) :
-                       dev->ipath_xmit_wait;
-               break;
-       default:
-               ret = 0;
-       }
-
-       return ret;
-}
-
-static int recv_pma_get_portsamplesresult(struct ib_pma_mad *pmp,
-                                         struct ib_device *ibdev)
-{
-       struct ib_pma_portsamplesresult *p =
-               (struct ib_pma_portsamplesresult *)pmp->data;
-       struct ipath_ibdev *dev = to_idev(ibdev);
-       struct ipath_cregs const *crp = dev->dd->ipath_cregs;
-       u8 status;
-       int i;
-
-       memset(pmp->data, 0, sizeof(pmp->data));
-       p->tag = cpu_to_be16(dev->pma_tag);
-       if (crp->cr_psstat)
-               status = ipath_read_creg32(dev->dd, crp->cr_psstat);
-       else
-               status = dev->pma_sample_status;
-       p->sample_status = cpu_to_be16(status);
-       for (i = 0; i < ARRAY_SIZE(dev->pma_counter_select); i++)
-               p->counter[i] = (status != IB_PMA_SAMPLE_STATUS_DONE) ? 0 :
-                   cpu_to_be32(
-                       get_counter(dev, crp, dev->pma_counter_select[i]));
-
-       return reply((struct ib_smp *) pmp);
-}
-
-static int recv_pma_get_portsamplesresult_ext(struct ib_pma_mad *pmp,
-                                             struct ib_device *ibdev)
-{
-       struct ib_pma_portsamplesresult_ext *p =
-               (struct ib_pma_portsamplesresult_ext *)pmp->data;
-       struct ipath_ibdev *dev = to_idev(ibdev);
-       struct ipath_cregs const *crp = dev->dd->ipath_cregs;
-       u8 status;
-       int i;
-
-       memset(pmp->data, 0, sizeof(pmp->data));
-       p->tag = cpu_to_be16(dev->pma_tag);
-       if (crp->cr_psstat)
-               status = ipath_read_creg32(dev->dd, crp->cr_psstat);
-       else
-               status = dev->pma_sample_status;
-       p->sample_status = cpu_to_be16(status);
-       /* 64 bits */
-       p->extended_width = cpu_to_be32(0x80000000);
-       for (i = 0; i < ARRAY_SIZE(dev->pma_counter_select); i++)
-               p->counter[i] = (status != IB_PMA_SAMPLE_STATUS_DONE) ? 0 :
-                   cpu_to_be64(
-                       get_counter(dev, crp, dev->pma_counter_select[i]));
-
-       return reply((struct ib_smp *) pmp);
-}
-
-static int recv_pma_get_portcounters(struct ib_pma_mad *pmp,
-                                    struct ib_device *ibdev, u8 port)
-{
-       struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
-               pmp->data;
-       struct ipath_ibdev *dev = to_idev(ibdev);
-       struct ipath_verbs_counters cntrs;
-       u8 port_select = p->port_select;
-
-       ipath_get_counters(dev->dd, &cntrs);
-
-       /* Adjust counters for any resets done. */
-       cntrs.symbol_error_counter -= dev->z_symbol_error_counter;
-       cntrs.link_error_recovery_counter -=
-               dev->z_link_error_recovery_counter;
-       cntrs.link_downed_counter -= dev->z_link_downed_counter;
-       cntrs.port_rcv_errors += dev->rcv_errors;
-       cntrs.port_rcv_errors -= dev->z_port_rcv_errors;
-       cntrs.port_rcv_remphys_errors -= dev->z_port_rcv_remphys_errors;
-       cntrs.port_xmit_discards -= dev->z_port_xmit_discards;
-       cntrs.port_xmit_data -= dev->z_port_xmit_data;
-       cntrs.port_rcv_data -= dev->z_port_rcv_data;
-       cntrs.port_xmit_packets -= dev->z_port_xmit_packets;
-       cntrs.port_rcv_packets -= dev->z_port_rcv_packets;
-       cntrs.local_link_integrity_errors -=
-               dev->z_local_link_integrity_errors;
-       cntrs.excessive_buffer_overrun_errors -=
-               dev->z_excessive_buffer_overrun_errors;
-       cntrs.vl15_dropped -= dev->z_vl15_dropped;
-       cntrs.vl15_dropped += dev->n_vl15_dropped;
-
-       memset(pmp->data, 0, sizeof(pmp->data));
-
-       p->port_select = port_select;
-       if (pmp->mad_hdr.attr_mod != 0 ||
-           (port_select != port && port_select != 0xFF))
-               pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
-
-       if (cntrs.symbol_error_counter > 0xFFFFUL)
-               p->symbol_error_counter = cpu_to_be16(0xFFFF);
-       else
-               p->symbol_error_counter =
-                       cpu_to_be16((u16)cntrs.symbol_error_counter);
-       if (cntrs.link_error_recovery_counter > 0xFFUL)
-               p->link_error_recovery_counter = 0xFF;
-       else
-               p->link_error_recovery_counter =
-                       (u8)cntrs.link_error_recovery_counter;
-       if (cntrs.link_downed_counter > 0xFFUL)
-               p->link_downed_counter = 0xFF;
-       else
-               p->link_downed_counter = (u8)cntrs.link_downed_counter;
-       if (cntrs.port_rcv_errors > 0xFFFFUL)
-               p->port_rcv_errors = cpu_to_be16(0xFFFF);
-       else
-               p->port_rcv_errors =
-                       cpu_to_be16((u16) cntrs.port_rcv_errors);
-       if (cntrs.port_rcv_remphys_errors > 0xFFFFUL)
-               p->port_rcv_remphys_errors = cpu_to_be16(0xFFFF);
-       else
-               p->port_rcv_remphys_errors =
-                       cpu_to_be16((u16)cntrs.port_rcv_remphys_errors);
-       if (cntrs.port_xmit_discards > 0xFFFFUL)
-               p->port_xmit_discards = cpu_to_be16(0xFFFF);
-       else
-               p->port_xmit_discards =
-                       cpu_to_be16((u16)cntrs.port_xmit_discards);
-       if (cntrs.local_link_integrity_errors > 0xFUL)
-               cntrs.local_link_integrity_errors = 0xFUL;
-       if (cntrs.excessive_buffer_overrun_errors > 0xFUL)
-               cntrs.excessive_buffer_overrun_errors = 0xFUL;
-       p->link_overrun_errors = (cntrs.local_link_integrity_errors << 4) |
-               cntrs.excessive_buffer_overrun_errors;
-       if (cntrs.vl15_dropped > 0xFFFFUL)
-               p->vl15_dropped = cpu_to_be16(0xFFFF);
-       else
-               p->vl15_dropped = cpu_to_be16((u16)cntrs.vl15_dropped);
-       if (cntrs.port_xmit_data > 0xFFFFFFFFUL)
-               p->port_xmit_data = cpu_to_be32(0xFFFFFFFF);
-       else
-               p->port_xmit_data = cpu_to_be32((u32)cntrs.port_xmit_data);
-       if (cntrs.port_rcv_data > 0xFFFFFFFFUL)
-               p->port_rcv_data = cpu_to_be32(0xFFFFFFFF);
-       else
-               p->port_rcv_data = cpu_to_be32((u32)cntrs.port_rcv_data);
-       if (cntrs.port_xmit_packets > 0xFFFFFFFFUL)
-               p->port_xmit_packets = cpu_to_be32(0xFFFFFFFF);
-       else
-               p->port_xmit_packets =
-                       cpu_to_be32((u32)cntrs.port_xmit_packets);
-       if (cntrs.port_rcv_packets > 0xFFFFFFFFUL)
-               p->port_rcv_packets = cpu_to_be32(0xFFFFFFFF);
-       else
-               p->port_rcv_packets =
-                       cpu_to_be32((u32) cntrs.port_rcv_packets);
-
-       return reply((struct ib_smp *) pmp);
-}
-
-static int recv_pma_get_portcounters_ext(struct ib_pma_mad *pmp,
-                                        struct ib_device *ibdev, u8 port)
-{
-       struct ib_pma_portcounters_ext *p =
-               (struct ib_pma_portcounters_ext *)pmp->data;
-       struct ipath_ibdev *dev = to_idev(ibdev);
-       u64 swords, rwords, spkts, rpkts, xwait;
-       u8 port_select = p->port_select;
-
-       ipath_snapshot_counters(dev->dd, &swords, &rwords, &spkts,
-                               &rpkts, &xwait);
-
-       /* Adjust counters for any resets done. */
-       swords -= dev->z_port_xmit_data;
-       rwords -= dev->z_port_rcv_data;
-       spkts -= dev->z_port_xmit_packets;
-       rpkts -= dev->z_port_rcv_packets;
-
-       memset(pmp->data, 0, sizeof(pmp->data));
-
-       p->port_select = port_select;
-       if (pmp->mad_hdr.attr_mod != 0 ||
-           (port_select != port && port_select != 0xFF))
-               pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
-
-       p->port_xmit_data = cpu_to_be64(swords);
-       p->port_rcv_data = cpu_to_be64(rwords);
-       p->port_xmit_packets = cpu_to_be64(spkts);
-       p->port_rcv_packets = cpu_to_be64(rpkts);
-       p->port_unicast_xmit_packets = cpu_to_be64(dev->n_unicast_xmit);
-       p->port_unicast_rcv_packets = cpu_to_be64(dev->n_unicast_rcv);
-       p->port_multicast_xmit_packets = cpu_to_be64(dev->n_multicast_xmit);
-       p->port_multicast_rcv_packets = cpu_to_be64(dev->n_multicast_rcv);
-
-       return reply((struct ib_smp *) pmp);
-}
-
-static int recv_pma_set_portcounters(struct ib_pma_mad *pmp,
-                                    struct ib_device *ibdev, u8 port)
-{
-       struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
-               pmp->data;
-       struct ipath_ibdev *dev = to_idev(ibdev);
-       struct ipath_verbs_counters cntrs;
-
-       /*
-        * Since the HW doesn't support clearing counters, we save the
-        * current count and subtract it from future responses.
-        */
-       ipath_get_counters(dev->dd, &cntrs);
-
-       if (p->counter_select & IB_PMA_SEL_SYMBOL_ERROR)
-               dev->z_symbol_error_counter = cntrs.symbol_error_counter;
-
-       if (p->counter_select & IB_PMA_SEL_LINK_ERROR_RECOVERY)
-               dev->z_link_error_recovery_counter =
-                       cntrs.link_error_recovery_counter;
-
-       if (p->counter_select & IB_PMA_SEL_LINK_DOWNED)
-               dev->z_link_downed_counter = cntrs.link_downed_counter;
-
-       if (p->counter_select & IB_PMA_SEL_PORT_RCV_ERRORS)
-               dev->z_port_rcv_errors =
-                       cntrs.port_rcv_errors + dev->rcv_errors;
-
-       if (p->counter_select & IB_PMA_SEL_PORT_RCV_REMPHYS_ERRORS)
-               dev->z_port_rcv_remphys_errors =
-                       cntrs.port_rcv_remphys_errors;
-
-       if (p->counter_select & IB_PMA_SEL_PORT_XMIT_DISCARDS)
-               dev->z_port_xmit_discards = cntrs.port_xmit_discards;
-
-       if (p->counter_select & IB_PMA_SEL_LOCAL_LINK_INTEGRITY_ERRORS)
-               dev->z_local_link_integrity_errors =
-                       cntrs.local_link_integrity_errors;
-
-       if (p->counter_select & IB_PMA_SEL_EXCESSIVE_BUFFER_OVERRUNS)
-               dev->z_excessive_buffer_overrun_errors =
-                       cntrs.excessive_buffer_overrun_errors;
-
-       if (p->counter_select & IB_PMA_SEL_PORT_VL15_DROPPED) {
-               dev->n_vl15_dropped = 0;
-               dev->z_vl15_dropped = cntrs.vl15_dropped;
-       }
-
-       if (p->counter_select & IB_PMA_SEL_PORT_XMIT_DATA)
-               dev->z_port_xmit_data = cntrs.port_xmit_data;
-
-       if (p->counter_select & IB_PMA_SEL_PORT_RCV_DATA)
-               dev->z_port_rcv_data = cntrs.port_rcv_data;
-
-       if (p->counter_select & IB_PMA_SEL_PORT_XMIT_PACKETS)
-               dev->z_port_xmit_packets = cntrs.port_xmit_packets;
-
-       if (p->counter_select & IB_PMA_SEL_PORT_RCV_PACKETS)
-               dev->z_port_rcv_packets = cntrs.port_rcv_packets;
-
-       return recv_pma_get_portcounters(pmp, ibdev, port);
-}
-
-static int recv_pma_set_portcounters_ext(struct ib_pma_mad *pmp,
-                                        struct ib_device *ibdev, u8 port)
-{
-       struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
-               pmp->data;
-       struct ipath_ibdev *dev = to_idev(ibdev);
-       u64 swords, rwords, spkts, rpkts, xwait;
-
-       ipath_snapshot_counters(dev->dd, &swords, &rwords, &spkts,
-                               &rpkts, &xwait);
-
-       if (p->counter_select & IB_PMA_SELX_PORT_XMIT_DATA)
-               dev->z_port_xmit_data = swords;
-
-       if (p->counter_select & IB_PMA_SELX_PORT_RCV_DATA)
-               dev->z_port_rcv_data = rwords;
-
-       if (p->counter_select & IB_PMA_SELX_PORT_XMIT_PACKETS)
-               dev->z_port_xmit_packets = spkts;
-
-       if (p->counter_select & IB_PMA_SELX_PORT_RCV_PACKETS)
-               dev->z_port_rcv_packets = rpkts;
-
-       if (p->counter_select & IB_PMA_SELX_PORT_UNI_XMIT_PACKETS)
-               dev->n_unicast_xmit = 0;
-
-       if (p->counter_select & IB_PMA_SELX_PORT_UNI_RCV_PACKETS)
-               dev->n_unicast_rcv = 0;
-
-       if (p->counter_select & IB_PMA_SELX_PORT_MULTI_XMIT_PACKETS)
-               dev->n_multicast_xmit = 0;
-
-       if (p->counter_select & IB_PMA_SELX_PORT_MULTI_RCV_PACKETS)
-               dev->n_multicast_rcv = 0;
-
-       return recv_pma_get_portcounters_ext(pmp, ibdev, port);
-}
-
-static int process_subn(struct ib_device *ibdev, int mad_flags,
-                       u8 port_num, const struct ib_mad *in_mad,
-                       struct ib_mad *out_mad)
-{
-       struct ib_smp *smp = (struct ib_smp *)out_mad;
-       struct ipath_ibdev *dev = to_idev(ibdev);
-       int ret;
-
-       *out_mad = *in_mad;
-       if (smp->class_version != 1) {
-               smp->status |= IB_SMP_UNSUP_VERSION;
-               ret = reply(smp);
-               goto bail;
-       }
-
-       /* Is the mkey in the process of expiring? */
-       if (dev->mkey_lease_timeout &&
-           time_after_eq(jiffies, dev->mkey_lease_timeout)) {
-               /* Clear timeout and mkey protection field. */
-               dev->mkey_lease_timeout = 0;
-               dev->mkeyprot = 0;
-       }
-
-       /*
-        * M_Key checking depends on
-        * Portinfo:M_Key_protect_bits
-        */
-       if ((mad_flags & IB_MAD_IGNORE_MKEY) == 0 && dev->mkey != 0 &&
-           dev->mkey != smp->mkey &&
-           (smp->method == IB_MGMT_METHOD_SET ||
-            (smp->method == IB_MGMT_METHOD_GET &&
-             dev->mkeyprot >= 2))) {
-               if (dev->mkey_violations != 0xFFFF)
-                       ++dev->mkey_violations;
-               if (dev->mkey_lease_timeout ||
-                   dev->mkey_lease_period == 0) {
-                       ret = IB_MAD_RESULT_SUCCESS |
-                               IB_MAD_RESULT_CONSUMED;
-                       goto bail;
-               }
-               dev->mkey_lease_timeout = jiffies +
-                       dev->mkey_lease_period * HZ;
-               /* Future: Generate a trap notice. */
-               ret = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
-               goto bail;
-       } else if (dev->mkey_lease_timeout)
-               dev->mkey_lease_timeout = 0;
-
-       switch (smp->method) {
-       case IB_MGMT_METHOD_GET:
-               switch (smp->attr_id) {
-               case IB_SMP_ATTR_NODE_DESC:
-                       ret = recv_subn_get_nodedescription(smp, ibdev);
-                       goto bail;
-               case IB_SMP_ATTR_NODE_INFO:
-                       ret = recv_subn_get_nodeinfo(smp, ibdev, port_num);
-                       goto bail;
-               case IB_SMP_ATTR_GUID_INFO:
-                       ret = recv_subn_get_guidinfo(smp, ibdev);
-                       goto bail;
-               case IB_SMP_ATTR_PORT_INFO:
-                       ret = recv_subn_get_portinfo(smp, ibdev, port_num);
-                       goto bail;
-               case IB_SMP_ATTR_PKEY_TABLE:
-                       ret = recv_subn_get_pkeytable(smp, ibdev);
-                       goto bail;
-               case IB_SMP_ATTR_SM_INFO:
-                       if (dev->port_cap_flags & IB_PORT_SM_DISABLED) {
-                               ret = IB_MAD_RESULT_SUCCESS |
-                                       IB_MAD_RESULT_CONSUMED;
-                               goto bail;
-                       }
-                       if (dev->port_cap_flags & IB_PORT_SM) {
-                               ret = IB_MAD_RESULT_SUCCESS;
-                               goto bail;
-                       }
-                       /* FALLTHROUGH */
-               default:
-                       smp->status |= IB_SMP_UNSUP_METH_ATTR;
-                       ret = reply(smp);
-                       goto bail;
-               }
-
-       case IB_MGMT_METHOD_SET:
-               switch (smp->attr_id) {
-               case IB_SMP_ATTR_GUID_INFO:
-                       ret = recv_subn_set_guidinfo(smp, ibdev);
-                       goto bail;
-               case IB_SMP_ATTR_PORT_INFO:
-                       ret = recv_subn_set_portinfo(smp, ibdev, port_num);
-                       goto bail;
-               case IB_SMP_ATTR_PKEY_TABLE:
-                       ret = recv_subn_set_pkeytable(smp, ibdev, port_num);
-                       goto bail;
-               case IB_SMP_ATTR_SM_INFO:
-                       if (dev->port_cap_flags & IB_PORT_SM_DISABLED) {
-                               ret = IB_MAD_RESULT_SUCCESS |
-                                       IB_MAD_RESULT_CONSUMED;
-                               goto bail;
-                       }
-                       if (dev->port_cap_flags & IB_PORT_SM) {
-                               ret = IB_MAD_RESULT_SUCCESS;
-                               goto bail;
-                       }
-                       /* FALLTHROUGH */
-               default:
-                       smp->status |= IB_SMP_UNSUP_METH_ATTR;
-                       ret = reply(smp);
-                       goto bail;
-               }
-
-       case IB_MGMT_METHOD_TRAP:
-       case IB_MGMT_METHOD_REPORT:
-       case IB_MGMT_METHOD_REPORT_RESP:
-       case IB_MGMT_METHOD_TRAP_REPRESS:
-       case IB_MGMT_METHOD_GET_RESP:
-               /*
-                * The ib_mad module will call us to process responses
-                * before checking for other consumers.
-                * Just tell the caller to process it normally.
-                */
-               ret = IB_MAD_RESULT_SUCCESS;
-               goto bail;
-       default:
-               smp->status |= IB_SMP_UNSUP_METHOD;
-               ret = reply(smp);
-       }
-
-bail:
-       return ret;
-}
-
-static int process_perf(struct ib_device *ibdev, u8 port_num,
-                       const struct ib_mad *in_mad,
-                       struct ib_mad *out_mad)
-{
-       struct ib_pma_mad *pmp = (struct ib_pma_mad *)out_mad;
-       int ret;
-
-       *out_mad = *in_mad;
-       if (pmp->mad_hdr.class_version != 1) {
-               pmp->mad_hdr.status |= IB_SMP_UNSUP_VERSION;
-               ret = reply((struct ib_smp *) pmp);
-               goto bail;
-       }
-
-       switch (pmp->mad_hdr.method) {
-       case IB_MGMT_METHOD_GET:
-               switch (pmp->mad_hdr.attr_id) {
-               case IB_PMA_CLASS_PORT_INFO:
-                       ret = recv_pma_get_classportinfo(pmp);
-                       goto bail;
-               case IB_PMA_PORT_SAMPLES_CONTROL:
-                       ret = recv_pma_get_portsamplescontrol(pmp, ibdev,
-                                                             port_num);
-                       goto bail;
-               case IB_PMA_PORT_SAMPLES_RESULT:
-                       ret = recv_pma_get_portsamplesresult(pmp, ibdev);
-                       goto bail;
-               case IB_PMA_PORT_SAMPLES_RESULT_EXT:
-                       ret = recv_pma_get_portsamplesresult_ext(pmp,
-                                                                ibdev);
-                       goto bail;
-               case IB_PMA_PORT_COUNTERS:
-                       ret = recv_pma_get_portcounters(pmp, ibdev,
-                                                       port_num);
-                       goto bail;
-               case IB_PMA_PORT_COUNTERS_EXT:
-                       ret = recv_pma_get_portcounters_ext(pmp, ibdev,
-                                                           port_num);
-                       goto bail;
-               default:
-                       pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
-                       ret = reply((struct ib_smp *) pmp);
-                       goto bail;
-               }
-
-       case IB_MGMT_METHOD_SET:
-               switch (pmp->mad_hdr.attr_id) {
-               case IB_PMA_PORT_SAMPLES_CONTROL:
-                       ret = recv_pma_set_portsamplescontrol(pmp, ibdev,
-                                                             port_num);
-                       goto bail;
-               case IB_PMA_PORT_COUNTERS:
-                       ret = recv_pma_set_portcounters(pmp, ibdev,
-                                                       port_num);
-                       goto bail;
-               case IB_PMA_PORT_COUNTERS_EXT:
-                       ret = recv_pma_set_portcounters_ext(pmp, ibdev,
-                                                           port_num);
-                       goto bail;
-               default:
-                       pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
-                       ret = reply((struct ib_smp *) pmp);
-                       goto bail;
-               }
-
-       case IB_MGMT_METHOD_GET_RESP:
-               /*
-                * The ib_mad module will call us to process responses
-                * before checking for other consumers.
-                * Just tell the caller to process it normally.
-                */
-               ret = IB_MAD_RESULT_SUCCESS;
-               goto bail;
-       default:
-               pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD;
-               ret = reply((struct ib_smp *) pmp);
-       }
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_process_mad - process an incoming MAD packet
- * @ibdev: the infiniband device this packet came in on
- * @mad_flags: MAD flags
- * @port_num: the port number this packet came in on
- * @in_wc: the work completion entry for this packet
- * @in_grh: the global route header for this packet
- * @in_mad: the incoming MAD
- * @out_mad: any outgoing MAD reply
- *
- * Returns IB_MAD_RESULT_SUCCESS if this is a MAD that we are not
- * interested in processing.
- *
- * Note that the verbs framework has already done the MAD sanity checks,
- * and hop count/pointer updating for IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
- * MADs.
- *
- * This is called by the ib_mad module.
- */
-int ipath_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
-                     const struct ib_wc *in_wc, const struct ib_grh *in_grh,
-                     const struct ib_mad_hdr *in, size_t in_mad_size,
-                     struct ib_mad_hdr *out, size_t *out_mad_size,
-                     u16 *out_mad_pkey_index)
-{
-       int ret;
-       const struct ib_mad *in_mad = (const struct ib_mad *)in;
-       struct ib_mad *out_mad = (struct ib_mad *)out;
-
-       if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) ||
-                        *out_mad_size != sizeof(*out_mad)))
-               return IB_MAD_RESULT_FAILURE;
-
-       switch (in_mad->mad_hdr.mgmt_class) {
-       case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
-       case IB_MGMT_CLASS_SUBN_LID_ROUTED:
-               ret = process_subn(ibdev, mad_flags, port_num,
-                                  in_mad, out_mad);
-               goto bail;
-       case IB_MGMT_CLASS_PERF_MGMT:
-               ret = process_perf(ibdev, port_num, in_mad, out_mad);
-               goto bail;
-       default:
-               ret = IB_MAD_RESULT_SUCCESS;
-       }
-
-bail:
-       return ret;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_mmap.c b/drivers/staging/rdma/ipath/ipath_mmap.c
deleted file mode 100644 (file)
index e732742..0000000
+++ /dev/null
@@ -1,174 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/module.h>
-#include <linux/vmalloc.h>
-#include <linux/slab.h>
-#include <linux/mm.h>
-#include <linux/errno.h>
-#include <asm/pgtable.h>
-
-#include "ipath_verbs.h"
-
-/**
- * ipath_release_mmap_info - free mmap info structure
- * @ref: a pointer to the kref within struct ipath_mmap_info
- */
-void ipath_release_mmap_info(struct kref *ref)
-{
-       struct ipath_mmap_info *ip =
-               container_of(ref, struct ipath_mmap_info, ref);
-       struct ipath_ibdev *dev = to_idev(ip->context->device);
-
-       spin_lock_irq(&dev->pending_lock);
-       list_del(&ip->pending_mmaps);
-       spin_unlock_irq(&dev->pending_lock);
-
-       vfree(ip->obj);
-       kfree(ip);
-}
-
-/*
- * open and close keep track of how many times the CQ is mapped,
- * to avoid releasing it.
- */
-static void ipath_vma_open(struct vm_area_struct *vma)
-{
-       struct ipath_mmap_info *ip = vma->vm_private_data;
-
-       kref_get(&ip->ref);
-}
-
-static void ipath_vma_close(struct vm_area_struct *vma)
-{
-       struct ipath_mmap_info *ip = vma->vm_private_data;
-
-       kref_put(&ip->ref, ipath_release_mmap_info);
-}
-
-static const struct vm_operations_struct ipath_vm_ops = {
-       .open =     ipath_vma_open,
-       .close =    ipath_vma_close,
-};
-
-/**
- * ipath_mmap - create a new mmap region
- * @context: the IB user context of the process making the mmap() call
- * @vma: the VMA to be initialized
- * Return zero if the mmap is OK. Otherwise, return an errno.
- */
-int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
-{
-       struct ipath_ibdev *dev = to_idev(context->device);
-       unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
-       unsigned long size = vma->vm_end - vma->vm_start;
-       struct ipath_mmap_info *ip, *pp;
-       int ret = -EINVAL;
-
-       /*
-        * Search the device's list of objects waiting for a mmap call.
-        * Normally, this list is very short since a call to create a
-        * CQ, QP, or SRQ is soon followed by a call to mmap().
-        */
-       spin_lock_irq(&dev->pending_lock);
-       list_for_each_entry_safe(ip, pp, &dev->pending_mmaps,
-                                pending_mmaps) {
-               /* Only the creator is allowed to mmap the object */
-               if (context != ip->context || (__u64) offset != ip->offset)
-                       continue;
-               /* Don't allow a mmap larger than the object. */
-               if (size > ip->size)
-                       break;
-
-               list_del_init(&ip->pending_mmaps);
-               spin_unlock_irq(&dev->pending_lock);
-
-               ret = remap_vmalloc_range(vma, ip->obj, 0);
-               if (ret)
-                       goto done;
-               vma->vm_ops = &ipath_vm_ops;
-               vma->vm_private_data = ip;
-               ipath_vma_open(vma);
-               goto done;
-       }
-       spin_unlock_irq(&dev->pending_lock);
-done:
-       return ret;
-}
-
-/*
- * Allocate information for ipath_mmap
- */
-struct ipath_mmap_info *ipath_create_mmap_info(struct ipath_ibdev *dev,
-                                              u32 size,
-                                              struct ib_ucontext *context,
-                                              void *obj) {
-       struct ipath_mmap_info *ip;
-
-       ip = kmalloc(sizeof *ip, GFP_KERNEL);
-       if (!ip)
-               goto bail;
-
-       size = PAGE_ALIGN(size);
-
-       spin_lock_irq(&dev->mmap_offset_lock);
-       if (dev->mmap_offset == 0)
-               dev->mmap_offset = PAGE_SIZE;
-       ip->offset = dev->mmap_offset;
-       dev->mmap_offset += size;
-       spin_unlock_irq(&dev->mmap_offset_lock);
-
-       INIT_LIST_HEAD(&ip->pending_mmaps);
-       ip->size = size;
-       ip->context = context;
-       ip->obj = obj;
-       kref_init(&ip->ref);
-
-bail:
-       return ip;
-}
-
-void ipath_update_mmap_info(struct ipath_ibdev *dev,
-                           struct ipath_mmap_info *ip,
-                           u32 size, void *obj) {
-       size = PAGE_ALIGN(size);
-
-       spin_lock_irq(&dev->mmap_offset_lock);
-       if (dev->mmap_offset == 0)
-               dev->mmap_offset = PAGE_SIZE;
-       ip->offset = dev->mmap_offset;
-       dev->mmap_offset += size;
-       spin_unlock_irq(&dev->mmap_offset_lock);
-
-       ip->size = size;
-       ip->obj = obj;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_mr.c b/drivers/staging/rdma/ipath/ipath_mr.c
deleted file mode 100644 (file)
index c7278f6..0000000
+++ /dev/null
@@ -1,425 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/slab.h>
-
-#include <rdma/ib_umem.h>
-#include <rdma/ib_pack.h>
-#include <rdma/ib_smi.h>
-
-#include "ipath_verbs.h"
-
-/* Fast memory region */
-struct ipath_fmr {
-       struct ib_fmr ibfmr;
-       u8 page_shift;
-       struct ipath_mregion mr;        /* must be last */
-};
-
-static inline struct ipath_fmr *to_ifmr(struct ib_fmr *ibfmr)
-{
-       return container_of(ibfmr, struct ipath_fmr, ibfmr);
-}
-
-/**
- * ipath_get_dma_mr - get a DMA memory region
- * @pd: protection domain for this memory region
- * @acc: access flags
- *
- * Returns the memory region on success, otherwise returns an errno.
- * Note that all DMA addresses should be created via the
- * struct ib_dma_mapping_ops functions (see ipath_dma.c).
- */
-struct ib_mr *ipath_get_dma_mr(struct ib_pd *pd, int acc)
-{
-       struct ipath_mr *mr;
-       struct ib_mr *ret;
-
-       mr = kzalloc(sizeof *mr, GFP_KERNEL);
-       if (!mr) {
-               ret = ERR_PTR(-ENOMEM);
-               goto bail;
-       }
-
-       mr->mr.access_flags = acc;
-       ret = &mr->ibmr;
-
-bail:
-       return ret;
-}
-
-static struct ipath_mr *alloc_mr(int count,
-                                struct ipath_lkey_table *lk_table)
-{
-       struct ipath_mr *mr;
-       int m, i = 0;
-
-       /* Allocate struct plus pointers to first level page tables. */
-       m = (count + IPATH_SEGSZ - 1) / IPATH_SEGSZ;
-       mr = kmalloc(sizeof *mr + m * sizeof mr->mr.map[0], GFP_KERNEL);
-       if (!mr)
-               goto done;
-
-       /* Allocate first level page tables. */
-       for (; i < m; i++) {
-               mr->mr.map[i] = kmalloc(sizeof *mr->mr.map[0], GFP_KERNEL);
-               if (!mr->mr.map[i])
-                       goto bail;
-       }
-       mr->mr.mapsz = m;
-
-       /*
-        * ib_reg_phys_mr() will initialize mr->ibmr except for
-        * lkey and rkey.
-        */
-       if (!ipath_alloc_lkey(lk_table, &mr->mr))
-               goto bail;
-       mr->ibmr.rkey = mr->ibmr.lkey = mr->mr.lkey;
-
-       goto done;
-
-bail:
-       while (i) {
-               i--;
-               kfree(mr->mr.map[i]);
-       }
-       kfree(mr);
-       mr = NULL;
-
-done:
-       return mr;
-}
-
-/**
- * ipath_reg_phys_mr - register a physical memory region
- * @pd: protection domain for this memory region
- * @buffer_list: pointer to the list of physical buffers to register
- * @num_phys_buf: the number of physical buffers to register
- * @iova_start: the starting address passed over IB which maps to this MR
- *
- * Returns the memory region on success, otherwise returns an errno.
- */
-struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd,
-                               struct ib_phys_buf *buffer_list,
-                               int num_phys_buf, int acc, u64 *iova_start)
-{
-       struct ipath_mr *mr;
-       int n, m, i;
-       struct ib_mr *ret;
-
-       mr = alloc_mr(num_phys_buf, &to_idev(pd->device)->lk_table);
-       if (mr == NULL) {
-               ret = ERR_PTR(-ENOMEM);
-               goto bail;
-       }
-
-       mr->mr.pd = pd;
-       mr->mr.user_base = *iova_start;
-       mr->mr.iova = *iova_start;
-       mr->mr.length = 0;
-       mr->mr.offset = 0;
-       mr->mr.access_flags = acc;
-       mr->mr.max_segs = num_phys_buf;
-       mr->umem = NULL;
-
-       m = 0;
-       n = 0;
-       for (i = 0; i < num_phys_buf; i++) {
-               mr->mr.map[m]->segs[n].vaddr = (void *) buffer_list[i].addr;
-               mr->mr.map[m]->segs[n].length = buffer_list[i].size;
-               mr->mr.length += buffer_list[i].size;
-               n++;
-               if (n == IPATH_SEGSZ) {
-                       m++;
-                       n = 0;
-               }
-       }
-
-       ret = &mr->ibmr;
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_reg_user_mr - register a userspace memory region
- * @pd: protection domain for this memory region
- * @start: starting userspace address
- * @length: length of region to register
- * @virt_addr: virtual address to use (from HCA's point of view)
- * @mr_access_flags: access flags for this memory region
- * @udata: unused by the InfiniPath driver
- *
- * Returns the memory region on success, otherwise returns an errno.
- */
-struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
-                               u64 virt_addr, int mr_access_flags,
-                               struct ib_udata *udata)
-{
-       struct ipath_mr *mr;
-       struct ib_umem *umem;
-       int n, m, entry;
-       struct scatterlist *sg;
-       struct ib_mr *ret;
-
-       if (length == 0) {
-               ret = ERR_PTR(-EINVAL);
-               goto bail;
-       }
-
-       umem = ib_umem_get(pd->uobject->context, start, length,
-                          mr_access_flags, 0);
-       if (IS_ERR(umem))
-               return (void *) umem;
-
-       n = umem->nmap;
-       mr = alloc_mr(n, &to_idev(pd->device)->lk_table);
-       if (!mr) {
-               ret = ERR_PTR(-ENOMEM);
-               ib_umem_release(umem);
-               goto bail;
-       }
-
-       mr->mr.pd = pd;
-       mr->mr.user_base = start;
-       mr->mr.iova = virt_addr;
-       mr->mr.length = length;
-       mr->mr.offset = ib_umem_offset(umem);
-       mr->mr.access_flags = mr_access_flags;
-       mr->mr.max_segs = n;
-       mr->umem = umem;
-
-       m = 0;
-       n = 0;
-       for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
-               void *vaddr;
-
-               vaddr = page_address(sg_page(sg));
-               if (!vaddr) {
-                       ret = ERR_PTR(-EINVAL);
-                       goto bail;
-               }
-               mr->mr.map[m]->segs[n].vaddr = vaddr;
-               mr->mr.map[m]->segs[n].length = umem->page_size;
-               n++;
-               if (n == IPATH_SEGSZ) {
-                       m++;
-                       n = 0;
-               }
-       }
-       ret = &mr->ibmr;
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_dereg_mr - unregister and free a memory region
- * @ibmr: the memory region to free
- *
- * Returns 0 on success.
- *
- * Note that this is called to free MRs created by ipath_get_dma_mr()
- * or ipath_reg_user_mr().
- */
-int ipath_dereg_mr(struct ib_mr *ibmr)
-{
-       struct ipath_mr *mr = to_imr(ibmr);
-       int i;
-
-       ipath_free_lkey(&to_idev(ibmr->device)->lk_table, ibmr->lkey);
-       i = mr->mr.mapsz;
-       while (i) {
-               i--;
-               kfree(mr->mr.map[i]);
-       }
-
-       if (mr->umem)
-               ib_umem_release(mr->umem);
-
-       kfree(mr);
-       return 0;
-}
-
-/**
- * ipath_alloc_fmr - allocate a fast memory region
- * @pd: the protection domain for this memory region
- * @mr_access_flags: access flags for this memory region
- * @fmr_attr: fast memory region attributes
- *
- * Returns the memory region on success, otherwise returns an errno.
- */
-struct ib_fmr *ipath_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
-                              struct ib_fmr_attr *fmr_attr)
-{
-       struct ipath_fmr *fmr;
-       int m, i = 0;
-       struct ib_fmr *ret;
-
-       /* Allocate struct plus pointers to first level page tables. */
-       m = (fmr_attr->max_pages + IPATH_SEGSZ - 1) / IPATH_SEGSZ;
-       fmr = kmalloc(sizeof *fmr + m * sizeof fmr->mr.map[0], GFP_KERNEL);
-       if (!fmr)
-               goto bail;
-
-       /* Allocate first level page tables. */
-       for (; i < m; i++) {
-               fmr->mr.map[i] = kmalloc(sizeof *fmr->mr.map[0],
-                                        GFP_KERNEL);
-               if (!fmr->mr.map[i])
-                       goto bail;
-       }
-       fmr->mr.mapsz = m;
-
-       /*
-        * ib_alloc_fmr() will initialize fmr->ibfmr except for lkey &
-        * rkey.
-        */
-       if (!ipath_alloc_lkey(&to_idev(pd->device)->lk_table, &fmr->mr))
-               goto bail;
-       fmr->ibfmr.rkey = fmr->ibfmr.lkey = fmr->mr.lkey;
-       /*
-        * Resources are allocated but no valid mapping (RKEY can't be
-        * used).
-        */
-       fmr->mr.pd = pd;
-       fmr->mr.user_base = 0;
-       fmr->mr.iova = 0;
-       fmr->mr.length = 0;
-       fmr->mr.offset = 0;
-       fmr->mr.access_flags = mr_access_flags;
-       fmr->mr.max_segs = fmr_attr->max_pages;
-       fmr->page_shift = fmr_attr->page_shift;
-
-       ret = &fmr->ibfmr;
-       goto done;
-
-bail:
-       while (i)
-               kfree(fmr->mr.map[--i]);
-       kfree(fmr);
-       ret = ERR_PTR(-ENOMEM);
-
-done:
-       return ret;
-}
-
-/**
- * ipath_map_phys_fmr - set up a fast memory region
- * @ibmfr: the fast memory region to set up
- * @page_list: the list of pages to associate with the fast memory region
- * @list_len: the number of pages to associate with the fast memory region
- * @iova: the virtual address of the start of the fast memory region
- *
- * This may be called from interrupt context.
- */
-
-int ipath_map_phys_fmr(struct ib_fmr *ibfmr, u64 * page_list,
-                      int list_len, u64 iova)
-{
-       struct ipath_fmr *fmr = to_ifmr(ibfmr);
-       struct ipath_lkey_table *rkt;
-       unsigned long flags;
-       int m, n, i;
-       u32 ps;
-       int ret;
-
-       if (list_len > fmr->mr.max_segs) {
-               ret = -EINVAL;
-               goto bail;
-       }
-       rkt = &to_idev(ibfmr->device)->lk_table;
-       spin_lock_irqsave(&rkt->lock, flags);
-       fmr->mr.user_base = iova;
-       fmr->mr.iova = iova;
-       ps = 1 << fmr->page_shift;
-       fmr->mr.length = list_len * ps;
-       m = 0;
-       n = 0;
-       ps = 1 << fmr->page_shift;
-       for (i = 0; i < list_len; i++) {
-               fmr->mr.map[m]->segs[n].vaddr = (void *) page_list[i];
-               fmr->mr.map[m]->segs[n].length = ps;
-               if (++n == IPATH_SEGSZ) {
-                       m++;
-                       n = 0;
-               }
-       }
-       spin_unlock_irqrestore(&rkt->lock, flags);
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_unmap_fmr - unmap fast memory regions
- * @fmr_list: the list of fast memory regions to unmap
- *
- * Returns 0 on success.
- */
-int ipath_unmap_fmr(struct list_head *fmr_list)
-{
-       struct ipath_fmr *fmr;
-       struct ipath_lkey_table *rkt;
-       unsigned long flags;
-
-       list_for_each_entry(fmr, fmr_list, ibfmr.list) {
-               rkt = &to_idev(fmr->ibfmr.device)->lk_table;
-               spin_lock_irqsave(&rkt->lock, flags);
-               fmr->mr.user_base = 0;
-               fmr->mr.iova = 0;
-               fmr->mr.length = 0;
-               spin_unlock_irqrestore(&rkt->lock, flags);
-       }
-       return 0;
-}
-
-/**
- * ipath_dealloc_fmr - deallocate a fast memory region
- * @ibfmr: the fast memory region to deallocate
- *
- * Returns 0 on success.
- */
-int ipath_dealloc_fmr(struct ib_fmr *ibfmr)
-{
-       struct ipath_fmr *fmr = to_ifmr(ibfmr);
-       int i;
-
-       ipath_free_lkey(&to_idev(ibfmr->device)->lk_table, ibfmr->lkey);
-       i = fmr->mr.mapsz;
-       while (i)
-               kfree(fmr->mr.map[--i]);
-       kfree(fmr);
-       return 0;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_qp.c b/drivers/staging/rdma/ipath/ipath_qp.c
deleted file mode 100644 (file)
index 280cd2d..0000000
+++ /dev/null
@@ -1,1079 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/err.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-
-#include "ipath_verbs.h"
-#include "ipath_kernel.h"
-
-#define BITS_PER_PAGE          (PAGE_SIZE*BITS_PER_BYTE)
-#define BITS_PER_PAGE_MASK     (BITS_PER_PAGE-1)
-#define mk_qpn(qpt, map, off)  (((map) - (qpt)->map) * BITS_PER_PAGE + \
-                                (off))
-#define find_next_offset(map, off) find_next_zero_bit((map)->page, \
-                                                     BITS_PER_PAGE, off)
-
-/*
- * Convert the AETH credit code into the number of credits.
- */
-static u32 credit_table[31] = {
-       0,                      /* 0 */
-       1,                      /* 1 */
-       2,                      /* 2 */
-       3,                      /* 3 */
-       4,                      /* 4 */
-       6,                      /* 5 */
-       8,                      /* 6 */
-       12,                     /* 7 */
-       16,                     /* 8 */
-       24,                     /* 9 */
-       32,                     /* A */
-       48,                     /* B */
-       64,                     /* C */
-       96,                     /* D */
-       128,                    /* E */
-       192,                    /* F */
-       256,                    /* 10 */
-       384,                    /* 11 */
-       512,                    /* 12 */
-       768,                    /* 13 */
-       1024,                   /* 14 */
-       1536,                   /* 15 */
-       2048,                   /* 16 */
-       3072,                   /* 17 */
-       4096,                   /* 18 */
-       6144,                   /* 19 */
-       8192,                   /* 1A */
-       12288,                  /* 1B */
-       16384,                  /* 1C */
-       24576,                  /* 1D */
-       32768                   /* 1E */
-};
-
-
-static void get_map_page(struct ipath_qp_table *qpt, struct qpn_map *map)
-{
-       unsigned long page = get_zeroed_page(GFP_KERNEL);
-       unsigned long flags;
-
-       /*
-        * Free the page if someone raced with us installing it.
-        */
-
-       spin_lock_irqsave(&qpt->lock, flags);
-       if (map->page)
-               free_page(page);
-       else
-               map->page = (void *)page;
-       spin_unlock_irqrestore(&qpt->lock, flags);
-}
-
-
-static int alloc_qpn(struct ipath_qp_table *qpt, enum ib_qp_type type)
-{
-       u32 i, offset, max_scan, qpn;
-       struct qpn_map *map;
-       u32 ret = -1;
-
-       if (type == IB_QPT_SMI)
-               ret = 0;
-       else if (type == IB_QPT_GSI)
-               ret = 1;
-
-       if (ret != -1) {
-               map = &qpt->map[0];
-               if (unlikely(!map->page)) {
-                       get_map_page(qpt, map);
-                       if (unlikely(!map->page)) {
-                               ret = -ENOMEM;
-                               goto bail;
-                       }
-               }
-               if (!test_and_set_bit(ret, map->page))
-                       atomic_dec(&map->n_free);
-               else
-                       ret = -EBUSY;
-               goto bail;
-       }
-
-       qpn = qpt->last + 1;
-       if (qpn >= QPN_MAX)
-               qpn = 2;
-       offset = qpn & BITS_PER_PAGE_MASK;
-       map = &qpt->map[qpn / BITS_PER_PAGE];
-       max_scan = qpt->nmaps - !offset;
-       for (i = 0;;) {
-               if (unlikely(!map->page)) {
-                       get_map_page(qpt, map);
-                       if (unlikely(!map->page))
-                               break;
-               }
-               if (likely(atomic_read(&map->n_free))) {
-                       do {
-                               if (!test_and_set_bit(offset, map->page)) {
-                                       atomic_dec(&map->n_free);
-                                       qpt->last = qpn;
-                                       ret = qpn;
-                                       goto bail;
-                               }
-                               offset = find_next_offset(map, offset);
-                               qpn = mk_qpn(qpt, map, offset);
-                               /*
-                                * This test differs from alloc_pidmap().
-                                * If find_next_offset() does find a zero
-                                * bit, we don't need to check for QPN
-                                * wrapping around past our starting QPN.
-                                * We just need to be sure we don't loop
-                                * forever.
-                                */
-                       } while (offset < BITS_PER_PAGE && qpn < QPN_MAX);
-               }
-               /*
-                * In order to keep the number of pages allocated to a
-                * minimum, we scan the all existing pages before increasing
-                * the size of the bitmap table.
-                */
-               if (++i > max_scan) {
-                       if (qpt->nmaps == QPNMAP_ENTRIES)
-                               break;
-                       map = &qpt->map[qpt->nmaps++];
-                       offset = 0;
-               } else if (map < &qpt->map[qpt->nmaps]) {
-                       ++map;
-                       offset = 0;
-               } else {
-                       map = &qpt->map[0];
-                       offset = 2;
-               }
-               qpn = mk_qpn(qpt, map, offset);
-       }
-
-       ret = -ENOMEM;
-
-bail:
-       return ret;
-}
-
-static void free_qpn(struct ipath_qp_table *qpt, u32 qpn)
-{
-       struct qpn_map *map;
-
-       map = qpt->map + qpn / BITS_PER_PAGE;
-       if (map->page)
-               clear_bit(qpn & BITS_PER_PAGE_MASK, map->page);
-       atomic_inc(&map->n_free);
-}
-
-/**
- * ipath_alloc_qpn - allocate a QP number
- * @qpt: the QP table
- * @qp: the QP
- * @type: the QP type (IB_QPT_SMI and IB_QPT_GSI are special)
- *
- * Allocate the next available QPN and put the QP into the hash table.
- * The hash table holds a reference to the QP.
- */
-static int ipath_alloc_qpn(struct ipath_qp_table *qpt, struct ipath_qp *qp,
-                          enum ib_qp_type type)
-{
-       unsigned long flags;
-       int ret;
-
-       ret = alloc_qpn(qpt, type);
-       if (ret < 0)
-               goto bail;
-       qp->ibqp.qp_num = ret;
-
-       /* Add the QP to the hash table. */
-       spin_lock_irqsave(&qpt->lock, flags);
-
-       ret %= qpt->max;
-       qp->next = qpt->table[ret];
-       qpt->table[ret] = qp;
-       atomic_inc(&qp->refcount);
-
-       spin_unlock_irqrestore(&qpt->lock, flags);
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_free_qp - remove a QP from the QP table
- * @qpt: the QP table
- * @qp: the QP to remove
- *
- * Remove the QP from the table so it can't be found asynchronously by
- * the receive interrupt routine.
- */
-static void ipath_free_qp(struct ipath_qp_table *qpt, struct ipath_qp *qp)
-{
-       struct ipath_qp *q, **qpp;
-       unsigned long flags;
-
-       spin_lock_irqsave(&qpt->lock, flags);
-
-       /* Remove QP from the hash table. */
-       qpp = &qpt->table[qp->ibqp.qp_num % qpt->max];
-       for (; (q = *qpp) != NULL; qpp = &q->next) {
-               if (q == qp) {
-                       *qpp = qp->next;
-                       qp->next = NULL;
-                       atomic_dec(&qp->refcount);
-                       break;
-               }
-       }
-
-       spin_unlock_irqrestore(&qpt->lock, flags);
-}
-
-/**
- * ipath_free_all_qps - check for QPs still in use
- * @qpt: the QP table to empty
- *
- * There should not be any QPs still in use.
- * Free memory for table.
- */
-unsigned ipath_free_all_qps(struct ipath_qp_table *qpt)
-{
-       unsigned long flags;
-       struct ipath_qp *qp;
-       u32 n, qp_inuse = 0;
-
-       spin_lock_irqsave(&qpt->lock, flags);
-       for (n = 0; n < qpt->max; n++) {
-               qp = qpt->table[n];
-               qpt->table[n] = NULL;
-
-               for (; qp; qp = qp->next)
-                       qp_inuse++;
-       }
-       spin_unlock_irqrestore(&qpt->lock, flags);
-
-       for (n = 0; n < ARRAY_SIZE(qpt->map); n++)
-               if (qpt->map[n].page)
-                       free_page((unsigned long) qpt->map[n].page);
-       return qp_inuse;
-}
-
-/**
- * ipath_lookup_qpn - return the QP with the given QPN
- * @qpt: the QP table
- * @qpn: the QP number to look up
- *
- * The caller is responsible for decrementing the QP reference count
- * when done.
- */
-struct ipath_qp *ipath_lookup_qpn(struct ipath_qp_table *qpt, u32 qpn)
-{
-       unsigned long flags;
-       struct ipath_qp *qp;
-
-       spin_lock_irqsave(&qpt->lock, flags);
-
-       for (qp = qpt->table[qpn % qpt->max]; qp; qp = qp->next) {
-               if (qp->ibqp.qp_num == qpn) {
-                       atomic_inc(&qp->refcount);
-                       break;
-               }
-       }
-
-       spin_unlock_irqrestore(&qpt->lock, flags);
-       return qp;
-}
-
-/**
- * ipath_reset_qp - initialize the QP state to the reset state
- * @qp: the QP to reset
- * @type: the QP type
- */
-static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type)
-{
-       qp->remote_qpn = 0;
-       qp->qkey = 0;
-       qp->qp_access_flags = 0;
-       atomic_set(&qp->s_dma_busy, 0);
-       qp->s_flags &= IPATH_S_SIGNAL_REQ_WR;
-       qp->s_hdrwords = 0;
-       qp->s_wqe = NULL;
-       qp->s_pkt_delay = 0;
-       qp->s_draining = 0;
-       qp->s_psn = 0;
-       qp->r_psn = 0;
-       qp->r_msn = 0;
-       if (type == IB_QPT_RC) {
-               qp->s_state = IB_OPCODE_RC_SEND_LAST;
-               qp->r_state = IB_OPCODE_RC_SEND_LAST;
-       } else {
-               qp->s_state = IB_OPCODE_UC_SEND_LAST;
-               qp->r_state = IB_OPCODE_UC_SEND_LAST;
-       }
-       qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
-       qp->r_nak_state = 0;
-       qp->r_aflags = 0;
-       qp->r_flags = 0;
-       qp->s_rnr_timeout = 0;
-       qp->s_head = 0;
-       qp->s_tail = 0;
-       qp->s_cur = 0;
-       qp->s_last = 0;
-       qp->s_ssn = 1;
-       qp->s_lsn = 0;
-       memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue));
-       qp->r_head_ack_queue = 0;
-       qp->s_tail_ack_queue = 0;
-       qp->s_num_rd_atomic = 0;
-       if (qp->r_rq.wq) {
-               qp->r_rq.wq->head = 0;
-               qp->r_rq.wq->tail = 0;
-       }
-}
-
-/**
- * ipath_error_qp - put a QP into the error state
- * @qp: the QP to put into the error state
- * @err: the receive completion error to signal if a RWQE is active
- *
- * Flushes both send and receive work queues.
- * Returns true if last WQE event should be generated.
- * The QP s_lock should be held and interrupts disabled.
- * If we are already in error state, just return.
- */
-
-int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
-{
-       struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-       struct ib_wc wc;
-       int ret = 0;
-
-       if (qp->state == IB_QPS_ERR)
-               goto bail;
-
-       qp->state = IB_QPS_ERR;
-
-       spin_lock(&dev->pending_lock);
-       if (!list_empty(&qp->timerwait))
-               list_del_init(&qp->timerwait);
-       if (!list_empty(&qp->piowait))
-               list_del_init(&qp->piowait);
-       spin_unlock(&dev->pending_lock);
-
-       /* Schedule the sending tasklet to drain the send work queue. */
-       if (qp->s_last != qp->s_head)
-               ipath_schedule_send(qp);
-
-       memset(&wc, 0, sizeof(wc));
-       wc.qp = &qp->ibqp;
-       wc.opcode = IB_WC_RECV;
-
-       if (test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) {
-               wc.wr_id = qp->r_wr_id;
-               wc.status = err;
-               ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
-       }
-       wc.status = IB_WC_WR_FLUSH_ERR;
-
-       if (qp->r_rq.wq) {
-               struct ipath_rwq *wq;
-               u32 head;
-               u32 tail;
-
-               spin_lock(&qp->r_rq.lock);
-
-               /* sanity check pointers before trusting them */
-               wq = qp->r_rq.wq;
-               head = wq->head;
-               if (head >= qp->r_rq.size)
-                       head = 0;
-               tail = wq->tail;
-               if (tail >= qp->r_rq.size)
-                       tail = 0;
-               while (tail != head) {
-                       wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id;
-                       if (++tail >= qp->r_rq.size)
-                               tail = 0;
-                       ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
-               }
-               wq->tail = tail;
-
-               spin_unlock(&qp->r_rq.lock);
-       } else if (qp->ibqp.event_handler)
-               ret = 1;
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_modify_qp - modify the attributes of a queue pair
- * @ibqp: the queue pair who's attributes we're modifying
- * @attr: the new attributes
- * @attr_mask: the mask of attributes to modify
- * @udata: user data for ipathverbs.so
- *
- * Returns 0 on success, otherwise returns an errno.
- */
-int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-                   int attr_mask, struct ib_udata *udata)
-{
-       struct ipath_ibdev *dev = to_idev(ibqp->device);
-       struct ipath_qp *qp = to_iqp(ibqp);
-       enum ib_qp_state cur_state, new_state;
-       int lastwqe = 0;
-       int ret;
-
-       spin_lock_irq(&qp->s_lock);
-
-       cur_state = attr_mask & IB_QP_CUR_STATE ?
-               attr->cur_qp_state : qp->state;
-       new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
-
-       if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
-                               attr_mask, IB_LINK_LAYER_UNSPECIFIED))
-               goto inval;
-
-       if (attr_mask & IB_QP_AV) {
-               if (attr->ah_attr.dlid == 0 ||
-                   attr->ah_attr.dlid >= IPATH_MULTICAST_LID_BASE)
-                       goto inval;
-
-               if ((attr->ah_attr.ah_flags & IB_AH_GRH) &&
-                   (attr->ah_attr.grh.sgid_index > 1))
-                       goto inval;
-       }
-
-       if (attr_mask & IB_QP_PKEY_INDEX)
-               if (attr->pkey_index >= ipath_get_npkeys(dev->dd))
-                       goto inval;
-
-       if (attr_mask & IB_QP_MIN_RNR_TIMER)
-               if (attr->min_rnr_timer > 31)
-                       goto inval;
-
-       if (attr_mask & IB_QP_PORT)
-               if (attr->port_num == 0 ||
-                   attr->port_num > ibqp->device->phys_port_cnt)
-                       goto inval;
-
-       /*
-        * don't allow invalid Path MTU values or greater than 2048
-        * unless we are configured for a 4KB MTU
-        */
-       if ((attr_mask & IB_QP_PATH_MTU) &&
-               (ib_mtu_enum_to_int(attr->path_mtu) == -1 ||
-               (attr->path_mtu > IB_MTU_2048 && !ipath_mtu4096)))
-               goto inval;
-
-       if (attr_mask & IB_QP_PATH_MIG_STATE)
-               if (attr->path_mig_state != IB_MIG_MIGRATED &&
-                   attr->path_mig_state != IB_MIG_REARM)
-                       goto inval;
-
-       if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
-               if (attr->max_dest_rd_atomic > IPATH_MAX_RDMA_ATOMIC)
-                       goto inval;
-
-       switch (new_state) {
-       case IB_QPS_RESET:
-               if (qp->state != IB_QPS_RESET) {
-                       qp->state = IB_QPS_RESET;
-                       spin_lock(&dev->pending_lock);
-                       if (!list_empty(&qp->timerwait))
-                               list_del_init(&qp->timerwait);
-                       if (!list_empty(&qp->piowait))
-                               list_del_init(&qp->piowait);
-                       spin_unlock(&dev->pending_lock);
-                       qp->s_flags &= ~IPATH_S_ANY_WAIT;
-                       spin_unlock_irq(&qp->s_lock);
-                       /* Stop the sending tasklet */
-                       tasklet_kill(&qp->s_task);
-                       wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy));
-                       spin_lock_irq(&qp->s_lock);
-               }
-               ipath_reset_qp(qp, ibqp->qp_type);
-               break;
-
-       case IB_QPS_SQD:
-               qp->s_draining = qp->s_last != qp->s_cur;
-               qp->state = new_state;
-               break;
-
-       case IB_QPS_SQE:
-               if (qp->ibqp.qp_type == IB_QPT_RC)
-                       goto inval;
-               qp->state = new_state;
-               break;
-
-       case IB_QPS_ERR:
-               lastwqe = ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
-               break;
-
-       default:
-               qp->state = new_state;
-               break;
-       }
-
-       if (attr_mask & IB_QP_PKEY_INDEX)
-               qp->s_pkey_index = attr->pkey_index;
-
-       if (attr_mask & IB_QP_DEST_QPN)
-               qp->remote_qpn = attr->dest_qp_num;
-
-       if (attr_mask & IB_QP_SQ_PSN) {
-               qp->s_psn = qp->s_next_psn = attr->sq_psn;
-               qp->s_last_psn = qp->s_next_psn - 1;
-       }
-
-       if (attr_mask & IB_QP_RQ_PSN)
-               qp->r_psn = attr->rq_psn;
-
-       if (attr_mask & IB_QP_ACCESS_FLAGS)
-               qp->qp_access_flags = attr->qp_access_flags;
-
-       if (attr_mask & IB_QP_AV) {
-               qp->remote_ah_attr = attr->ah_attr;
-               qp->s_dmult = ipath_ib_rate_to_mult(attr->ah_attr.static_rate);
-       }
-
-       if (attr_mask & IB_QP_PATH_MTU)
-               qp->path_mtu = attr->path_mtu;
-
-       if (attr_mask & IB_QP_RETRY_CNT)
-               qp->s_retry = qp->s_retry_cnt = attr->retry_cnt;
-
-       if (attr_mask & IB_QP_RNR_RETRY) {
-               qp->s_rnr_retry = attr->rnr_retry;
-               if (qp->s_rnr_retry > 7)
-                       qp->s_rnr_retry = 7;
-               qp->s_rnr_retry_cnt = qp->s_rnr_retry;
-       }
-
-       if (attr_mask & IB_QP_MIN_RNR_TIMER)
-               qp->r_min_rnr_timer = attr->min_rnr_timer;
-
-       if (attr_mask & IB_QP_TIMEOUT)
-               qp->timeout = attr->timeout;
-
-       if (attr_mask & IB_QP_QKEY)
-               qp->qkey = attr->qkey;
-
-       if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
-               qp->r_max_rd_atomic = attr->max_dest_rd_atomic;
-
-       if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
-               qp->s_max_rd_atomic = attr->max_rd_atomic;
-
-       spin_unlock_irq(&qp->s_lock);
-
-       if (lastwqe) {
-               struct ib_event ev;
-
-               ev.device = qp->ibqp.device;
-               ev.element.qp = &qp->ibqp;
-               ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
-               qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
-       }
-       ret = 0;
-       goto bail;
-
-inval:
-       spin_unlock_irq(&qp->s_lock);
-       ret = -EINVAL;
-
-bail:
-       return ret;
-}
-
-int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-                  int attr_mask, struct ib_qp_init_attr *init_attr)
-{
-       struct ipath_qp *qp = to_iqp(ibqp);
-
-       attr->qp_state = qp->state;
-       attr->cur_qp_state = attr->qp_state;
-       attr->path_mtu = qp->path_mtu;
-       attr->path_mig_state = 0;
-       attr->qkey = qp->qkey;
-       attr->rq_psn = qp->r_psn;
-       attr->sq_psn = qp->s_next_psn;
-       attr->dest_qp_num = qp->remote_qpn;
-       attr->qp_access_flags = qp->qp_access_flags;
-       attr->cap.max_send_wr = qp->s_size - 1;
-       attr->cap.max_recv_wr = qp->ibqp.srq ? 0 : qp->r_rq.size - 1;
-       attr->cap.max_send_sge = qp->s_max_sge;
-       attr->cap.max_recv_sge = qp->r_rq.max_sge;
-       attr->cap.max_inline_data = 0;
-       attr->ah_attr = qp->remote_ah_attr;
-       memset(&attr->alt_ah_attr, 0, sizeof(attr->alt_ah_attr));
-       attr->pkey_index = qp->s_pkey_index;
-       attr->alt_pkey_index = 0;
-       attr->en_sqd_async_notify = 0;
-       attr->sq_draining = qp->s_draining;
-       attr->max_rd_atomic = qp->s_max_rd_atomic;
-       attr->max_dest_rd_atomic = qp->r_max_rd_atomic;
-       attr->min_rnr_timer = qp->r_min_rnr_timer;
-       attr->port_num = 1;
-       attr->timeout = qp->timeout;
-       attr->retry_cnt = qp->s_retry_cnt;
-       attr->rnr_retry = qp->s_rnr_retry_cnt;
-       attr->alt_port_num = 0;
-       attr->alt_timeout = 0;
-
-       init_attr->event_handler = qp->ibqp.event_handler;
-       init_attr->qp_context = qp->ibqp.qp_context;
-       init_attr->send_cq = qp->ibqp.send_cq;
-       init_attr->recv_cq = qp->ibqp.recv_cq;
-       init_attr->srq = qp->ibqp.srq;
-       init_attr->cap = attr->cap;
-       if (qp->s_flags & IPATH_S_SIGNAL_REQ_WR)
-               init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
-       else
-               init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
-       init_attr->qp_type = qp->ibqp.qp_type;
-       init_attr->port_num = 1;
-       return 0;
-}
-
-/**
- * ipath_compute_aeth - compute the AETH (syndrome + MSN)
- * @qp: the queue pair to compute the AETH for
- *
- * Returns the AETH.
- */
-__be32 ipath_compute_aeth(struct ipath_qp *qp)
-{
-       u32 aeth = qp->r_msn & IPATH_MSN_MASK;
-
-       if (qp->ibqp.srq) {
-               /*
-                * Shared receive queues don't generate credits.
-                * Set the credit field to the invalid value.
-                */
-               aeth |= IPATH_AETH_CREDIT_INVAL << IPATH_AETH_CREDIT_SHIFT;
-       } else {
-               u32 min, max, x;
-               u32 credits;
-               struct ipath_rwq *wq = qp->r_rq.wq;
-               u32 head;
-               u32 tail;
-
-               /* sanity check pointers before trusting them */
-               head = wq->head;
-               if (head >= qp->r_rq.size)
-                       head = 0;
-               tail = wq->tail;
-               if (tail >= qp->r_rq.size)
-                       tail = 0;
-               /*
-                * Compute the number of credits available (RWQEs).
-                * XXX Not holding the r_rq.lock here so there is a small
-                * chance that the pair of reads are not atomic.
-                */
-               credits = head - tail;
-               if ((int)credits < 0)
-                       credits += qp->r_rq.size;
-               /*
-                * Binary search the credit table to find the code to
-                * use.
-                */
-               min = 0;
-               max = 31;
-               for (;;) {
-                       x = (min + max) / 2;
-                       if (credit_table[x] == credits)
-                               break;
-                       if (credit_table[x] > credits)
-                               max = x;
-                       else if (min == x)
-                               break;
-                       else
-                               min = x;
-               }
-               aeth |= x << IPATH_AETH_CREDIT_SHIFT;
-       }
-       return cpu_to_be32(aeth);
-}
-
-/**
- * ipath_create_qp - create a queue pair for a device
- * @ibpd: the protection domain who's device we create the queue pair for
- * @init_attr: the attributes of the queue pair
- * @udata: unused by InfiniPath
- *
- * Returns the queue pair on success, otherwise returns an errno.
- *
- * Called by the ib_create_qp() core verbs function.
- */
-struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
-                             struct ib_qp_init_attr *init_attr,
-                             struct ib_udata *udata)
-{
-       struct ipath_qp *qp;
-       int err;
-       struct ipath_swqe *swq = NULL;
-       struct ipath_ibdev *dev;
-       size_t sz;
-       size_t sg_list_sz;
-       struct ib_qp *ret;
-
-       if (init_attr->create_flags) {
-               ret = ERR_PTR(-EINVAL);
-               goto bail;
-       }
-
-       if (init_attr->cap.max_send_sge > ib_ipath_max_sges ||
-           init_attr->cap.max_send_wr > ib_ipath_max_qp_wrs) {
-               ret = ERR_PTR(-EINVAL);
-               goto bail;
-       }
-
-       /* Check receive queue parameters if no SRQ is specified. */
-       if (!init_attr->srq) {
-               if (init_attr->cap.max_recv_sge > ib_ipath_max_sges ||
-                   init_attr->cap.max_recv_wr > ib_ipath_max_qp_wrs) {
-                       ret = ERR_PTR(-EINVAL);
-                       goto bail;
-               }
-               if (init_attr->cap.max_send_sge +
-                   init_attr->cap.max_send_wr +
-                   init_attr->cap.max_recv_sge +
-                   init_attr->cap.max_recv_wr == 0) {
-                       ret = ERR_PTR(-EINVAL);
-                       goto bail;
-               }
-       }
-
-       switch (init_attr->qp_type) {
-       case IB_QPT_UC:
-       case IB_QPT_RC:
-       case IB_QPT_UD:
-       case IB_QPT_SMI:
-       case IB_QPT_GSI:
-               sz = sizeof(struct ipath_sge) *
-                       init_attr->cap.max_send_sge +
-                       sizeof(struct ipath_swqe);
-               swq = vmalloc((init_attr->cap.max_send_wr + 1) * sz);
-               if (swq == NULL) {
-                       ret = ERR_PTR(-ENOMEM);
-                       goto bail;
-               }
-               sz = sizeof(*qp);
-               sg_list_sz = 0;
-               if (init_attr->srq) {
-                       struct ipath_srq *srq = to_isrq(init_attr->srq);
-
-                       if (srq->rq.max_sge > 1)
-                               sg_list_sz = sizeof(*qp->r_sg_list) *
-                                       (srq->rq.max_sge - 1);
-               } else if (init_attr->cap.max_recv_sge > 1)
-                       sg_list_sz = sizeof(*qp->r_sg_list) *
-                               (init_attr->cap.max_recv_sge - 1);
-               qp = kmalloc(sz + sg_list_sz, GFP_KERNEL);
-               if (!qp) {
-                       ret = ERR_PTR(-ENOMEM);
-                       goto bail_swq;
-               }
-               if (sg_list_sz && (init_attr->qp_type == IB_QPT_UD ||
-                   init_attr->qp_type == IB_QPT_SMI ||
-                   init_attr->qp_type == IB_QPT_GSI)) {
-                       qp->r_ud_sg_list = kmalloc(sg_list_sz, GFP_KERNEL);
-                       if (!qp->r_ud_sg_list) {
-                               ret = ERR_PTR(-ENOMEM);
-                               goto bail_qp;
-                       }
-               } else
-                       qp->r_ud_sg_list = NULL;
-               if (init_attr->srq) {
-                       sz = 0;
-                       qp->r_rq.size = 0;
-                       qp->r_rq.max_sge = 0;
-                       qp->r_rq.wq = NULL;
-                       init_attr->cap.max_recv_wr = 0;
-                       init_attr->cap.max_recv_sge = 0;
-               } else {
-                       qp->r_rq.size = init_attr->cap.max_recv_wr + 1;
-                       qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
-                       sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
-                               sizeof(struct ipath_rwqe);
-                       qp->r_rq.wq = vmalloc_user(sizeof(struct ipath_rwq) +
-                                             qp->r_rq.size * sz);
-                       if (!qp->r_rq.wq) {
-                               ret = ERR_PTR(-ENOMEM);
-                               goto bail_sg_list;
-                       }
-               }
-
-               /*
-                * ib_create_qp() will initialize qp->ibqp
-                * except for qp->ibqp.qp_num.
-                */
-               spin_lock_init(&qp->s_lock);
-               spin_lock_init(&qp->r_rq.lock);
-               atomic_set(&qp->refcount, 0);
-               init_waitqueue_head(&qp->wait);
-               init_waitqueue_head(&qp->wait_dma);
-               tasklet_init(&qp->s_task, ipath_do_send, (unsigned long)qp);
-               INIT_LIST_HEAD(&qp->piowait);
-               INIT_LIST_HEAD(&qp->timerwait);
-               qp->state = IB_QPS_RESET;
-               qp->s_wq = swq;
-               qp->s_size = init_attr->cap.max_send_wr + 1;
-               qp->s_max_sge = init_attr->cap.max_send_sge;
-               if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
-                       qp->s_flags = IPATH_S_SIGNAL_REQ_WR;
-               else
-                       qp->s_flags = 0;
-               dev = to_idev(ibpd->device);
-               err = ipath_alloc_qpn(&dev->qp_table, qp,
-                                     init_attr->qp_type);
-               if (err) {
-                       ret = ERR_PTR(err);
-                       vfree(qp->r_rq.wq);
-                       goto bail_sg_list;
-               }
-               qp->ip = NULL;
-               qp->s_tx = NULL;
-               ipath_reset_qp(qp, init_attr->qp_type);
-               break;
-
-       default:
-               /* Don't support raw QPs */
-               ret = ERR_PTR(-ENOSYS);
-               goto bail;
-       }
-
-       init_attr->cap.max_inline_data = 0;
-
-       /*
-        * Return the address of the RWQ as the offset to mmap.
-        * See ipath_mmap() for details.
-        */
-       if (udata && udata->outlen >= sizeof(__u64)) {
-               if (!qp->r_rq.wq) {
-                       __u64 offset = 0;
-
-                       err = ib_copy_to_udata(udata, &offset,
-                                              sizeof(offset));
-                       if (err) {
-                               ret = ERR_PTR(err);
-                               goto bail_ip;
-                       }
-               } else {
-                       u32 s = sizeof(struct ipath_rwq) +
-                               qp->r_rq.size * sz;
-
-                       qp->ip =
-                           ipath_create_mmap_info(dev, s,
-                                                  ibpd->uobject->context,
-                                                  qp->r_rq.wq);
-                       if (!qp->ip) {
-                               ret = ERR_PTR(-ENOMEM);
-                               goto bail_ip;
-                       }
-
-                       err = ib_copy_to_udata(udata, &(qp->ip->offset),
-                                              sizeof(qp->ip->offset));
-                       if (err) {
-                               ret = ERR_PTR(err);
-                               goto bail_ip;
-                       }
-               }
-       }
-
-       spin_lock(&dev->n_qps_lock);
-       if (dev->n_qps_allocated == ib_ipath_max_qps) {
-               spin_unlock(&dev->n_qps_lock);
-               ret = ERR_PTR(-ENOMEM);
-               goto bail_ip;
-       }
-
-       dev->n_qps_allocated++;
-       spin_unlock(&dev->n_qps_lock);
-
-       if (qp->ip) {
-               spin_lock_irq(&dev->pending_lock);
-               list_add(&qp->ip->pending_mmaps, &dev->pending_mmaps);
-               spin_unlock_irq(&dev->pending_lock);
-       }
-
-       ret = &qp->ibqp;
-       goto bail;
-
-bail_ip:
-       if (qp->ip)
-               kref_put(&qp->ip->ref, ipath_release_mmap_info);
-       else
-               vfree(qp->r_rq.wq);
-       ipath_free_qp(&dev->qp_table, qp);
-       free_qpn(&dev->qp_table, qp->ibqp.qp_num);
-bail_sg_list:
-       kfree(qp->r_ud_sg_list);
-bail_qp:
-       kfree(qp);
-bail_swq:
-       vfree(swq);
-bail:
-       return ret;
-}
-
-/**
- * ipath_destroy_qp - destroy a queue pair
- * @ibqp: the queue pair to destroy
- *
- * Returns 0 on success.
- *
- * Note that this can be called while the QP is actively sending or
- * receiving!
- */
-int ipath_destroy_qp(struct ib_qp *ibqp)
-{
-       struct ipath_qp *qp = to_iqp(ibqp);
-       struct ipath_ibdev *dev = to_idev(ibqp->device);
-
-       /* Make sure HW and driver activity is stopped. */
-       spin_lock_irq(&qp->s_lock);
-       if (qp->state != IB_QPS_RESET) {
-               qp->state = IB_QPS_RESET;
-               spin_lock(&dev->pending_lock);
-               if (!list_empty(&qp->timerwait))
-                       list_del_init(&qp->timerwait);
-               if (!list_empty(&qp->piowait))
-                       list_del_init(&qp->piowait);
-               spin_unlock(&dev->pending_lock);
-               qp->s_flags &= ~IPATH_S_ANY_WAIT;
-               spin_unlock_irq(&qp->s_lock);
-               /* Stop the sending tasklet */
-               tasklet_kill(&qp->s_task);
-               wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy));
-       } else
-               spin_unlock_irq(&qp->s_lock);
-
-       ipath_free_qp(&dev->qp_table, qp);
-
-       if (qp->s_tx) {
-               atomic_dec(&qp->refcount);
-               if (qp->s_tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF)
-                       kfree(qp->s_tx->txreq.map_addr);
-               spin_lock_irq(&dev->pending_lock);
-               list_add(&qp->s_tx->txreq.list, &dev->txreq_free);
-               spin_unlock_irq(&dev->pending_lock);
-               qp->s_tx = NULL;
-       }
-
-       wait_event(qp->wait, !atomic_read(&qp->refcount));
-
-       /* all user's cleaned up, mark it available */
-       free_qpn(&dev->qp_table, qp->ibqp.qp_num);
-       spin_lock(&dev->n_qps_lock);
-       dev->n_qps_allocated--;
-       spin_unlock(&dev->n_qps_lock);
-
-       if (qp->ip)
-               kref_put(&qp->ip->ref, ipath_release_mmap_info);
-       else
-               vfree(qp->r_rq.wq);
-       kfree(qp->r_ud_sg_list);
-       vfree(qp->s_wq);
-       kfree(qp);
-       return 0;
-}
-
-/**
- * ipath_init_qp_table - initialize the QP table for a device
- * @idev: the device who's QP table we're initializing
- * @size: the size of the QP table
- *
- * Returns 0 on success, otherwise returns an errno.
- */
-int ipath_init_qp_table(struct ipath_ibdev *idev, int size)
-{
-       int i;
-       int ret;
-
-       idev->qp_table.last = 1;        /* QPN 0 and 1 are special. */
-       idev->qp_table.max = size;
-       idev->qp_table.nmaps = 1;
-       idev->qp_table.table = kcalloc(size, sizeof(*idev->qp_table.table),
-                                      GFP_KERNEL);
-       if (idev->qp_table.table == NULL) {
-               ret = -ENOMEM;
-               goto bail;
-       }
-
-       for (i = 0; i < ARRAY_SIZE(idev->qp_table.map); i++) {
-               atomic_set(&idev->qp_table.map[i].n_free, BITS_PER_PAGE);
-               idev->qp_table.map[i].page = NULL;
-       }
-
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_get_credit - flush the send work queue of a QP
- * @qp: the qp who's send work queue to flush
- * @aeth: the Acknowledge Extended Transport Header
- *
- * The QP s_lock should be held.
- */
-void ipath_get_credit(struct ipath_qp *qp, u32 aeth)
-{
-       u32 credit = (aeth >> IPATH_AETH_CREDIT_SHIFT) & IPATH_AETH_CREDIT_MASK;
-
-       /*
-        * If the credit is invalid, we can send
-        * as many packets as we like.  Otherwise, we have to
-        * honor the credit field.
-        */
-       if (credit == IPATH_AETH_CREDIT_INVAL)
-               qp->s_lsn = (u32) -1;
-       else if (qp->s_lsn != (u32) -1) {
-               /* Compute new LSN (i.e., MSN + credit) */
-               credit = (aeth + credit_table[credit]) & IPATH_MSN_MASK;
-               if (ipath_cmp24(credit, qp->s_lsn) > 0)
-                       qp->s_lsn = credit;
-       }
-
-       /* Restart sending if it was blocked due to lack of credits. */
-       if ((qp->s_flags & IPATH_S_WAIT_SSN_CREDIT) &&
-           qp->s_cur != qp->s_head &&
-           (qp->s_lsn == (u32) -1 ||
-            ipath_cmp24(get_swqe_ptr(qp, qp->s_cur)->ssn,
-                        qp->s_lsn + 1) <= 0))
-               ipath_schedule_send(qp);
-}
diff --git a/drivers/staging/rdma/ipath/ipath_rc.c b/drivers/staging/rdma/ipath/ipath_rc.c
deleted file mode 100644 (file)
index d4aa535..0000000
+++ /dev/null
@@ -1,1969 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/io.h>
-
-#include "ipath_verbs.h"
-#include "ipath_kernel.h"
-
-/* cut down ridiculously long IB macro names */
-#define OP(x) IB_OPCODE_RC_##x
-
-static u32 restart_sge(struct ipath_sge_state *ss, struct ipath_swqe *wqe,
-                      u32 psn, u32 pmtu)
-{
-       u32 len;
-
-       len = ((psn - wqe->psn) & IPATH_PSN_MASK) * pmtu;
-       ss->sge = wqe->sg_list[0];
-       ss->sg_list = wqe->sg_list + 1;
-       ss->num_sge = wqe->wr.num_sge;
-       ipath_skip_sge(ss, len);
-       return wqe->length - len;
-}
-
-/**
- * ipath_init_restart- initialize the qp->s_sge after a restart
- * @qp: the QP who's SGE we're restarting
- * @wqe: the work queue to initialize the QP's SGE from
- *
- * The QP s_lock should be held and interrupts disabled.
- */
-static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe)
-{
-       struct ipath_ibdev *dev;
-
-       qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn,
-                               ib_mtu_enum_to_int(qp->path_mtu));
-       dev = to_idev(qp->ibqp.device);
-       spin_lock(&dev->pending_lock);
-       if (list_empty(&qp->timerwait))
-               list_add_tail(&qp->timerwait,
-                             &dev->pending[dev->pending_index]);
-       spin_unlock(&dev->pending_lock);
-}
-
-/**
- * ipath_make_rc_ack - construct a response packet (ACK, NAK, or RDMA read)
- * @qp: a pointer to the QP
- * @ohdr: a pointer to the IB header being constructed
- * @pmtu: the path MTU
- *
- * Return 1 if constructed; otherwise, return 0.
- * Note that we are in the responder's side of the QP context.
- * Note the QP s_lock must be held.
- */
-static int ipath_make_rc_ack(struct ipath_ibdev *dev, struct ipath_qp *qp,
-                            struct ipath_other_headers *ohdr, u32 pmtu)
-{
-       struct ipath_ack_entry *e;
-       u32 hwords;
-       u32 len;
-       u32 bth0;
-       u32 bth2;
-
-       /* Don't send an ACK if we aren't supposed to. */
-       if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
-               goto bail;
-
-       /* header size in 32-bit words LRH+BTH = (8+12)/4. */
-       hwords = 5;
-
-       switch (qp->s_ack_state) {
-       case OP(RDMA_READ_RESPONSE_LAST):
-       case OP(RDMA_READ_RESPONSE_ONLY):
-       case OP(ATOMIC_ACKNOWLEDGE):
-               /*
-                * We can increment the tail pointer now that the last
-                * response has been sent instead of only being
-                * constructed.
-                */
-               if (++qp->s_tail_ack_queue > IPATH_MAX_RDMA_ATOMIC)
-                       qp->s_tail_ack_queue = 0;
-               /* FALLTHROUGH */
-       case OP(SEND_ONLY):
-       case OP(ACKNOWLEDGE):
-               /* Check for no next entry in the queue. */
-               if (qp->r_head_ack_queue == qp->s_tail_ack_queue) {
-                       if (qp->s_flags & IPATH_S_ACK_PENDING)
-                               goto normal;
-                       qp->s_ack_state = OP(ACKNOWLEDGE);
-                       goto bail;
-               }
-
-               e = &qp->s_ack_queue[qp->s_tail_ack_queue];
-               if (e->opcode == OP(RDMA_READ_REQUEST)) {
-                       /* Copy SGE state in case we need to resend */
-                       qp->s_ack_rdma_sge = e->rdma_sge;
-                       qp->s_cur_sge = &qp->s_ack_rdma_sge;
-                       len = e->rdma_sge.sge.sge_length;
-                       if (len > pmtu) {
-                               len = pmtu;
-                               qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
-                       } else {
-                               qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
-                               e->sent = 1;
-                       }
-                       ohdr->u.aeth = ipath_compute_aeth(qp);
-                       hwords++;
-                       qp->s_ack_rdma_psn = e->psn;
-                       bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK;
-               } else {
-                       /* COMPARE_SWAP or FETCH_ADD */
-                       qp->s_cur_sge = NULL;
-                       len = 0;
-                       qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
-                       ohdr->u.at.aeth = ipath_compute_aeth(qp);
-                       ohdr->u.at.atomic_ack_eth[0] =
-                               cpu_to_be32(e->atomic_data >> 32);
-                       ohdr->u.at.atomic_ack_eth[1] =
-                               cpu_to_be32(e->atomic_data);
-                       hwords += sizeof(ohdr->u.at) / sizeof(u32);
-                       bth2 = e->psn;
-                       e->sent = 1;
-               }
-               bth0 = qp->s_ack_state << 24;
-               break;
-
-       case OP(RDMA_READ_RESPONSE_FIRST):
-               qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE);
-               /* FALLTHROUGH */
-       case OP(RDMA_READ_RESPONSE_MIDDLE):
-               len = qp->s_ack_rdma_sge.sge.sge_length;
-               if (len > pmtu)
-                       len = pmtu;
-               else {
-                       ohdr->u.aeth = ipath_compute_aeth(qp);
-                       hwords++;
-                       qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
-                       qp->s_ack_queue[qp->s_tail_ack_queue].sent = 1;
-               }
-               bth0 = qp->s_ack_state << 24;
-               bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK;
-               break;
-
-       default:
-       normal:
-               /*
-                * Send a regular ACK.
-                * Set the s_ack_state so we wait until after sending
-                * the ACK before setting s_ack_state to ACKNOWLEDGE
-                * (see above).
-                */
-               qp->s_ack_state = OP(SEND_ONLY);
-               qp->s_flags &= ~IPATH_S_ACK_PENDING;
-               qp->s_cur_sge = NULL;
-               if (qp->s_nak_state)
-                       ohdr->u.aeth =
-                               cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
-                                           (qp->s_nak_state <<
-                                            IPATH_AETH_CREDIT_SHIFT));
-               else
-                       ohdr->u.aeth = ipath_compute_aeth(qp);
-               hwords++;
-               len = 0;
-               bth0 = OP(ACKNOWLEDGE) << 24;
-               bth2 = qp->s_ack_psn & IPATH_PSN_MASK;
-       }
-       qp->s_hdrwords = hwords;
-       qp->s_cur_size = len;
-       ipath_make_ruc_header(dev, qp, ohdr, bth0, bth2);
-       return 1;
-
-bail:
-       return 0;
-}
-
-/**
- * ipath_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC)
- * @qp: a pointer to the QP
- *
- * Return 1 if constructed; otherwise, return 0.
- */
-int ipath_make_rc_req(struct ipath_qp *qp)
-{
-       struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-       struct ipath_other_headers *ohdr;
-       struct ipath_sge_state *ss;
-       struct ipath_swqe *wqe;
-       u32 hwords;
-       u32 len;
-       u32 bth0;
-       u32 bth2;
-       u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
-       char newreq;
-       unsigned long flags;
-       int ret = 0;
-
-       ohdr = &qp->s_hdr.u.oth;
-       if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
-               ohdr = &qp->s_hdr.u.l.oth;
-
-       /*
-        * The lock is needed to synchronize between the sending tasklet,
-        * the receive interrupt handler, and timeout resends.
-        */
-       spin_lock_irqsave(&qp->s_lock, flags);
-
-       /* Sending responses has higher priority over sending requests. */
-       if ((qp->r_head_ack_queue != qp->s_tail_ack_queue ||
-            (qp->s_flags & IPATH_S_ACK_PENDING) ||
-            qp->s_ack_state != OP(ACKNOWLEDGE)) &&
-           ipath_make_rc_ack(dev, qp, ohdr, pmtu))
-               goto done;
-
-       if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) {
-               if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND))
-                       goto bail;
-               /* We are in the error state, flush the work request. */
-               if (qp->s_last == qp->s_head)
-                       goto bail;
-               /* If DMAs are in progress, we can't flush immediately. */
-               if (atomic_read(&qp->s_dma_busy)) {
-                       qp->s_flags |= IPATH_S_WAIT_DMA;
-                       goto bail;
-               }
-               wqe = get_swqe_ptr(qp, qp->s_last);
-               ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
-               goto done;
-       }
-
-       /* Leave BUSY set until RNR timeout. */
-       if (qp->s_rnr_timeout) {
-               qp->s_flags |= IPATH_S_WAITING;
-               goto bail;
-       }
-
-       /* header size in 32-bit words LRH+BTH = (8+12)/4. */
-       hwords = 5;
-       bth0 = 1 << 22; /* Set M bit */
-
-       /* Send a request. */
-       wqe = get_swqe_ptr(qp, qp->s_cur);
-       switch (qp->s_state) {
-       default:
-               if (!(ib_ipath_state_ops[qp->state] &
-                   IPATH_PROCESS_NEXT_SEND_OK))
-                       goto bail;
-               /*
-                * Resend an old request or start a new one.
-                *
-                * We keep track of the current SWQE so that
-                * we don't reset the "furthest progress" state
-                * if we need to back up.
-                */
-               newreq = 0;
-               if (qp->s_cur == qp->s_tail) {
-                       /* Check if send work queue is empty. */
-                       if (qp->s_tail == qp->s_head)
-                               goto bail;
-                       /*
-                        * If a fence is requested, wait for previous
-                        * RDMA read and atomic operations to finish.
-                        */
-                       if ((wqe->wr.send_flags & IB_SEND_FENCE) &&
-                           qp->s_num_rd_atomic) {
-                               qp->s_flags |= IPATH_S_FENCE_PENDING;
-                               goto bail;
-                       }
-                       wqe->psn = qp->s_next_psn;
-                       newreq = 1;
-               }
-               /*
-                * Note that we have to be careful not to modify the
-                * original work request since we may need to resend
-                * it.
-                */
-               len = wqe->length;
-               ss = &qp->s_sge;
-               bth2 = 0;
-               switch (wqe->wr.opcode) {
-               case IB_WR_SEND:
-               case IB_WR_SEND_WITH_IMM:
-                       /* If no credit, return. */
-                       if (qp->s_lsn != (u32) -1 &&
-                           ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
-                               qp->s_flags |= IPATH_S_WAIT_SSN_CREDIT;
-                               goto bail;
-                       }
-                       wqe->lpsn = wqe->psn;
-                       if (len > pmtu) {
-                               wqe->lpsn += (len - 1) / pmtu;
-                               qp->s_state = OP(SEND_FIRST);
-                               len = pmtu;
-                               break;
-                       }
-                       if (wqe->wr.opcode == IB_WR_SEND)
-                               qp->s_state = OP(SEND_ONLY);
-                       else {
-                               qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE);
-                               /* Immediate data comes after the BTH */
-                               ohdr->u.imm_data = wqe->wr.ex.imm_data;
-                               hwords += 1;
-                       }
-                       if (wqe->wr.send_flags & IB_SEND_SOLICITED)
-                               bth0 |= 1 << 23;
-                       bth2 = 1 << 31; /* Request ACK. */
-                       if (++qp->s_cur == qp->s_size)
-                               qp->s_cur = 0;
-                       break;
-
-               case IB_WR_RDMA_WRITE:
-                       if (newreq && qp->s_lsn != (u32) -1)
-                               qp->s_lsn++;
-                       /* FALLTHROUGH */
-               case IB_WR_RDMA_WRITE_WITH_IMM:
-                       /* If no credit, return. */
-                       if (qp->s_lsn != (u32) -1 &&
-                           ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
-                               qp->s_flags |= IPATH_S_WAIT_SSN_CREDIT;
-                               goto bail;
-                       }
-                       ohdr->u.rc.reth.vaddr =
-                               cpu_to_be64(wqe->rdma_wr.remote_addr);
-                       ohdr->u.rc.reth.rkey =
-                               cpu_to_be32(wqe->rdma_wr.rkey);
-                       ohdr->u.rc.reth.length = cpu_to_be32(len);
-                       hwords += sizeof(struct ib_reth) / sizeof(u32);
-                       wqe->lpsn = wqe->psn;
-                       if (len > pmtu) {
-                               wqe->lpsn += (len - 1) / pmtu;
-                               qp->s_state = OP(RDMA_WRITE_FIRST);
-                               len = pmtu;
-                               break;
-                       }
-                       if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
-                               qp->s_state = OP(RDMA_WRITE_ONLY);
-                       else {
-                               qp->s_state =
-                                       OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
-                               /* Immediate data comes after RETH */
-                               ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
-                               hwords += 1;
-                               if (wqe->wr.send_flags & IB_SEND_SOLICITED)
-                                       bth0 |= 1 << 23;
-                       }
-                       bth2 = 1 << 31; /* Request ACK. */
-                       if (++qp->s_cur == qp->s_size)
-                               qp->s_cur = 0;
-                       break;
-
-               case IB_WR_RDMA_READ:
-                       /*
-                        * Don't allow more operations to be started
-                        * than the QP limits allow.
-                        */
-                       if (newreq) {
-                               if (qp->s_num_rd_atomic >=
-                                   qp->s_max_rd_atomic) {
-                                       qp->s_flags |= IPATH_S_RDMAR_PENDING;
-                                       goto bail;
-                               }
-                               qp->s_num_rd_atomic++;
-                               if (qp->s_lsn != (u32) -1)
-                                       qp->s_lsn++;
-                               /*
-                                * Adjust s_next_psn to count the
-                                * expected number of responses.
-                                */
-                               if (len > pmtu)
-                                       qp->s_next_psn += (len - 1) / pmtu;
-                               wqe->lpsn = qp->s_next_psn++;
-                       }
-                       ohdr->u.rc.reth.vaddr =
-                               cpu_to_be64(wqe->rdma_wr.remote_addr);
-                       ohdr->u.rc.reth.rkey =
-                               cpu_to_be32(wqe->rdma_wr.rkey);
-                       ohdr->u.rc.reth.length = cpu_to_be32(len);
-                       qp->s_state = OP(RDMA_READ_REQUEST);
-                       hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
-                       ss = NULL;
-                       len = 0;
-                       if (++qp->s_cur == qp->s_size)
-                               qp->s_cur = 0;
-                       break;
-
-               case IB_WR_ATOMIC_CMP_AND_SWP:
-               case IB_WR_ATOMIC_FETCH_AND_ADD:
-                       /*
-                        * Don't allow more operations to be started
-                        * than the QP limits allow.
-                        */
-                       if (newreq) {
-                               if (qp->s_num_rd_atomic >=
-                                   qp->s_max_rd_atomic) {
-                                       qp->s_flags |= IPATH_S_RDMAR_PENDING;
-                                       goto bail;
-                               }
-                               qp->s_num_rd_atomic++;
-                               if (qp->s_lsn != (u32) -1)
-                                       qp->s_lsn++;
-                               wqe->lpsn = wqe->psn;
-                       }
-                       if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
-                               qp->s_state = OP(COMPARE_SWAP);
-                               ohdr->u.atomic_eth.swap_data = cpu_to_be64(
-                                       wqe->atomic_wr.swap);
-                               ohdr->u.atomic_eth.compare_data = cpu_to_be64(
-                                       wqe->atomic_wr.compare_add);
-                       } else {
-                               qp->s_state = OP(FETCH_ADD);
-                               ohdr->u.atomic_eth.swap_data = cpu_to_be64(
-                                       wqe->atomic_wr.compare_add);
-                               ohdr->u.atomic_eth.compare_data = 0;
-                       }
-                       ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32(
-                               wqe->atomic_wr.remote_addr >> 32);
-                       ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32(
-                               wqe->atomic_wr.remote_addr);
-                       ohdr->u.atomic_eth.rkey = cpu_to_be32(
-                               wqe->atomic_wr.rkey);
-                       hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);
-                       ss = NULL;
-                       len = 0;
-                       if (++qp->s_cur == qp->s_size)
-                               qp->s_cur = 0;
-                       break;
-
-               default:
-                       goto bail;
-               }
-               qp->s_sge.sge = wqe->sg_list[0];
-               qp->s_sge.sg_list = wqe->sg_list + 1;
-               qp->s_sge.num_sge = wqe->wr.num_sge;
-               qp->s_len = wqe->length;
-               if (newreq) {
-                       qp->s_tail++;
-                       if (qp->s_tail >= qp->s_size)
-                               qp->s_tail = 0;
-               }
-               bth2 |= qp->s_psn & IPATH_PSN_MASK;
-               if (wqe->wr.opcode == IB_WR_RDMA_READ)
-                       qp->s_psn = wqe->lpsn + 1;
-               else {
-                       qp->s_psn++;
-                       if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
-                               qp->s_next_psn = qp->s_psn;
-               }
-               /*
-                * Put the QP on the pending list so lost ACKs will cause
-                * a retry.  More than one request can be pending so the
-                * QP may already be on the dev->pending list.
-                */
-               spin_lock(&dev->pending_lock);
-               if (list_empty(&qp->timerwait))
-                       list_add_tail(&qp->timerwait,
-                                     &dev->pending[dev->pending_index]);
-               spin_unlock(&dev->pending_lock);
-               break;
-
-       case OP(RDMA_READ_RESPONSE_FIRST):
-               /*
-                * This case can only happen if a send is restarted.
-                * See ipath_restart_rc().
-                */
-               ipath_init_restart(qp, wqe);
-               /* FALLTHROUGH */
-       case OP(SEND_FIRST):
-               qp->s_state = OP(SEND_MIDDLE);
-               /* FALLTHROUGH */
-       case OP(SEND_MIDDLE):
-               bth2 = qp->s_psn++ & IPATH_PSN_MASK;
-               if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
-                       qp->s_next_psn = qp->s_psn;
-               ss = &qp->s_sge;
-               len = qp->s_len;
-               if (len > pmtu) {
-                       len = pmtu;
-                       break;
-               }
-               if (wqe->wr.opcode == IB_WR_SEND)
-                       qp->s_state = OP(SEND_LAST);
-               else {
-                       qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
-                       /* Immediate data comes after the BTH */
-                       ohdr->u.imm_data = wqe->wr.ex.imm_data;
-                       hwords += 1;
-               }
-               if (wqe->wr.send_flags & IB_SEND_SOLICITED)
-                       bth0 |= 1 << 23;
-               bth2 |= 1 << 31;        /* Request ACK. */
-               qp->s_cur++;
-               if (qp->s_cur >= qp->s_size)
-                       qp->s_cur = 0;
-               break;
-
-       case OP(RDMA_READ_RESPONSE_LAST):
-               /*
-                * This case can only happen if a RDMA write is restarted.
-                * See ipath_restart_rc().
-                */
-               ipath_init_restart(qp, wqe);
-               /* FALLTHROUGH */
-       case OP(RDMA_WRITE_FIRST):
-               qp->s_state = OP(RDMA_WRITE_MIDDLE);
-               /* FALLTHROUGH */
-       case OP(RDMA_WRITE_MIDDLE):
-               bth2 = qp->s_psn++ & IPATH_PSN_MASK;
-               if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
-                       qp->s_next_psn = qp->s_psn;
-               ss = &qp->s_sge;
-               len = qp->s_len;
-               if (len > pmtu) {
-                       len = pmtu;
-                       break;
-               }
-               if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
-                       qp->s_state = OP(RDMA_WRITE_LAST);
-               else {
-                       qp->s_state = OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
-                       /* Immediate data comes after the BTH */
-                       ohdr->u.imm_data = wqe->wr.ex.imm_data;
-                       hwords += 1;
-                       if (wqe->wr.send_flags & IB_SEND_SOLICITED)
-                               bth0 |= 1 << 23;
-               }
-               bth2 |= 1 << 31;        /* Request ACK. */
-               qp->s_cur++;
-               if (qp->s_cur >= qp->s_size)
-                       qp->s_cur = 0;
-               break;
-
-       case OP(RDMA_READ_RESPONSE_MIDDLE):
-               /*
-                * This case can only happen if a RDMA read is restarted.
-                * See ipath_restart_rc().
-                */
-               ipath_init_restart(qp, wqe);
-               len = ((qp->s_psn - wqe->psn) & IPATH_PSN_MASK) * pmtu;
-               ohdr->u.rc.reth.vaddr =
-                       cpu_to_be64(wqe->rdma_wr.remote_addr + len);
-               ohdr->u.rc.reth.rkey =
-                       cpu_to_be32(wqe->rdma_wr.rkey);
-               ohdr->u.rc.reth.length = cpu_to_be32(qp->s_len);
-               qp->s_state = OP(RDMA_READ_REQUEST);
-               hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
-               bth2 = qp->s_psn & IPATH_PSN_MASK;
-               qp->s_psn = wqe->lpsn + 1;
-               ss = NULL;
-               len = 0;
-               qp->s_cur++;
-               if (qp->s_cur == qp->s_size)
-                       qp->s_cur = 0;
-               break;
-       }
-       if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT - 1) >= 0)
-               bth2 |= 1 << 31;        /* Request ACK. */
-       qp->s_len -= len;
-       qp->s_hdrwords = hwords;
-       qp->s_cur_sge = ss;
-       qp->s_cur_size = len;
-       ipath_make_ruc_header(dev, qp, ohdr, bth0 | (qp->s_state << 24), bth2);
-done:
-       ret = 1;
-       goto unlock;
-
-bail:
-       qp->s_flags &= ~IPATH_S_BUSY;
-unlock:
-       spin_unlock_irqrestore(&qp->s_lock, flags);
-       return ret;
-}
-
-/**
- * send_rc_ack - Construct an ACK packet and send it
- * @qp: a pointer to the QP
- *
- * This is called from ipath_rc_rcv() and only uses the receive
- * side QP state.
- * Note that RDMA reads and atomics are handled in the
- * send side QP state and tasklet.
- */
-static void send_rc_ack(struct ipath_qp *qp)
-{
-       struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-       struct ipath_devdata *dd;
-       u16 lrh0;
-       u32 bth0;
-       u32 hwords;
-       u32 __iomem *piobuf;
-       struct ipath_ib_header hdr;
-       struct ipath_other_headers *ohdr;
-       unsigned long flags;
-
-       spin_lock_irqsave(&qp->s_lock, flags);
-
-       /* Don't send ACK or NAK if a RDMA read or atomic is pending. */
-       if (qp->r_head_ack_queue != qp->s_tail_ack_queue ||
-           (qp->s_flags & IPATH_S_ACK_PENDING) ||
-           qp->s_ack_state != OP(ACKNOWLEDGE))
-               goto queue_ack;
-
-       spin_unlock_irqrestore(&qp->s_lock, flags);
-
-       /* Don't try to send ACKs if the link isn't ACTIVE */
-       dd = dev->dd;
-       if (!(dd->ipath_flags & IPATH_LINKACTIVE))
-               goto done;
-
-       piobuf = ipath_getpiobuf(dd, 0, NULL);
-       if (!piobuf) {
-               /*
-                * We are out of PIO buffers at the moment.
-                * Pass responsibility for sending the ACK to the
-                * send tasklet so that when a PIO buffer becomes
-                * available, the ACK is sent ahead of other outgoing
-                * packets.
-                */
-               spin_lock_irqsave(&qp->s_lock, flags);
-               goto queue_ack;
-       }
-
-       /* Construct the header. */
-       ohdr = &hdr.u.oth;
-       lrh0 = IPATH_LRH_BTH;
-       /* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4. */
-       hwords = 6;
-       if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
-               hwords += ipath_make_grh(dev, &hdr.u.l.grh,
-                                        &qp->remote_ah_attr.grh,
-                                        hwords, 0);
-               ohdr = &hdr.u.l.oth;
-               lrh0 = IPATH_LRH_GRH;
-       }
-       /* read pkey_index w/o lock (its atomic) */
-       bth0 = ipath_get_pkey(dd, qp->s_pkey_index) |
-               (OP(ACKNOWLEDGE) << 24) | (1 << 22);
-       if (qp->r_nak_state)
-               ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
-                                           (qp->r_nak_state <<
-                                            IPATH_AETH_CREDIT_SHIFT));
-       else
-               ohdr->u.aeth = ipath_compute_aeth(qp);
-       lrh0 |= qp->remote_ah_attr.sl << 4;
-       hdr.lrh[0] = cpu_to_be16(lrh0);
-       hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
-       hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC);
-       hdr.lrh[3] = cpu_to_be16(dd->ipath_lid |
-                                qp->remote_ah_attr.src_path_bits);
-       ohdr->bth[0] = cpu_to_be32(bth0);
-       ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
-       ohdr->bth[2] = cpu_to_be32(qp->r_ack_psn & IPATH_PSN_MASK);
-
-       writeq(hwords + 1, piobuf);
-
-       if (dd->ipath_flags & IPATH_PIO_FLUSH_WC) {
-               u32 *hdrp = (u32 *) &hdr;
-
-               ipath_flush_wc();
-               __iowrite32_copy(piobuf + 2, hdrp, hwords - 1);
-               ipath_flush_wc();
-               __raw_writel(hdrp[hwords - 1], piobuf + hwords + 1);
-       } else
-               __iowrite32_copy(piobuf + 2, (u32 *) &hdr, hwords);
-
-       ipath_flush_wc();
-
-       dev->n_unicast_xmit++;
-       goto done;
-
-queue_ack:
-       if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK) {
-               dev->n_rc_qacks++;
-               qp->s_flags |= IPATH_S_ACK_PENDING;
-               qp->s_nak_state = qp->r_nak_state;
-               qp->s_ack_psn = qp->r_ack_psn;
-
-               /* Schedule the send tasklet. */
-               ipath_schedule_send(qp);
-       }
-       spin_unlock_irqrestore(&qp->s_lock, flags);
-done:
-       return;
-}
-
-/**
- * reset_psn - reset the QP state to send starting from PSN
- * @qp: the QP
- * @psn: the packet sequence number to restart at
- *
- * This is called from ipath_rc_rcv() to process an incoming RC ACK
- * for the given QP.
- * Called at interrupt level with the QP s_lock held.
- */
-static void reset_psn(struct ipath_qp *qp, u32 psn)
-{
-       u32 n = qp->s_last;
-       struct ipath_swqe *wqe = get_swqe_ptr(qp, n);
-       u32 opcode;
-
-       qp->s_cur = n;
-
-       /*
-        * If we are starting the request from the beginning,
-        * let the normal send code handle initialization.
-        */
-       if (ipath_cmp24(psn, wqe->psn) <= 0) {
-               qp->s_state = OP(SEND_LAST);
-               goto done;
-       }
-
-       /* Find the work request opcode corresponding to the given PSN. */
-       opcode = wqe->wr.opcode;
-       for (;;) {
-               int diff;
-
-               if (++n == qp->s_size)
-                       n = 0;
-               if (n == qp->s_tail)
-                       break;
-               wqe = get_swqe_ptr(qp, n);
-               diff = ipath_cmp24(psn, wqe->psn);
-               if (diff < 0)
-                       break;
-               qp->s_cur = n;
-               /*
-                * If we are starting the request from the beginning,
-                * let the normal send code handle initialization.
-                */
-               if (diff == 0) {
-                       qp->s_state = OP(SEND_LAST);
-                       goto done;
-               }
-               opcode = wqe->wr.opcode;
-       }
-
-       /*
-        * Set the state to restart in the middle of a request.
-        * Don't change the s_sge, s_cur_sge, or s_cur_size.
-        * See ipath_make_rc_req().
-        */
-       switch (opcode) {
-       case IB_WR_SEND:
-       case IB_WR_SEND_WITH_IMM:
-               qp->s_state = OP(RDMA_READ_RESPONSE_FIRST);
-               break;
-
-       case IB_WR_RDMA_WRITE:
-       case IB_WR_RDMA_WRITE_WITH_IMM:
-               qp->s_state = OP(RDMA_READ_RESPONSE_LAST);
-               break;
-
-       case IB_WR_RDMA_READ:
-               qp->s_state = OP(RDMA_READ_RESPONSE_MIDDLE);
-               break;
-
-       default:
-               /*
-                * This case shouldn't happen since its only
-                * one PSN per req.
-                */
-               qp->s_state = OP(SEND_LAST);
-       }
-done:
-       qp->s_psn = psn;
-}
-
-/**
- * ipath_restart_rc - back up requester to resend the last un-ACKed request
- * @qp: the QP to restart
- * @psn: packet sequence number for the request
- * @wc: the work completion request
- *
- * The QP s_lock should be held and interrupts disabled.
- */
-void ipath_restart_rc(struct ipath_qp *qp, u32 psn)
-{
-       struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
-       struct ipath_ibdev *dev;
-
-       if (qp->s_retry == 0) {
-               ipath_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
-               ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
-               goto bail;
-       }
-       qp->s_retry--;
-
-       /*
-        * Remove the QP from the timeout queue.
-        * Note: it may already have been removed by ipath_ib_timer().
-        */
-       dev = to_idev(qp->ibqp.device);
-       spin_lock(&dev->pending_lock);
-       if (!list_empty(&qp->timerwait))
-               list_del_init(&qp->timerwait);
-       if (!list_empty(&qp->piowait))
-               list_del_init(&qp->piowait);
-       spin_unlock(&dev->pending_lock);
-
-       if (wqe->wr.opcode == IB_WR_RDMA_READ)
-               dev->n_rc_resends++;
-       else
-               dev->n_rc_resends += (qp->s_psn - psn) & IPATH_PSN_MASK;
-
-       reset_psn(qp, psn);
-       ipath_schedule_send(qp);
-
-bail:
-       return;
-}
-
-static inline void update_last_psn(struct ipath_qp *qp, u32 psn)
-{
-       qp->s_last_psn = psn;
-}
-
-/**
- * do_rc_ack - process an incoming RC ACK
- * @qp: the QP the ACK came in on
- * @psn: the packet sequence number of the ACK
- * @opcode: the opcode of the request that resulted in the ACK
- *
- * This is called from ipath_rc_rcv_resp() to process an incoming RC ACK
- * for the given QP.
- * Called at interrupt level with the QP s_lock held and interrupts disabled.
- * Returns 1 if OK, 0 if current operation should be aborted (NAK).
- */
-static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
-                    u64 val)
-{
-       struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-       struct ib_wc wc;
-       enum ib_wc_status status;
-       struct ipath_swqe *wqe;
-       int ret = 0;
-       u32 ack_psn;
-       int diff;
-
-       /*
-        * Remove the QP from the timeout queue (or RNR timeout queue).
-        * If ipath_ib_timer() has already removed it,
-        * it's OK since we hold the QP s_lock and ipath_restart_rc()
-        * just won't find anything to restart if we ACK everything.
-        */
-       spin_lock(&dev->pending_lock);
-       if (!list_empty(&qp->timerwait))
-               list_del_init(&qp->timerwait);
-       spin_unlock(&dev->pending_lock);
-
-       /*
-        * Note that NAKs implicitly ACK outstanding SEND and RDMA write
-        * requests and implicitly NAK RDMA read and atomic requests issued
-        * before the NAK'ed request.  The MSN won't include the NAK'ed
-        * request but will include an ACK'ed request(s).
-        */
-       ack_psn = psn;
-       if (aeth >> 29)
-               ack_psn--;
-       wqe = get_swqe_ptr(qp, qp->s_last);
-
-       /*
-        * The MSN might be for a later WQE than the PSN indicates so
-        * only complete WQEs that the PSN finishes.
-        */
-       while ((diff = ipath_cmp24(ack_psn, wqe->lpsn)) >= 0) {
-               /*
-                * RDMA_READ_RESPONSE_ONLY is a special case since
-                * we want to generate completion events for everything
-                * before the RDMA read, copy the data, then generate
-                * the completion for the read.
-                */
-               if (wqe->wr.opcode == IB_WR_RDMA_READ &&
-                   opcode == OP(RDMA_READ_RESPONSE_ONLY) &&
-                   diff == 0) {
-                       ret = 1;
-                       goto bail;
-               }
-               /*
-                * If this request is a RDMA read or atomic, and the ACK is
-                * for a later operation, this ACK NAKs the RDMA read or
-                * atomic.  In other words, only a RDMA_READ_LAST or ONLY
-                * can ACK a RDMA read and likewise for atomic ops.  Note
-                * that the NAK case can only happen if relaxed ordering is
-                * used and requests are sent after an RDMA read or atomic
-                * is sent but before the response is received.
-                */
-               if ((wqe->wr.opcode == IB_WR_RDMA_READ &&
-                    (opcode != OP(RDMA_READ_RESPONSE_LAST) || diff != 0)) ||
-                   ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
-                     wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&
-                    (opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0))) {
-                       /*
-                        * The last valid PSN seen is the previous
-                        * request's.
-                        */
-                       update_last_psn(qp, wqe->psn - 1);
-                       /* Retry this request. */
-                       ipath_restart_rc(qp, wqe->psn);
-                       /*
-                        * No need to process the ACK/NAK since we are
-                        * restarting an earlier request.
-                        */
-                       goto bail;
-               }
-               if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
-                   wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
-                       *(u64 *) wqe->sg_list[0].vaddr = val;
-               if (qp->s_num_rd_atomic &&
-                   (wqe->wr.opcode == IB_WR_RDMA_READ ||
-                    wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
-                    wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) {
-                       qp->s_num_rd_atomic--;
-                       /* Restart sending task if fence is complete */
-                       if (((qp->s_flags & IPATH_S_FENCE_PENDING) &&
-                            !qp->s_num_rd_atomic) ||
-                           qp->s_flags & IPATH_S_RDMAR_PENDING)
-                               ipath_schedule_send(qp);
-               }
-               /* Post a send completion queue entry if requested. */
-               if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
-                   (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
-                       memset(&wc, 0, sizeof wc);
-                       wc.wr_id = wqe->wr.wr_id;
-                       wc.status = IB_WC_SUCCESS;
-                       wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
-                       wc.byte_len = wqe->length;
-                       wc.qp = &qp->ibqp;
-                       wc.src_qp = qp->remote_qpn;
-                       wc.slid = qp->remote_ah_attr.dlid;
-                       wc.sl = qp->remote_ah_attr.sl;
-                       ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
-               }
-               qp->s_retry = qp->s_retry_cnt;
-               /*
-                * If we are completing a request which is in the process of
-                * being resent, we can stop resending it since we know the
-                * responder has already seen it.
-                */
-               if (qp->s_last == qp->s_cur) {
-                       if (++qp->s_cur >= qp->s_size)
-                               qp->s_cur = 0;
-                       qp->s_last = qp->s_cur;
-                       if (qp->s_last == qp->s_tail)
-                               break;
-                       wqe = get_swqe_ptr(qp, qp->s_cur);
-                       qp->s_state = OP(SEND_LAST);
-                       qp->s_psn = wqe->psn;
-               } else {
-                       if (++qp->s_last >= qp->s_size)
-                               qp->s_last = 0;
-                       if (qp->state == IB_QPS_SQD && qp->s_last == qp->s_cur)
-                               qp->s_draining = 0;
-                       if (qp->s_last == qp->s_tail)
-                               break;
-                       wqe = get_swqe_ptr(qp, qp->s_last);
-               }
-       }
-
-       switch (aeth >> 29) {
-       case 0:         /* ACK */
-               dev->n_rc_acks++;
-               /* If this is a partial ACK, reset the retransmit timer. */
-               if (qp->s_last != qp->s_tail) {
-                       spin_lock(&dev->pending_lock);
-                       if (list_empty(&qp->timerwait))
-                               list_add_tail(&qp->timerwait,
-                                       &dev->pending[dev->pending_index]);
-                       spin_unlock(&dev->pending_lock);
-                       /*
-                        * If we get a partial ACK for a resent operation,
-                        * we can stop resending the earlier packets and
-                        * continue with the next packet the receiver wants.
-                        */
-                       if (ipath_cmp24(qp->s_psn, psn) <= 0) {
-                               reset_psn(qp, psn + 1);
-                               ipath_schedule_send(qp);
-                       }
-               } else if (ipath_cmp24(qp->s_psn, psn) <= 0) {
-                       qp->s_state = OP(SEND_LAST);
-                       qp->s_psn = psn + 1;
-               }
-               ipath_get_credit(qp, aeth);
-               qp->s_rnr_retry = qp->s_rnr_retry_cnt;
-               qp->s_retry = qp->s_retry_cnt;
-               update_last_psn(qp, psn);
-               ret = 1;
-               goto bail;
-
-       case 1:         /* RNR NAK */
-               dev->n_rnr_naks++;
-               if (qp->s_last == qp->s_tail)
-                       goto bail;
-               if (qp->s_rnr_retry == 0) {
-                       status = IB_WC_RNR_RETRY_EXC_ERR;
-                       goto class_b;
-               }
-               if (qp->s_rnr_retry_cnt < 7)
-                       qp->s_rnr_retry--;
-
-               /* The last valid PSN is the previous PSN. */
-               update_last_psn(qp, psn - 1);
-
-               if (wqe->wr.opcode == IB_WR_RDMA_READ)
-                       dev->n_rc_resends++;
-               else
-                       dev->n_rc_resends +=
-                               (qp->s_psn - psn) & IPATH_PSN_MASK;
-
-               reset_psn(qp, psn);
-
-               qp->s_rnr_timeout =
-                       ib_ipath_rnr_table[(aeth >> IPATH_AETH_CREDIT_SHIFT) &
-                                          IPATH_AETH_CREDIT_MASK];
-               ipath_insert_rnr_queue(qp);
-               ipath_schedule_send(qp);
-               goto bail;
-
-       case 3:         /* NAK */
-               if (qp->s_last == qp->s_tail)
-                       goto bail;
-               /* The last valid PSN is the previous PSN. */
-               update_last_psn(qp, psn - 1);
-               switch ((aeth >> IPATH_AETH_CREDIT_SHIFT) &
-                       IPATH_AETH_CREDIT_MASK) {
-               case 0: /* PSN sequence error */
-                       dev->n_seq_naks++;
-                       /*
-                        * Back up to the responder's expected PSN.
-                        * Note that we might get a NAK in the middle of an
-                        * RDMA READ response which terminates the RDMA
-                        * READ.
-                        */
-                       ipath_restart_rc(qp, psn);
-                       break;
-
-               case 1: /* Invalid Request */
-                       status = IB_WC_REM_INV_REQ_ERR;
-                       dev->n_other_naks++;
-                       goto class_b;
-
-               case 2: /* Remote Access Error */
-                       status = IB_WC_REM_ACCESS_ERR;
-                       dev->n_other_naks++;
-                       goto class_b;
-
-               case 3: /* Remote Operation Error */
-                       status = IB_WC_REM_OP_ERR;
-                       dev->n_other_naks++;
-               class_b:
-                       ipath_send_complete(qp, wqe, status);
-                       ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
-                       break;
-
-               default:
-                       /* Ignore other reserved NAK error codes */
-                       goto reserved;
-               }
-               qp->s_rnr_retry = qp->s_rnr_retry_cnt;
-               goto bail;
-
-       default:                /* 2: reserved */
-       reserved:
-               /* Ignore reserved NAK codes. */
-               goto bail;
-       }
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_rc_rcv_resp - process an incoming RC response packet
- * @dev: the device this packet came in on
- * @ohdr: the other headers for this packet
- * @data: the packet data
- * @tlen: the packet length
- * @qp: the QP for this packet
- * @opcode: the opcode for this packet
- * @psn: the packet sequence number for this packet
- * @hdrsize: the header length
- * @pmtu: the path MTU
- * @header_in_data: true if part of the header data is in the data buffer
- *
- * This is called from ipath_rc_rcv() to process an incoming RC response
- * packet for the given QP.
- * Called at interrupt level.
- */
-static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
-                                    struct ipath_other_headers *ohdr,
-                                    void *data, u32 tlen,
-                                    struct ipath_qp *qp,
-                                    u32 opcode,
-                                    u32 psn, u32 hdrsize, u32 pmtu,
-                                    int header_in_data)
-{
-       struct ipath_swqe *wqe;
-       enum ib_wc_status status;
-       unsigned long flags;
-       int diff;
-       u32 pad;
-       u32 aeth;
-       u64 val;
-
-       spin_lock_irqsave(&qp->s_lock, flags);
-
-       /* Double check we can process this now that we hold the s_lock. */
-       if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
-               goto ack_done;
-
-       /* Ignore invalid responses. */
-       if (ipath_cmp24(psn, qp->s_next_psn) >= 0)
-               goto ack_done;
-
-       /* Ignore duplicate responses. */
-       diff = ipath_cmp24(psn, qp->s_last_psn);
-       if (unlikely(diff <= 0)) {
-               /* Update credits for "ghost" ACKs */
-               if (diff == 0 && opcode == OP(ACKNOWLEDGE)) {
-                       if (!header_in_data)
-                               aeth = be32_to_cpu(ohdr->u.aeth);
-                       else {
-                               aeth = be32_to_cpu(((__be32 *) data)[0]);
-                               data += sizeof(__be32);
-                       }
-                       if ((aeth >> 29) == 0)
-                               ipath_get_credit(qp, aeth);
-               }
-               goto ack_done;
-       }
-
-       if (unlikely(qp->s_last == qp->s_tail))
-               goto ack_done;
-       wqe = get_swqe_ptr(qp, qp->s_last);
-       status = IB_WC_SUCCESS;
-
-       switch (opcode) {
-       case OP(ACKNOWLEDGE):
-       case OP(ATOMIC_ACKNOWLEDGE):
-       case OP(RDMA_READ_RESPONSE_FIRST):
-               if (!header_in_data)
-                       aeth = be32_to_cpu(ohdr->u.aeth);
-               else {
-                       aeth = be32_to_cpu(((__be32 *) data)[0]);
-                       data += sizeof(__be32);
-               }
-               if (opcode == OP(ATOMIC_ACKNOWLEDGE)) {
-                       if (!header_in_data) {
-                               __be32 *p = ohdr->u.at.atomic_ack_eth;
-
-                               val = ((u64) be32_to_cpu(p[0]) << 32) |
-                                       be32_to_cpu(p[1]);
-                       } else
-                               val = be64_to_cpu(((__be64 *) data)[0]);
-               } else
-                       val = 0;
-               if (!do_rc_ack(qp, aeth, psn, opcode, val) ||
-                   opcode != OP(RDMA_READ_RESPONSE_FIRST))
-                       goto ack_done;
-               hdrsize += 4;
-               wqe = get_swqe_ptr(qp, qp->s_last);
-               if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
-                       goto ack_op_err;
-               qp->r_flags &= ~IPATH_R_RDMAR_SEQ;
-               /*
-                * If this is a response to a resent RDMA read, we
-                * have to be careful to copy the data to the right
-                * location.
-                */
-               qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
-                                                 wqe, psn, pmtu);
-               goto read_middle;
-
-       case OP(RDMA_READ_RESPONSE_MIDDLE):
-               /* no AETH, no ACK */
-               if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
-                       dev->n_rdma_seq++;
-                       if (qp->r_flags & IPATH_R_RDMAR_SEQ)
-                               goto ack_done;
-                       qp->r_flags |= IPATH_R_RDMAR_SEQ;
-                       ipath_restart_rc(qp, qp->s_last_psn + 1);
-                       goto ack_done;
-               }
-               if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
-                       goto ack_op_err;
-       read_middle:
-               if (unlikely(tlen != (hdrsize + pmtu + 4)))
-                       goto ack_len_err;
-               if (unlikely(pmtu >= qp->s_rdma_read_len))
-                       goto ack_len_err;
-
-               /* We got a response so update the timeout. */
-               spin_lock(&dev->pending_lock);
-               if (qp->s_rnr_timeout == 0 && !list_empty(&qp->timerwait))
-                       list_move_tail(&qp->timerwait,
-                                      &dev->pending[dev->pending_index]);
-               spin_unlock(&dev->pending_lock);
-
-               if (opcode == OP(RDMA_READ_RESPONSE_MIDDLE))
-                       qp->s_retry = qp->s_retry_cnt;
-
-               /*
-                * Update the RDMA receive state but do the copy w/o
-                * holding the locks and blocking interrupts.
-                */
-               qp->s_rdma_read_len -= pmtu;
-               update_last_psn(qp, psn);
-               spin_unlock_irqrestore(&qp->s_lock, flags);
-               ipath_copy_sge(&qp->s_rdma_read_sge, data, pmtu);
-               goto bail;
-
-       case OP(RDMA_READ_RESPONSE_ONLY):
-               if (!header_in_data)
-                       aeth = be32_to_cpu(ohdr->u.aeth);
-               else
-                       aeth = be32_to_cpu(((__be32 *) data)[0]);
-               if (!do_rc_ack(qp, aeth, psn, opcode, 0))
-                       goto ack_done;
-               /* Get the number of bytes the message was padded by. */
-               pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
-               /*
-                * Check that the data size is >= 0 && <= pmtu.
-                * Remember to account for the AETH header (4) and
-                * ICRC (4).
-                */
-               if (unlikely(tlen < (hdrsize + pad + 8)))
-                       goto ack_len_err;
-               /*
-                * If this is a response to a resent RDMA read, we
-                * have to be careful to copy the data to the right
-                * location.
-                */
-               wqe = get_swqe_ptr(qp, qp->s_last);
-               qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
-                                                 wqe, psn, pmtu);
-               goto read_last;
-
-       case OP(RDMA_READ_RESPONSE_LAST):
-               /* ACKs READ req. */
-               if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
-                       dev->n_rdma_seq++;
-                       if (qp->r_flags & IPATH_R_RDMAR_SEQ)
-                               goto ack_done;
-                       qp->r_flags |= IPATH_R_RDMAR_SEQ;
-                       ipath_restart_rc(qp, qp->s_last_psn + 1);
-                       goto ack_done;
-               }
-               if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
-                       goto ack_op_err;
-               /* Get the number of bytes the message was padded by. */
-               pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
-               /*
-                * Check that the data size is >= 1 && <= pmtu.
-                * Remember to account for the AETH header (4) and
-                * ICRC (4).
-                */
-               if (unlikely(tlen <= (hdrsize + pad + 8)))
-                       goto ack_len_err;
-       read_last:
-               tlen -= hdrsize + pad + 8;
-               if (unlikely(tlen != qp->s_rdma_read_len))
-                       goto ack_len_err;
-               if (!header_in_data)
-                       aeth = be32_to_cpu(ohdr->u.aeth);
-               else {
-                       aeth = be32_to_cpu(((__be32 *) data)[0]);
-                       data += sizeof(__be32);
-               }
-               ipath_copy_sge(&qp->s_rdma_read_sge, data, tlen);
-               (void) do_rc_ack(qp, aeth, psn,
-                                OP(RDMA_READ_RESPONSE_LAST), 0);
-               goto ack_done;
-       }
-
-ack_op_err:
-       status = IB_WC_LOC_QP_OP_ERR;
-       goto ack_err;
-
-ack_len_err:
-       status = IB_WC_LOC_LEN_ERR;
-ack_err:
-       ipath_send_complete(qp, wqe, status);
-       ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
-ack_done:
-       spin_unlock_irqrestore(&qp->s_lock, flags);
-bail:
-       return;
-}
-
-/**
- * ipath_rc_rcv_error - process an incoming duplicate or error RC packet
- * @dev: the device this packet came in on
- * @ohdr: the other headers for this packet
- * @data: the packet data
- * @qp: the QP for this packet
- * @opcode: the opcode for this packet
- * @psn: the packet sequence number for this packet
- * @diff: the difference between the PSN and the expected PSN
- * @header_in_data: true if part of the header data is in the data buffer
- *
- * This is called from ipath_rc_rcv() to process an unexpected
- * incoming RC packet for the given QP.
- * Called at interrupt level.
- * Return 1 if no more processing is needed; otherwise return 0 to
- * schedule a response to be sent.
- */
-static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
-                                    struct ipath_other_headers *ohdr,
-                                    void *data,
-                                    struct ipath_qp *qp,
-                                    u32 opcode,
-                                    u32 psn,
-                                    int diff,
-                                    int header_in_data)
-{
-       struct ipath_ack_entry *e;
-       u8 i, prev;
-       int old_req;
-       unsigned long flags;
-
-       if (diff > 0) {
-               /*
-                * Packet sequence error.
-                * A NAK will ACK earlier sends and RDMA writes.
-                * Don't queue the NAK if we already sent one.
-                */
-               if (!qp->r_nak_state) {
-                       qp->r_nak_state = IB_NAK_PSN_ERROR;
-                       /* Use the expected PSN. */
-                       qp->r_ack_psn = qp->r_psn;
-                       goto send_ack;
-               }
-               goto done;
-       }
-
-       /*
-        * Handle a duplicate request.  Don't re-execute SEND, RDMA
-        * write or atomic op.  Don't NAK errors, just silently drop
-        * the duplicate request.  Note that r_sge, r_len, and
-        * r_rcv_len may be in use so don't modify them.
-        *
-        * We are supposed to ACK the earliest duplicate PSN but we
-        * can coalesce an outstanding duplicate ACK.  We have to
-        * send the earliest so that RDMA reads can be restarted at
-        * the requester's expected PSN.
-        *
-        * First, find where this duplicate PSN falls within the
-        * ACKs previously sent.
-        */
-       psn &= IPATH_PSN_MASK;
-       e = NULL;
-       old_req = 1;
-
-       spin_lock_irqsave(&qp->s_lock, flags);
-       /* Double check we can process this now that we hold the s_lock. */
-       if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
-               goto unlock_done;
-
-       for (i = qp->r_head_ack_queue; ; i = prev) {
-               if (i == qp->s_tail_ack_queue)
-                       old_req = 0;
-               if (i)
-                       prev = i - 1;
-               else
-                       prev = IPATH_MAX_RDMA_ATOMIC;
-               if (prev == qp->r_head_ack_queue) {
-                       e = NULL;
-                       break;
-               }
-               e = &qp->s_ack_queue[prev];
-               if (!e->opcode) {
-                       e = NULL;
-                       break;
-               }
-               if (ipath_cmp24(psn, e->psn) >= 0) {
-                       if (prev == qp->s_tail_ack_queue)
-                               old_req = 0;
-                       break;
-               }
-       }
-       switch (opcode) {
-       case OP(RDMA_READ_REQUEST): {
-               struct ib_reth *reth;
-               u32 offset;
-               u32 len;
-
-               /*
-                * If we didn't find the RDMA read request in the ack queue,
-                * or the send tasklet is already backed up to send an
-                * earlier entry, we can ignore this request.
-                */
-               if (!e || e->opcode != OP(RDMA_READ_REQUEST) || old_req)
-                       goto unlock_done;
-               /* RETH comes after BTH */
-               if (!header_in_data)
-                       reth = &ohdr->u.rc.reth;
-               else {
-                       reth = (struct ib_reth *)data;
-                       data += sizeof(*reth);
-               }
-               /*
-                * Address range must be a subset of the original
-                * request and start on pmtu boundaries.
-                * We reuse the old ack_queue slot since the requester
-                * should not back up and request an earlier PSN for the
-                * same request.
-                */
-               offset = ((psn - e->psn) & IPATH_PSN_MASK) *
-                       ib_mtu_enum_to_int(qp->path_mtu);
-               len = be32_to_cpu(reth->length);
-               if (unlikely(offset + len > e->rdma_sge.sge.sge_length))
-                       goto unlock_done;
-               if (len != 0) {
-                       u32 rkey = be32_to_cpu(reth->rkey);
-                       u64 vaddr = be64_to_cpu(reth->vaddr);
-                       int ok;
-
-                       ok = ipath_rkey_ok(qp, &e->rdma_sge,
-                                          len, vaddr, rkey,
-                                          IB_ACCESS_REMOTE_READ);
-                       if (unlikely(!ok))
-                               goto unlock_done;
-               } else {
-                       e->rdma_sge.sg_list = NULL;
-                       e->rdma_sge.num_sge = 0;
-                       e->rdma_sge.sge.mr = NULL;
-                       e->rdma_sge.sge.vaddr = NULL;
-                       e->rdma_sge.sge.length = 0;
-                       e->rdma_sge.sge.sge_length = 0;
-               }
-               e->psn = psn;
-               qp->s_ack_state = OP(ACKNOWLEDGE);
-               qp->s_tail_ack_queue = prev;
-               break;
-       }
-
-       case OP(COMPARE_SWAP):
-       case OP(FETCH_ADD): {
-               /*
-                * If we didn't find the atomic request in the ack queue
-                * or the send tasklet is already backed up to send an
-                * earlier entry, we can ignore this request.
-                */
-               if (!e || e->opcode != (u8) opcode || old_req)
-                       goto unlock_done;
-               qp->s_ack_state = OP(ACKNOWLEDGE);
-               qp->s_tail_ack_queue = prev;
-               break;
-       }
-
-       default:
-               if (old_req)
-                       goto unlock_done;
-               /*
-                * Resend the most recent ACK if this request is
-                * after all the previous RDMA reads and atomics.
-                */
-               if (i == qp->r_head_ack_queue) {
-                       spin_unlock_irqrestore(&qp->s_lock, flags);
-                       qp->r_nak_state = 0;
-                       qp->r_ack_psn = qp->r_psn - 1;
-                       goto send_ack;
-               }
-               /*
-                * Try to send a simple ACK to work around a Mellanox bug
-                * which doesn't accept a RDMA read response or atomic
-                * response as an ACK for earlier SENDs or RDMA writes.
-                */
-               if (qp->r_head_ack_queue == qp->s_tail_ack_queue &&
-                   !(qp->s_flags & IPATH_S_ACK_PENDING) &&
-                   qp->s_ack_state == OP(ACKNOWLEDGE)) {
-                       spin_unlock_irqrestore(&qp->s_lock, flags);
-                       qp->r_nak_state = 0;
-                       qp->r_ack_psn = qp->s_ack_queue[i].psn - 1;
-                       goto send_ack;
-               }
-               /*
-                * Resend the RDMA read or atomic op which
-                * ACKs this duplicate request.
-                */
-               qp->s_ack_state = OP(ACKNOWLEDGE);
-               qp->s_tail_ack_queue = i;
-               break;
-       }
-       qp->r_nak_state = 0;
-       ipath_schedule_send(qp);
-
-unlock_done:
-       spin_unlock_irqrestore(&qp->s_lock, flags);
-done:
-       return 1;
-
-send_ack:
-       return 0;
-}
-
-void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err)
-{
-       unsigned long flags;
-       int lastwqe;
-
-       spin_lock_irqsave(&qp->s_lock, flags);
-       lastwqe = ipath_error_qp(qp, err);
-       spin_unlock_irqrestore(&qp->s_lock, flags);
-
-       if (lastwqe) {
-               struct ib_event ev;
-
-               ev.device = qp->ibqp.device;
-               ev.element.qp = &qp->ibqp;
-               ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
-               qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
-       }
-}
-
-static inline void ipath_update_ack_queue(struct ipath_qp *qp, unsigned n)
-{
-       unsigned next;
-
-       next = n + 1;
-       if (next > IPATH_MAX_RDMA_ATOMIC)
-               next = 0;
-       if (n == qp->s_tail_ack_queue) {
-               qp->s_tail_ack_queue = next;
-               qp->s_ack_state = OP(ACKNOWLEDGE);
-       }
-}
-
-/**
- * ipath_rc_rcv - process an incoming RC packet
- * @dev: the device this packet came in on
- * @hdr: the header of this packet
- * @has_grh: true if the header has a GRH
- * @data: the packet data
- * @tlen: the packet length
- * @qp: the QP for this packet
- *
- * This is called from ipath_qp_rcv() to process an incoming RC packet
- * for the given QP.
- * Called at interrupt level.
- */
-void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
-                 int has_grh, void *data, u32 tlen, struct ipath_qp *qp)
-{
-       struct ipath_other_headers *ohdr;
-       u32 opcode;
-       u32 hdrsize;
-       u32 psn;
-       u32 pad;
-       struct ib_wc wc;
-       u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
-       int diff;
-       struct ib_reth *reth;
-       int header_in_data;
-       unsigned long flags;
-
-       /* Validate the SLID. See Ch. 9.6.1.5 */
-       if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid))
-               goto done;
-
-       /* Check for GRH */
-       if (!has_grh) {
-               ohdr = &hdr->u.oth;
-               hdrsize = 8 + 12;       /* LRH + BTH */
-               psn = be32_to_cpu(ohdr->bth[2]);
-               header_in_data = 0;
-       } else {
-               ohdr = &hdr->u.l.oth;
-               hdrsize = 8 + 40 + 12;  /* LRH + GRH + BTH */
-               /*
-                * The header with GRH is 60 bytes and the core driver sets
-                * the eager header buffer size to 56 bytes so the last 4
-                * bytes of the BTH header (PSN) is in the data buffer.
-                */
-               header_in_data = dev->dd->ipath_rcvhdrentsize == 16;
-               if (header_in_data) {
-                       psn = be32_to_cpu(((__be32 *) data)[0]);
-                       data += sizeof(__be32);
-               } else
-                       psn = be32_to_cpu(ohdr->bth[2]);
-       }
-
-       /*
-        * Process responses (ACKs) before anything else.  Note that the
-        * packet sequence number will be for something in the send work
-        * queue rather than the expected receive packet sequence number.
-        * In other words, this QP is the requester.
-        */
-       opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
-       if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
-           opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
-               ipath_rc_rcv_resp(dev, ohdr, data, tlen, qp, opcode, psn,
-                                 hdrsize, pmtu, header_in_data);
-               goto done;
-       }
-
-       /* Compute 24 bits worth of difference. */
-       diff = ipath_cmp24(psn, qp->r_psn);
-       if (unlikely(diff)) {
-               if (ipath_rc_rcv_error(dev, ohdr, data, qp, opcode,
-                                      psn, diff, header_in_data))
-                       goto done;
-               goto send_ack;
-       }
-
-       /* Check for opcode sequence errors. */
-       switch (qp->r_state) {
-       case OP(SEND_FIRST):
-       case OP(SEND_MIDDLE):
-               if (opcode == OP(SEND_MIDDLE) ||
-                   opcode == OP(SEND_LAST) ||
-                   opcode == OP(SEND_LAST_WITH_IMMEDIATE))
-                       break;
-               goto nack_inv;
-
-       case OP(RDMA_WRITE_FIRST):
-       case OP(RDMA_WRITE_MIDDLE):
-               if (opcode == OP(RDMA_WRITE_MIDDLE) ||
-                   opcode == OP(RDMA_WRITE_LAST) ||
-                   opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
-                       break;
-               goto nack_inv;
-
-       default:
-               if (opcode == OP(SEND_MIDDLE) ||
-                   opcode == OP(SEND_LAST) ||
-                   opcode == OP(SEND_LAST_WITH_IMMEDIATE) ||
-                   opcode == OP(RDMA_WRITE_MIDDLE) ||
-                   opcode == OP(RDMA_WRITE_LAST) ||
-                   opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
-                       goto nack_inv;
-               /*
-                * Note that it is up to the requester to not send a new
-                * RDMA read or atomic operation before receiving an ACK
-                * for the previous operation.
-                */
-               break;
-       }
-
-       memset(&wc, 0, sizeof wc);
-
-       /* OK, process the packet. */
-       switch (opcode) {
-       case OP(SEND_FIRST):
-               if (!ipath_get_rwqe(qp, 0))
-                       goto rnr_nak;
-               qp->r_rcv_len = 0;
-               /* FALLTHROUGH */
-       case OP(SEND_MIDDLE):
-       case OP(RDMA_WRITE_MIDDLE):
-       send_middle:
-               /* Check for invalid length PMTU or posted rwqe len. */
-               if (unlikely(tlen != (hdrsize + pmtu + 4)))
-                       goto nack_inv;
-               qp->r_rcv_len += pmtu;
-               if (unlikely(qp->r_rcv_len > qp->r_len))
-                       goto nack_inv;
-               ipath_copy_sge(&qp->r_sge, data, pmtu);
-               break;
-
-       case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
-               /* consume RWQE */
-               if (!ipath_get_rwqe(qp, 1))
-                       goto rnr_nak;
-               goto send_last_imm;
-
-       case OP(SEND_ONLY):
-       case OP(SEND_ONLY_WITH_IMMEDIATE):
-               if (!ipath_get_rwqe(qp, 0))
-                       goto rnr_nak;
-               qp->r_rcv_len = 0;
-               if (opcode == OP(SEND_ONLY))
-                       goto send_last;
-               /* FALLTHROUGH */
-       case OP(SEND_LAST_WITH_IMMEDIATE):
-       send_last_imm:
-               if (header_in_data) {
-                       wc.ex.imm_data = *(__be32 *) data;
-                       data += sizeof(__be32);
-               } else {
-                       /* Immediate data comes after BTH */
-                       wc.ex.imm_data = ohdr->u.imm_data;
-               }
-               hdrsize += 4;
-               wc.wc_flags = IB_WC_WITH_IMM;
-               /* FALLTHROUGH */
-       case OP(SEND_LAST):
-       case OP(RDMA_WRITE_LAST):
-       send_last:
-               /* Get the number of bytes the message was padded by. */
-               pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
-               /* Check for invalid length. */
-               /* XXX LAST len should be >= 1 */
-               if (unlikely(tlen < (hdrsize + pad + 4)))
-                       goto nack_inv;
-               /* Don't count the CRC. */
-               tlen -= (hdrsize + pad + 4);
-               wc.byte_len = tlen + qp->r_rcv_len;
-               if (unlikely(wc.byte_len > qp->r_len))
-                       goto nack_inv;
-               ipath_copy_sge(&qp->r_sge, data, tlen);
-               qp->r_msn++;
-               if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
-                       break;
-               wc.wr_id = qp->r_wr_id;
-               wc.status = IB_WC_SUCCESS;
-               if (opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE) ||
-                   opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
-                       wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
-               else
-                       wc.opcode = IB_WC_RECV;
-               wc.qp = &qp->ibqp;
-               wc.src_qp = qp->remote_qpn;
-               wc.slid = qp->remote_ah_attr.dlid;
-               wc.sl = qp->remote_ah_attr.sl;
-               /* Signal completion event if the solicited bit is set. */
-               ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
-                              (ohdr->bth[0] &
-                               cpu_to_be32(1 << 23)) != 0);
-               break;
-
-       case OP(RDMA_WRITE_FIRST):
-       case OP(RDMA_WRITE_ONLY):
-       case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
-               if (unlikely(!(qp->qp_access_flags &
-                              IB_ACCESS_REMOTE_WRITE)))
-                       goto nack_inv;
-               /* consume RWQE */
-               /* RETH comes after BTH */
-               if (!header_in_data)
-                       reth = &ohdr->u.rc.reth;
-               else {
-                       reth = (struct ib_reth *)data;
-                       data += sizeof(*reth);
-               }
-               hdrsize += sizeof(*reth);
-               qp->r_len = be32_to_cpu(reth->length);
-               qp->r_rcv_len = 0;
-               if (qp->r_len != 0) {
-                       u32 rkey = be32_to_cpu(reth->rkey);
-                       u64 vaddr = be64_to_cpu(reth->vaddr);
-                       int ok;
-
-                       /* Check rkey & NAK */
-                       ok = ipath_rkey_ok(qp, &qp->r_sge,
-                                          qp->r_len, vaddr, rkey,
-                                          IB_ACCESS_REMOTE_WRITE);
-                       if (unlikely(!ok))
-                               goto nack_acc;
-               } else {
-                       qp->r_sge.sg_list = NULL;
-                       qp->r_sge.sge.mr = NULL;
-                       qp->r_sge.sge.vaddr = NULL;
-                       qp->r_sge.sge.length = 0;
-                       qp->r_sge.sge.sge_length = 0;
-               }
-               if (opcode == OP(RDMA_WRITE_FIRST))
-                       goto send_middle;
-               else if (opcode == OP(RDMA_WRITE_ONLY))
-                       goto send_last;
-               if (!ipath_get_rwqe(qp, 1))
-                       goto rnr_nak;
-               goto send_last_imm;
-
-       case OP(RDMA_READ_REQUEST): {
-               struct ipath_ack_entry *e;
-               u32 len;
-               u8 next;
-
-               if (unlikely(!(qp->qp_access_flags &
-                              IB_ACCESS_REMOTE_READ)))
-                       goto nack_inv;
-               next = qp->r_head_ack_queue + 1;
-               if (next > IPATH_MAX_RDMA_ATOMIC)
-                       next = 0;
-               spin_lock_irqsave(&qp->s_lock, flags);
-               /* Double check we can process this while holding the s_lock. */
-               if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
-                       goto unlock;
-               if (unlikely(next == qp->s_tail_ack_queue)) {
-                       if (!qp->s_ack_queue[next].sent)
-                               goto nack_inv_unlck;
-                       ipath_update_ack_queue(qp, next);
-               }
-               e = &qp->s_ack_queue[qp->r_head_ack_queue];
-               /* RETH comes after BTH */
-               if (!header_in_data)
-                       reth = &ohdr->u.rc.reth;
-               else {
-                       reth = (struct ib_reth *)data;
-                       data += sizeof(*reth);
-               }
-               len = be32_to_cpu(reth->length);
-               if (len) {
-                       u32 rkey = be32_to_cpu(reth->rkey);
-                       u64 vaddr = be64_to_cpu(reth->vaddr);
-                       int ok;
-
-                       /* Check rkey & NAK */
-                       ok = ipath_rkey_ok(qp, &e->rdma_sge, len, vaddr,
-                                          rkey, IB_ACCESS_REMOTE_READ);
-                       if (unlikely(!ok))
-                               goto nack_acc_unlck;
-                       /*
-                        * Update the next expected PSN.  We add 1 later
-                        * below, so only add the remainder here.
-                        */
-                       if (len > pmtu)
-                               qp->r_psn += (len - 1) / pmtu;
-               } else {
-                       e->rdma_sge.sg_list = NULL;
-                       e->rdma_sge.num_sge = 0;
-                       e->rdma_sge.sge.mr = NULL;
-                       e->rdma_sge.sge.vaddr = NULL;
-                       e->rdma_sge.sge.length = 0;
-                       e->rdma_sge.sge.sge_length = 0;
-               }
-               e->opcode = opcode;
-               e->sent = 0;
-               e->psn = psn;
-               /*
-                * We need to increment the MSN here instead of when we
-                * finish sending the result since a duplicate request would
-                * increment it more than once.
-                */
-               qp->r_msn++;
-               qp->r_psn++;
-               qp->r_state = opcode;
-               qp->r_nak_state = 0;
-               qp->r_head_ack_queue = next;
-
-               /* Schedule the send tasklet. */
-               ipath_schedule_send(qp);
-
-               goto unlock;
-       }
-
-       case OP(COMPARE_SWAP):
-       case OP(FETCH_ADD): {
-               struct ib_atomic_eth *ateth;
-               struct ipath_ack_entry *e;
-               u64 vaddr;
-               atomic64_t *maddr;
-               u64 sdata;
-               u32 rkey;
-               u8 next;
-
-               if (unlikely(!(qp->qp_access_flags &
-                              IB_ACCESS_REMOTE_ATOMIC)))
-                       goto nack_inv;
-               next = qp->r_head_ack_queue + 1;
-               if (next > IPATH_MAX_RDMA_ATOMIC)
-                       next = 0;
-               spin_lock_irqsave(&qp->s_lock, flags);
-               /* Double check we can process this while holding the s_lock. */
-               if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
-                       goto unlock;
-               if (unlikely(next == qp->s_tail_ack_queue)) {
-                       if (!qp->s_ack_queue[next].sent)
-                               goto nack_inv_unlck;
-                       ipath_update_ack_queue(qp, next);
-               }
-               if (!header_in_data)
-                       ateth = &ohdr->u.atomic_eth;
-               else
-                       ateth = (struct ib_atomic_eth *)data;
-               vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) |
-                       be32_to_cpu(ateth->vaddr[1]);
-               if (unlikely(vaddr & (sizeof(u64) - 1)))
-                       goto nack_inv_unlck;
-               rkey = be32_to_cpu(ateth->rkey);
-               /* Check rkey & NAK */
-               if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge,
-                                           sizeof(u64), vaddr, rkey,
-                                           IB_ACCESS_REMOTE_ATOMIC)))
-                       goto nack_acc_unlck;
-               /* Perform atomic OP and save result. */
-               maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
-               sdata = be64_to_cpu(ateth->swap_data);
-               e = &qp->s_ack_queue[qp->r_head_ack_queue];
-               e->atomic_data = (opcode == OP(FETCH_ADD)) ?
-                       (u64) atomic64_add_return(sdata, maddr) - sdata :
-                       (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
-                                     be64_to_cpu(ateth->compare_data),
-                                     sdata);
-               e->opcode = opcode;
-               e->sent = 0;
-               e->psn = psn & IPATH_PSN_MASK;
-               qp->r_msn++;
-               qp->r_psn++;
-               qp->r_state = opcode;
-               qp->r_nak_state = 0;
-               qp->r_head_ack_queue = next;
-
-               /* Schedule the send tasklet. */
-               ipath_schedule_send(qp);
-
-               goto unlock;
-       }
-
-       default:
-               /* NAK unknown opcodes. */
-               goto nack_inv;
-       }
-       qp->r_psn++;
-       qp->r_state = opcode;
-       qp->r_ack_psn = psn;
-       qp->r_nak_state = 0;
-       /* Send an ACK if requested or required. */
-       if (psn & (1 << 31))
-               goto send_ack;
-       goto done;
-
-rnr_nak:
-       qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer;
-       qp->r_ack_psn = qp->r_psn;
-       goto send_ack;
-
-nack_inv_unlck:
-       spin_unlock_irqrestore(&qp->s_lock, flags);
-nack_inv:
-       ipath_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
-       qp->r_nak_state = IB_NAK_INVALID_REQUEST;
-       qp->r_ack_psn = qp->r_psn;
-       goto send_ack;
-
-nack_acc_unlck:
-       spin_unlock_irqrestore(&qp->s_lock, flags);
-nack_acc:
-       ipath_rc_error(qp, IB_WC_LOC_PROT_ERR);
-       qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
-       qp->r_ack_psn = qp->r_psn;
-send_ack:
-       send_rc_ack(qp);
-       goto done;
-
-unlock:
-       spin_unlock_irqrestore(&qp->s_lock, flags);
-done:
-       return;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_registers.h b/drivers/staging/rdma/ipath/ipath_registers.h
deleted file mode 100644 (file)
index 8f44d0c..0000000
+++ /dev/null
@@ -1,512 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef _IPATH_REGISTERS_H
-#define _IPATH_REGISTERS_H
-
-/*
- * This file should only be included by kernel source, and by the diags.  It
- * defines the registers, and their contents, for InfiniPath chips.
- */
-
-/*
- * These are the InfiniPath register and buffer bit definitions,
- * that are visible to software, and needed only by the kernel
- * and diag code.  A few, that are visible to protocol and user
- * code are in ipath_common.h.  Some bits are specific
- * to a given chip implementation, and have been moved to the
- * chip-specific source file
- */
-
-/* kr_revision bits */
-#define INFINIPATH_R_CHIPREVMINOR_MASK 0xFF
-#define INFINIPATH_R_CHIPREVMINOR_SHIFT 0
-#define INFINIPATH_R_CHIPREVMAJOR_MASK 0xFF
-#define INFINIPATH_R_CHIPREVMAJOR_SHIFT 8
-#define INFINIPATH_R_ARCH_MASK 0xFF
-#define INFINIPATH_R_ARCH_SHIFT 16
-#define INFINIPATH_R_SOFTWARE_MASK 0xFF
-#define INFINIPATH_R_SOFTWARE_SHIFT 24
-#define INFINIPATH_R_BOARDID_MASK 0xFF
-#define INFINIPATH_R_BOARDID_SHIFT 32
-
-/* kr_control bits */
-#define INFINIPATH_C_FREEZEMODE 0x00000002
-#define INFINIPATH_C_LINKENABLE 0x00000004
-
-/* kr_sendctrl bits */
-#define INFINIPATH_S_DISARMPIOBUF_SHIFT 16
-#define INFINIPATH_S_UPDTHRESH_SHIFT 24
-#define INFINIPATH_S_UPDTHRESH_MASK 0x1f
-
-#define IPATH_S_ABORT          0
-#define IPATH_S_PIOINTBUFAVAIL 1
-#define IPATH_S_PIOBUFAVAILUPD 2
-#define IPATH_S_PIOENABLE      3
-#define IPATH_S_SDMAINTENABLE  9
-#define IPATH_S_SDMASINGLEDESCRIPTOR   10
-#define IPATH_S_SDMAENABLE     11
-#define IPATH_S_SDMAHALT       12
-#define IPATH_S_DISARM         31
-
-#define INFINIPATH_S_ABORT             (1U << IPATH_S_ABORT)
-#define INFINIPATH_S_PIOINTBUFAVAIL    (1U << IPATH_S_PIOINTBUFAVAIL)
-#define INFINIPATH_S_PIOBUFAVAILUPD    (1U << IPATH_S_PIOBUFAVAILUPD)
-#define INFINIPATH_S_PIOENABLE         (1U << IPATH_S_PIOENABLE)
-#define INFINIPATH_S_SDMAINTENABLE     (1U << IPATH_S_SDMAINTENABLE)
-#define INFINIPATH_S_SDMASINGLEDESCRIPTOR \
-                                       (1U << IPATH_S_SDMASINGLEDESCRIPTOR)
-#define INFINIPATH_S_SDMAENABLE                (1U << IPATH_S_SDMAENABLE)
-#define INFINIPATH_S_SDMAHALT          (1U << IPATH_S_SDMAHALT)
-#define INFINIPATH_S_DISARM            (1U << IPATH_S_DISARM)
-
-/* kr_rcvctrl bits that are the same on multiple chips */
-#define INFINIPATH_R_PORTENABLE_SHIFT 0
-#define INFINIPATH_R_QPMAP_ENABLE (1ULL << 38)
-
-/* kr_intstatus, kr_intclear, kr_intmask bits */
-#define INFINIPATH_I_SDMAINT           0x8000000000000000ULL
-#define INFINIPATH_I_SDMADISABLED      0x4000000000000000ULL
-#define INFINIPATH_I_ERROR             0x0000000080000000ULL
-#define INFINIPATH_I_SPIOSENT          0x0000000040000000ULL
-#define INFINIPATH_I_SPIOBUFAVAIL      0x0000000020000000ULL
-#define INFINIPATH_I_GPIO              0x0000000010000000ULL
-#define INFINIPATH_I_JINT              0x0000000004000000ULL
-
-/* kr_errorstatus, kr_errorclear, kr_errormask bits */
-#define INFINIPATH_E_RFORMATERR                        0x0000000000000001ULL
-#define INFINIPATH_E_RVCRC                     0x0000000000000002ULL
-#define INFINIPATH_E_RICRC                     0x0000000000000004ULL
-#define INFINIPATH_E_RMINPKTLEN                        0x0000000000000008ULL
-#define INFINIPATH_E_RMAXPKTLEN                        0x0000000000000010ULL
-#define INFINIPATH_E_RLONGPKTLEN               0x0000000000000020ULL
-#define INFINIPATH_E_RSHORTPKTLEN              0x0000000000000040ULL
-#define INFINIPATH_E_RUNEXPCHAR                        0x0000000000000080ULL
-#define INFINIPATH_E_RUNSUPVL                  0x0000000000000100ULL
-#define INFINIPATH_E_REBP                      0x0000000000000200ULL
-#define INFINIPATH_E_RIBFLOW                   0x0000000000000400ULL
-#define INFINIPATH_E_RBADVERSION               0x0000000000000800ULL
-#define INFINIPATH_E_RRCVEGRFULL               0x0000000000001000ULL
-#define INFINIPATH_E_RRCVHDRFULL               0x0000000000002000ULL
-#define INFINIPATH_E_RBADTID                   0x0000000000004000ULL
-#define INFINIPATH_E_RHDRLEN                   0x0000000000008000ULL
-#define INFINIPATH_E_RHDR                      0x0000000000010000ULL
-#define INFINIPATH_E_RIBLOSTLINK               0x0000000000020000ULL
-#define INFINIPATH_E_SENDSPECIALTRIGGER                0x0000000008000000ULL
-#define INFINIPATH_E_SDMADISABLED              0x0000000010000000ULL
-#define INFINIPATH_E_SMINPKTLEN                        0x0000000020000000ULL
-#define INFINIPATH_E_SMAXPKTLEN                        0x0000000040000000ULL
-#define INFINIPATH_E_SUNDERRUN                 0x0000000080000000ULL
-#define INFINIPATH_E_SPKTLEN                   0x0000000100000000ULL
-#define INFINIPATH_E_SDROPPEDSMPPKT            0x0000000200000000ULL
-#define INFINIPATH_E_SDROPPEDDATAPKT           0x0000000400000000ULL
-#define INFINIPATH_E_SPIOARMLAUNCH             0x0000000800000000ULL
-#define INFINIPATH_E_SUNEXPERRPKTNUM           0x0000001000000000ULL
-#define INFINIPATH_E_SUNSUPVL                  0x0000002000000000ULL
-#define INFINIPATH_E_SENDBUFMISUSE             0x0000004000000000ULL
-#define INFINIPATH_E_SDMAGENMISMATCH           0x0000008000000000ULL
-#define INFINIPATH_E_SDMAOUTOFBOUND            0x0000010000000000ULL
-#define INFINIPATH_E_SDMATAILOUTOFBOUND                0x0000020000000000ULL
-#define INFINIPATH_E_SDMABASE                  0x0000040000000000ULL
-#define INFINIPATH_E_SDMA1STDESC               0x0000080000000000ULL
-#define INFINIPATH_E_SDMARPYTAG                        0x0000100000000000ULL
-#define INFINIPATH_E_SDMADWEN                  0x0000200000000000ULL
-#define INFINIPATH_E_SDMAMISSINGDW             0x0000400000000000ULL
-#define INFINIPATH_E_SDMAUNEXPDATA             0x0000800000000000ULL
-#define INFINIPATH_E_IBSTATUSCHANGED           0x0001000000000000ULL
-#define INFINIPATH_E_INVALIDADDR               0x0002000000000000ULL
-#define INFINIPATH_E_RESET                     0x0004000000000000ULL
-#define INFINIPATH_E_HARDWARE                  0x0008000000000000ULL
-#define INFINIPATH_E_SDMADESCADDRMISALIGN      0x0010000000000000ULL
-#define INFINIPATH_E_INVALIDEEPCMD             0x0020000000000000ULL
-
-/*
- * this is used to print "common" packet errors only when the
- * __IPATH_ERRPKTDBG bit is set in ipath_debug.
- */
-#define INFINIPATH_E_PKTERRS ( INFINIPATH_E_SPKTLEN \
-               | INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_RVCRC \
-               | INFINIPATH_E_RICRC | INFINIPATH_E_RSHORTPKTLEN \
-               | INFINIPATH_E_REBP )
-
-/* Convenience for decoding Send DMA errors */
-#define INFINIPATH_E_SDMAERRS ( \
-       INFINIPATH_E_SDMAGENMISMATCH | INFINIPATH_E_SDMAOUTOFBOUND | \
-       INFINIPATH_E_SDMATAILOUTOFBOUND | INFINIPATH_E_SDMABASE | \
-       INFINIPATH_E_SDMA1STDESC | INFINIPATH_E_SDMARPYTAG | \
-       INFINIPATH_E_SDMADWEN | INFINIPATH_E_SDMAMISSINGDW | \
-       INFINIPATH_E_SDMAUNEXPDATA | \
-       INFINIPATH_E_SDMADESCADDRMISALIGN | \
-       INFINIPATH_E_SDMADISABLED | \
-       INFINIPATH_E_SENDBUFMISUSE)
-
-/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
-/* TXEMEMPARITYERR bit 0: PIObuf, 1: PIOpbc, 2: launchfifo
- * RXEMEMPARITYERR bit 0: rcvbuf, 1: lookupq, 2:  expTID, 3: eagerTID
- *             bit 4: flag buffer, 5: datainfo, 6: header info */
-#define INFINIPATH_HWE_TXEMEMPARITYERR_MASK 0xFULL
-#define INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT 40
-#define INFINIPATH_HWE_RXEMEMPARITYERR_MASK 0x7FULL
-#define INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT 44
-#define INFINIPATH_HWE_IBCBUSTOSPCPARITYERR 0x4000000000000000ULL
-#define INFINIPATH_HWE_IBCBUSFRSPCPARITYERR 0x8000000000000000ULL
-/* txe mem parity errors (shift by INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) */
-#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF  0x1ULL
-#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC  0x2ULL
-#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOLAUNCHFIFO 0x4ULL
-/* rxe mem parity errors (shift by INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) */
-#define INFINIPATH_HWE_RXEMEMPARITYERR_RCVBUF   0x01ULL
-#define INFINIPATH_HWE_RXEMEMPARITYERR_LOOKUPQ  0x02ULL
-#define INFINIPATH_HWE_RXEMEMPARITYERR_EXPTID   0x04ULL
-#define INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID 0x08ULL
-#define INFINIPATH_HWE_RXEMEMPARITYERR_FLAGBUF  0x10ULL
-#define INFINIPATH_HWE_RXEMEMPARITYERR_DATAINFO 0x20ULL
-#define INFINIPATH_HWE_RXEMEMPARITYERR_HDRINFO  0x40ULL
-/* waldo specific -- find the rest in ipath_6110.c */
-#define INFINIPATH_HWE_RXDSYNCMEMPARITYERR  0x0000000400000000ULL
-/* 6120/7220 specific -- find the rest in ipath_6120.c and ipath_7220.c */
-#define INFINIPATH_HWE_MEMBISTFAILED   0x0040000000000000ULL
-
-/* kr_hwdiagctrl bits */
-#define INFINIPATH_DC_FORCETXEMEMPARITYERR_MASK 0xFULL
-#define INFINIPATH_DC_FORCETXEMEMPARITYERR_SHIFT 40
-#define INFINIPATH_DC_FORCERXEMEMPARITYERR_MASK 0x7FULL
-#define INFINIPATH_DC_FORCERXEMEMPARITYERR_SHIFT 44
-#define INFINIPATH_DC_FORCERXDSYNCMEMPARITYERR  0x0000000400000000ULL
-#define INFINIPATH_DC_COUNTERDISABLE            0x1000000000000000ULL
-#define INFINIPATH_DC_COUNTERWREN               0x2000000000000000ULL
-#define INFINIPATH_DC_FORCEIBCBUSTOSPCPARITYERR 0x4000000000000000ULL
-#define INFINIPATH_DC_FORCEIBCBUSFRSPCPARITYERR 0x8000000000000000ULL
-
-/* kr_ibcctrl bits */
-#define INFINIPATH_IBCC_FLOWCTRLPERIOD_MASK 0xFFULL
-#define INFINIPATH_IBCC_FLOWCTRLPERIOD_SHIFT 0
-#define INFINIPATH_IBCC_FLOWCTRLWATERMARK_MASK 0xFFULL
-#define INFINIPATH_IBCC_FLOWCTRLWATERMARK_SHIFT 8
-#define INFINIPATH_IBCC_LINKINITCMD_MASK 0x3ULL
-#define INFINIPATH_IBCC_LINKINITCMD_DISABLE 1
-/* cycle through TS1/TS2 till OK */
-#define INFINIPATH_IBCC_LINKINITCMD_POLL 2
-/* wait for TS1, then go on */
-#define INFINIPATH_IBCC_LINKINITCMD_SLEEP 3
-#define INFINIPATH_IBCC_LINKINITCMD_SHIFT 16
-#define INFINIPATH_IBCC_LINKCMD_MASK 0x3ULL
-#define INFINIPATH_IBCC_LINKCMD_DOWN 1         /* move to 0x11 */
-#define INFINIPATH_IBCC_LINKCMD_ARMED 2                /* move to 0x21 */
-#define INFINIPATH_IBCC_LINKCMD_ACTIVE 3       /* move to 0x31 */
-#define INFINIPATH_IBCC_LINKCMD_SHIFT 18
-#define INFINIPATH_IBCC_MAXPKTLEN_MASK 0x7FFULL
-#define INFINIPATH_IBCC_MAXPKTLEN_SHIFT 20
-#define INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK 0xFULL
-#define INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT 32
-#define INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK 0xFULL
-#define INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT 36
-#define INFINIPATH_IBCC_CREDITSCALE_MASK 0x7ULL
-#define INFINIPATH_IBCC_CREDITSCALE_SHIFT 40
-#define INFINIPATH_IBCC_LOOPBACK             0x8000000000000000ULL
-#define INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE 0x4000000000000000ULL
-
-/* kr_ibcstatus bits */
-#define INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT 0
-#define INFINIPATH_IBCS_LINKSTATE_MASK 0x7
-
-#define INFINIPATH_IBCS_TXREADY       0x40000000
-#define INFINIPATH_IBCS_TXCREDITOK    0x80000000
-/* link training states (shift by
-   INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) */
-#define INFINIPATH_IBCS_LT_STATE_DISABLED      0x00
-#define INFINIPATH_IBCS_LT_STATE_LINKUP                0x01
-#define INFINIPATH_IBCS_LT_STATE_POLLACTIVE    0x02
-#define INFINIPATH_IBCS_LT_STATE_POLLQUIET     0x03
-#define INFINIPATH_IBCS_LT_STATE_SLEEPDELAY    0x04
-#define INFINIPATH_IBCS_LT_STATE_SLEEPQUIET    0x05
-#define INFINIPATH_IBCS_LT_STATE_CFGDEBOUNCE   0x08
-#define INFINIPATH_IBCS_LT_STATE_CFGRCVFCFG    0x09
-#define INFINIPATH_IBCS_LT_STATE_CFGWAITRMT    0x0a
-#define INFINIPATH_IBCS_LT_STATE_CFGIDLE       0x0b
-#define INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN        0x0c
-#define INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT        0x0e
-#define INFINIPATH_IBCS_LT_STATE_RECOVERIDLE   0x0f
-/* link state machine states (shift by ibcs_ls_shift) */
-#define INFINIPATH_IBCS_L_STATE_DOWN           0x0
-#define INFINIPATH_IBCS_L_STATE_INIT           0x1
-#define INFINIPATH_IBCS_L_STATE_ARM            0x2
-#define INFINIPATH_IBCS_L_STATE_ACTIVE         0x3
-#define INFINIPATH_IBCS_L_STATE_ACT_DEFER      0x4
-
-
-/* kr_extstatus bits */
-#define INFINIPATH_EXTS_SERDESPLLLOCK 0x1
-#define INFINIPATH_EXTS_GPIOIN_MASK 0xFFFFULL
-#define INFINIPATH_EXTS_GPIOIN_SHIFT 48
-
-/* kr_extctrl bits */
-#define INFINIPATH_EXTC_GPIOINVERT_MASK 0xFFFFULL
-#define INFINIPATH_EXTC_GPIOINVERT_SHIFT 32
-#define INFINIPATH_EXTC_GPIOOE_MASK 0xFFFFULL
-#define INFINIPATH_EXTC_GPIOOE_SHIFT 48
-#define INFINIPATH_EXTC_SERDESENABLE         0x80000000ULL
-#define INFINIPATH_EXTC_SERDESCONNECT        0x40000000ULL
-#define INFINIPATH_EXTC_SERDESENTRUNKING     0x20000000ULL
-#define INFINIPATH_EXTC_SERDESDISRXFIFO      0x10000000ULL
-#define INFINIPATH_EXTC_SERDESENPLPBK1       0x08000000ULL
-#define INFINIPATH_EXTC_SERDESENPLPBK2       0x04000000ULL
-#define INFINIPATH_EXTC_SERDESENENCDEC       0x02000000ULL
-#define INFINIPATH_EXTC_LED1SECPORT_ON       0x00000020ULL
-#define INFINIPATH_EXTC_LED2SECPORT_ON       0x00000010ULL
-#define INFINIPATH_EXTC_LED1PRIPORT_ON       0x00000008ULL
-#define INFINIPATH_EXTC_LED2PRIPORT_ON       0x00000004ULL
-#define INFINIPATH_EXTC_LEDGBLOK_ON          0x00000002ULL
-#define INFINIPATH_EXTC_LEDGBLERR_OFF        0x00000001ULL
-
-/* kr_partitionkey bits */
-#define INFINIPATH_PKEY_SIZE 16
-#define INFINIPATH_PKEY_MASK 0xFFFF
-#define INFINIPATH_PKEY_DEFAULT_PKEY 0xFFFF
-
-/* kr_serdesconfig0 bits */
-#define INFINIPATH_SERDC0_RESET_MASK  0xfULL   /* overal reset bits */
-#define INFINIPATH_SERDC0_RESET_PLL   0x10000000ULL    /* pll reset */
-/* tx idle enables (per lane) */
-#define INFINIPATH_SERDC0_TXIDLE      0xF000ULL
-/* rx detect enables (per lane) */
-#define INFINIPATH_SERDC0_RXDETECT_EN 0xF0000ULL
-/* L1 Power down; use with RXDETECT, Otherwise not used on IB side */
-#define INFINIPATH_SERDC0_L1PWR_DN      0xF0ULL
-
-/* common kr_xgxsconfig bits (or safe in all, even if not implemented) */
-#define INFINIPATH_XGXS_RX_POL_SHIFT 19
-#define INFINIPATH_XGXS_RX_POL_MASK 0xfULL
-
-
-/*
- * IPATH_PIO_MAXIBHDR is the max IB header size allowed for in our
- * PIO send buffers.  This is well beyond anything currently
- * defined in the InfiniBand spec.
- */
-#define IPATH_PIO_MAXIBHDR 128
-
-typedef u64 ipath_err_t;
-
-/* The following change with the type of device, so
- * need to be part of the ipath_devdata struct, or
- * we could have problems plugging in devices of
- * different types (e.g. one HT, one PCIE)
- * in one system, to be managed by one driver.
- * On the other hand, this file is may also be included
- * by other code, so leave the declarations here
- * temporarily. Minor footprint issue if common-model
- * linker used, none if C89+ linker used.
- */
-
-/* mask of defined bits for various registers */
-extern u64 infinipath_i_bitsextant;
-extern ipath_err_t infinipath_e_bitsextant, infinipath_hwe_bitsextant;
-
-/* masks that are different in various chips, or only exist in some chips */
-extern u32 infinipath_i_rcvavail_mask, infinipath_i_rcvurg_mask;
-
-/*
- * These are the infinipath general register numbers (not offsets).
- * The kernel registers are used directly, those beyond the kernel
- * registers are calculated from one of the base registers.  The use of
- * an integer type doesn't allow type-checking as thorough as, say,
- * an enum but allows for better hiding of chip differences.
- */
-typedef const u16 ipath_kreg,  /* infinipath general registers */
- ipath_creg,                   /* infinipath counter registers */
- ipath_sreg;                   /* kernel-only, infinipath send registers */
-
-/*
- * These are the chip registers common to all infinipath chips, and
- * used both by the kernel and the diagnostics or other user code.
- * They are all implemented such that 64 bit accesses work.
- * Some implement no more than 32 bits.  Because 64 bit reads
- * require 2 HT cmds on opteron, we access those with 32 bit
- * reads for efficiency (they are written as 64 bits, since
- * the extra 32 bits are nearly free on writes, and it slightly reduces
- * complexity).  The rest are all accessed as 64 bits.
- */
-struct ipath_kregs {
-       /* These are the 32 bit group */
-       ipath_kreg kr_control;
-       ipath_kreg kr_counterregbase;
-       ipath_kreg kr_intmask;
-       ipath_kreg kr_intstatus;
-       ipath_kreg kr_pagealign;
-       ipath_kreg kr_portcnt;
-       ipath_kreg kr_rcvtidbase;
-       ipath_kreg kr_rcvtidcnt;
-       ipath_kreg kr_rcvegrbase;
-       ipath_kreg kr_rcvegrcnt;
-       ipath_kreg kr_scratch;
-       ipath_kreg kr_sendctrl;
-       ipath_kreg kr_sendpiobufbase;
-       ipath_kreg kr_sendpiobufcnt;
-       ipath_kreg kr_sendpiosize;
-       ipath_kreg kr_sendregbase;
-       ipath_kreg kr_userregbase;
-       /* These are the 64 bit group */
-       ipath_kreg kr_debugport;
-       ipath_kreg kr_debugportselect;
-       ipath_kreg kr_errorclear;
-       ipath_kreg kr_errormask;
-       ipath_kreg kr_errorstatus;
-       ipath_kreg kr_extctrl;
-       ipath_kreg kr_extstatus;
-       ipath_kreg kr_gpio_clear;
-       ipath_kreg kr_gpio_mask;
-       ipath_kreg kr_gpio_out;
-       ipath_kreg kr_gpio_status;
-       ipath_kreg kr_hwdiagctrl;
-       ipath_kreg kr_hwerrclear;
-       ipath_kreg kr_hwerrmask;
-       ipath_kreg kr_hwerrstatus;
-       ipath_kreg kr_ibcctrl;
-       ipath_kreg kr_ibcstatus;
-       ipath_kreg kr_intblocked;
-       ipath_kreg kr_intclear;
-       ipath_kreg kr_interruptconfig;
-       ipath_kreg kr_mdio;
-       ipath_kreg kr_partitionkey;
-       ipath_kreg kr_rcvbthqp;
-       ipath_kreg kr_rcvbufbase;
-       ipath_kreg kr_rcvbufsize;
-       ipath_kreg kr_rcvctrl;
-       ipath_kreg kr_rcvhdrcnt;
-       ipath_kreg kr_rcvhdrentsize;
-       ipath_kreg kr_rcvhdrsize;
-       ipath_kreg kr_rcvintmembase;
-       ipath_kreg kr_rcvintmemsize;
-       ipath_kreg kr_revision;
-       ipath_kreg kr_sendbuffererror;
-       ipath_kreg kr_sendpioavailaddr;
-       ipath_kreg kr_serdesconfig0;
-       ipath_kreg kr_serdesconfig1;
-       ipath_kreg kr_serdesstatus;
-       ipath_kreg kr_txintmembase;
-       ipath_kreg kr_txintmemsize;
-       ipath_kreg kr_xgxsconfig;
-       ipath_kreg kr_ibpllcfg;
-       /* use these two (and the following N ports) only with
-        * ipath_k*_kreg64_port(); not *kreg64() */
-       ipath_kreg kr_rcvhdraddr;
-       ipath_kreg kr_rcvhdrtailaddr;
-
-       /* remaining registers are not present on all types of infinipath
-          chips  */
-       ipath_kreg kr_rcvpktledcnt;
-       ipath_kreg kr_pcierbuftestreg0;
-       ipath_kreg kr_pcierbuftestreg1;
-       ipath_kreg kr_pcieq0serdesconfig0;
-       ipath_kreg kr_pcieq0serdesconfig1;
-       ipath_kreg kr_pcieq0serdesstatus;
-       ipath_kreg kr_pcieq1serdesconfig0;
-       ipath_kreg kr_pcieq1serdesconfig1;
-       ipath_kreg kr_pcieq1serdesstatus;
-       ipath_kreg kr_hrtbt_guid;
-       ipath_kreg kr_ibcddrctrl;
-       ipath_kreg kr_ibcddrstatus;
-       ipath_kreg kr_jintreload;
-
-       /* send dma related regs */
-       ipath_kreg kr_senddmabase;
-       ipath_kreg kr_senddmalengen;
-       ipath_kreg kr_senddmatail;
-       ipath_kreg kr_senddmahead;
-       ipath_kreg kr_senddmaheadaddr;
-       ipath_kreg kr_senddmabufmask0;
-       ipath_kreg kr_senddmabufmask1;
-       ipath_kreg kr_senddmabufmask2;
-       ipath_kreg kr_senddmastatus;
-
-       /* SerDes related regs (IBA7220-only) */
-       ipath_kreg kr_ibserdesctrl;
-       ipath_kreg kr_ib_epbacc;
-       ipath_kreg kr_ib_epbtrans;
-       ipath_kreg kr_pcie_epbacc;
-       ipath_kreg kr_pcie_epbtrans;
-       ipath_kreg kr_ib_ddsrxeq;
-};
-
-struct ipath_cregs {
-       ipath_creg cr_badformatcnt;
-       ipath_creg cr_erricrccnt;
-       ipath_creg cr_errlinkcnt;
-       ipath_creg cr_errlpcrccnt;
-       ipath_creg cr_errpkey;
-       ipath_creg cr_errrcvflowctrlcnt;
-       ipath_creg cr_err_rlencnt;
-       ipath_creg cr_errslencnt;
-       ipath_creg cr_errtidfull;
-       ipath_creg cr_errtidvalid;
-       ipath_creg cr_errvcrccnt;
-       ipath_creg cr_ibstatuschange;
-       ipath_creg cr_intcnt;
-       ipath_creg cr_invalidrlencnt;
-       ipath_creg cr_invalidslencnt;
-       ipath_creg cr_lbflowstallcnt;
-       ipath_creg cr_iblinkdowncnt;
-       ipath_creg cr_iblinkerrrecovcnt;
-       ipath_creg cr_ibsymbolerrcnt;
-       ipath_creg cr_pktrcvcnt;
-       ipath_creg cr_pktrcvflowctrlcnt;
-       ipath_creg cr_pktsendcnt;
-       ipath_creg cr_pktsendflowcnt;
-       ipath_creg cr_portovflcnt;
-       ipath_creg cr_rcvebpcnt;
-       ipath_creg cr_rcvovflcnt;
-       ipath_creg cr_rxdroppktcnt;
-       ipath_creg cr_senddropped;
-       ipath_creg cr_sendstallcnt;
-       ipath_creg cr_sendunderruncnt;
-       ipath_creg cr_unsupvlcnt;
-       ipath_creg cr_wordrcvcnt;
-       ipath_creg cr_wordsendcnt;
-       ipath_creg cr_vl15droppedpktcnt;
-       ipath_creg cr_rxotherlocalphyerrcnt;
-       ipath_creg cr_excessbufferovflcnt;
-       ipath_creg cr_locallinkintegrityerrcnt;
-       ipath_creg cr_rxvlerrcnt;
-       ipath_creg cr_rxdlidfltrcnt;
-       ipath_creg cr_psstat;
-       ipath_creg cr_psstart;
-       ipath_creg cr_psinterval;
-       ipath_creg cr_psrcvdatacount;
-       ipath_creg cr_psrcvpktscount;
-       ipath_creg cr_psxmitdatacount;
-       ipath_creg cr_psxmitpktscount;
-       ipath_creg cr_psxmitwaitcount;
-};
-
-#endif                         /* _IPATH_REGISTERS_H */
diff --git a/drivers/staging/rdma/ipath/ipath_ruc.c b/drivers/staging/rdma/ipath/ipath_ruc.c
deleted file mode 100644 (file)
index e541a01..0000000
+++ /dev/null
@@ -1,733 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/spinlock.h>
-
-#include "ipath_verbs.h"
-#include "ipath_kernel.h"
-
-/*
- * Convert the AETH RNR timeout code into the number of milliseconds.
- */
-const u32 ib_ipath_rnr_table[32] = {
-       656,                    /* 0 */
-       1,                      /* 1 */
-       1,                      /* 2 */
-       1,                      /* 3 */
-       1,                      /* 4 */
-       1,                      /* 5 */
-       1,                      /* 6 */
-       1,                      /* 7 */
-       1,                      /* 8 */
-       1,                      /* 9 */
-       1,                      /* A */
-       1,                      /* B */
-       1,                      /* C */
-       1,                      /* D */
-       2,                      /* E */
-       2,                      /* F */
-       3,                      /* 10 */
-       4,                      /* 11 */
-       6,                      /* 12 */
-       8,                      /* 13 */
-       11,                     /* 14 */
-       16,                     /* 15 */
-       21,                     /* 16 */
-       31,                     /* 17 */
-       41,                     /* 18 */
-       62,                     /* 19 */
-       82,                     /* 1A */
-       123,                    /* 1B */
-       164,                    /* 1C */
-       246,                    /* 1D */
-       328,                    /* 1E */
-       492                     /* 1F */
-};
-
-/**
- * ipath_insert_rnr_queue - put QP on the RNR timeout list for the device
- * @qp: the QP
- *
- * Called with the QP s_lock held and interrupts disabled.
- * XXX Use a simple list for now.  We might need a priority
- * queue if we have lots of QPs waiting for RNR timeouts
- * but that should be rare.
- */
-void ipath_insert_rnr_queue(struct ipath_qp *qp)
-{
-       struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-
-       /* We already did a spin_lock_irqsave(), so just use spin_lock */
-       spin_lock(&dev->pending_lock);
-       if (list_empty(&dev->rnrwait))
-               list_add(&qp->timerwait, &dev->rnrwait);
-       else {
-               struct list_head *l = &dev->rnrwait;
-               struct ipath_qp *nqp = list_entry(l->next, struct ipath_qp,
-                                                 timerwait);
-
-               while (qp->s_rnr_timeout >= nqp->s_rnr_timeout) {
-                       qp->s_rnr_timeout -= nqp->s_rnr_timeout;
-                       l = l->next;
-                       if (l->next == &dev->rnrwait) {
-                               nqp = NULL;
-                               break;
-                       }
-                       nqp = list_entry(l->next, struct ipath_qp,
-                                        timerwait);
-               }
-               if (nqp)
-                       nqp->s_rnr_timeout -= qp->s_rnr_timeout;
-               list_add(&qp->timerwait, l);
-       }
-       spin_unlock(&dev->pending_lock);
-}
-
-/**
- * ipath_init_sge - Validate a RWQE and fill in the SGE state
- * @qp: the QP
- *
- * Return 1 if OK.
- */
-int ipath_init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe,
-                  u32 *lengthp, struct ipath_sge_state *ss)
-{
-       int i, j, ret;
-       struct ib_wc wc;
-
-       *lengthp = 0;
-       for (i = j = 0; i < wqe->num_sge; i++) {
-               if (wqe->sg_list[i].length == 0)
-                       continue;
-               /* Check LKEY */
-               if (!ipath_lkey_ok(qp, j ? &ss->sg_list[j - 1] : &ss->sge,
-                                  &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
-                       goto bad_lkey;
-               *lengthp += wqe->sg_list[i].length;
-               j++;
-       }
-       ss->num_sge = j;
-       ret = 1;
-       goto bail;
-
-bad_lkey:
-       memset(&wc, 0, sizeof(wc));
-       wc.wr_id = wqe->wr_id;
-       wc.status = IB_WC_LOC_PROT_ERR;
-       wc.opcode = IB_WC_RECV;
-       wc.qp = &qp->ibqp;
-       /* Signal solicited completion event. */
-       ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
-       ret = 0;
-bail:
-       return ret;
-}
-
-/**
- * ipath_get_rwqe - copy the next RWQE into the QP's RWQE
- * @qp: the QP
- * @wr_id_only: update qp->r_wr_id only, not qp->r_sge
- *
- * Return 0 if no RWQE is available, otherwise return 1.
- *
- * Can be called from interrupt level.
- */
-int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
-{
-       unsigned long flags;
-       struct ipath_rq *rq;
-       struct ipath_rwq *wq;
-       struct ipath_srq *srq;
-       struct ipath_rwqe *wqe;
-       void (*handler)(struct ib_event *, void *);
-       u32 tail;
-       int ret;
-
-       if (qp->ibqp.srq) {
-               srq = to_isrq(qp->ibqp.srq);
-               handler = srq->ibsrq.event_handler;
-               rq = &srq->rq;
-       } else {
-               srq = NULL;
-               handler = NULL;
-               rq = &qp->r_rq;
-       }
-
-       spin_lock_irqsave(&rq->lock, flags);
-       if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
-               ret = 0;
-               goto unlock;
-       }
-
-       wq = rq->wq;
-       tail = wq->tail;
-       /* Validate tail before using it since it is user writable. */
-       if (tail >= rq->size)
-               tail = 0;
-       do {
-               if (unlikely(tail == wq->head)) {
-                       ret = 0;
-                       goto unlock;
-               }
-               /* Make sure entry is read after head index is read. */
-               smp_rmb();
-               wqe = get_rwqe_ptr(rq, tail);
-               if (++tail >= rq->size)
-                       tail = 0;
-               if (wr_id_only)
-                       break;
-               qp->r_sge.sg_list = qp->r_sg_list;
-       } while (!ipath_init_sge(qp, wqe, &qp->r_len, &qp->r_sge));
-       qp->r_wr_id = wqe->wr_id;
-       wq->tail = tail;
-
-       ret = 1;
-       set_bit(IPATH_R_WRID_VALID, &qp->r_aflags);
-       if (handler) {
-               u32 n;
-
-               /*
-                * validate head pointer value and compute
-                * the number of remaining WQEs.
-                */
-               n = wq->head;
-               if (n >= rq->size)
-                       n = 0;
-               if (n < tail)
-                       n += rq->size - tail;
-               else
-                       n -= tail;
-               if (n < srq->limit) {
-                       struct ib_event ev;
-
-                       srq->limit = 0;
-                       spin_unlock_irqrestore(&rq->lock, flags);
-                       ev.device = qp->ibqp.device;
-                       ev.element.srq = qp->ibqp.srq;
-                       ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
-                       handler(&ev, srq->ibsrq.srq_context);
-                       goto bail;
-               }
-       }
-unlock:
-       spin_unlock_irqrestore(&rq->lock, flags);
-bail:
-       return ret;
-}
-
-/**
- * ipath_ruc_loopback - handle UC and RC lookback requests
- * @sqp: the sending QP
- *
- * This is called from ipath_do_send() to
- * forward a WQE addressed to the same HCA.
- * Note that although we are single threaded due to the tasklet, we still
- * have to protect against post_send().  We don't have to worry about
- * receive interrupts since this is a connected protocol and all packets
- * will pass through here.
- */
-static void ipath_ruc_loopback(struct ipath_qp *sqp)
-{
-       struct ipath_ibdev *dev = to_idev(sqp->ibqp.device);
-       struct ipath_qp *qp;
-       struct ipath_swqe *wqe;
-       struct ipath_sge *sge;
-       unsigned long flags;
-       struct ib_wc wc;
-       u64 sdata;
-       atomic64_t *maddr;
-       enum ib_wc_status send_status;
-
-       /*
-        * Note that we check the responder QP state after
-        * checking the requester's state.
-        */
-       qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn);
-
-       spin_lock_irqsave(&sqp->s_lock, flags);
-
-       /* Return if we are already busy processing a work request. */
-       if ((sqp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) ||
-           !(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_OR_FLUSH_SEND))
-               goto unlock;
-
-       sqp->s_flags |= IPATH_S_BUSY;
-
-again:
-       if (sqp->s_last == sqp->s_head)
-               goto clr_busy;
-       wqe = get_swqe_ptr(sqp, sqp->s_last);
-
-       /* Return if it is not OK to start a new work reqeust. */
-       if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_NEXT_SEND_OK)) {
-               if (!(ib_ipath_state_ops[sqp->state] & IPATH_FLUSH_SEND))
-                       goto clr_busy;
-               /* We are in the error state, flush the work request. */
-               send_status = IB_WC_WR_FLUSH_ERR;
-               goto flush_send;
-       }
-
-       /*
-        * We can rely on the entry not changing without the s_lock
-        * being held until we update s_last.
-        * We increment s_cur to indicate s_last is in progress.
-        */
-       if (sqp->s_last == sqp->s_cur) {
-               if (++sqp->s_cur >= sqp->s_size)
-                       sqp->s_cur = 0;
-       }
-       spin_unlock_irqrestore(&sqp->s_lock, flags);
-
-       if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
-               dev->n_pkt_drops++;
-               /*
-                * For RC, the requester would timeout and retry so
-                * shortcut the timeouts and just signal too many retries.
-                */
-               if (sqp->ibqp.qp_type == IB_QPT_RC)
-                       send_status = IB_WC_RETRY_EXC_ERR;
-               else
-                       send_status = IB_WC_SUCCESS;
-               goto serr;
-       }
-
-       memset(&wc, 0, sizeof wc);
-       send_status = IB_WC_SUCCESS;
-
-       sqp->s_sge.sge = wqe->sg_list[0];
-       sqp->s_sge.sg_list = wqe->sg_list + 1;
-       sqp->s_sge.num_sge = wqe->wr.num_sge;
-       sqp->s_len = wqe->length;
-       switch (wqe->wr.opcode) {
-       case IB_WR_SEND_WITH_IMM:
-               wc.wc_flags = IB_WC_WITH_IMM;
-               wc.ex.imm_data = wqe->wr.ex.imm_data;
-               /* FALLTHROUGH */
-       case IB_WR_SEND:
-               if (!ipath_get_rwqe(qp, 0))
-                       goto rnr_nak;
-               break;
-
-       case IB_WR_RDMA_WRITE_WITH_IMM:
-               if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
-                       goto inv_err;
-               wc.wc_flags = IB_WC_WITH_IMM;
-               wc.ex.imm_data = wqe->wr.ex.imm_data;
-               if (!ipath_get_rwqe(qp, 1))
-                       goto rnr_nak;
-               /* FALLTHROUGH */
-       case IB_WR_RDMA_WRITE:
-               if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
-                       goto inv_err;
-               if (wqe->length == 0)
-                       break;
-               if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, wqe->length,
-                                           wqe->rdma_wr.remote_addr,
-                                           wqe->rdma_wr.rkey,
-                                           IB_ACCESS_REMOTE_WRITE)))
-                       goto acc_err;
-               break;
-
-       case IB_WR_RDMA_READ:
-               if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
-                       goto inv_err;
-               if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length,
-                                           wqe->rdma_wr.remote_addr,
-                                           wqe->rdma_wr.rkey,
-                                           IB_ACCESS_REMOTE_READ)))
-                       goto acc_err;
-               qp->r_sge.sge = wqe->sg_list[0];
-               qp->r_sge.sg_list = wqe->sg_list + 1;
-               qp->r_sge.num_sge = wqe->wr.num_sge;
-               break;
-
-       case IB_WR_ATOMIC_CMP_AND_SWP:
-       case IB_WR_ATOMIC_FETCH_AND_ADD:
-               if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
-                       goto inv_err;
-               if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64),
-                                           wqe->atomic_wr.remote_addr,
-                                           wqe->atomic_wr.rkey,
-                                           IB_ACCESS_REMOTE_ATOMIC)))
-                       goto acc_err;
-               /* Perform atomic OP and save result. */
-               maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
-               sdata = wqe->atomic_wr.compare_add;
-               *(u64 *) sqp->s_sge.sge.vaddr =
-                       (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
-                       (u64) atomic64_add_return(sdata, maddr) - sdata :
-                       (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
-                                     sdata, wqe->atomic_wr.swap);
-               goto send_comp;
-
-       default:
-               send_status = IB_WC_LOC_QP_OP_ERR;
-               goto serr;
-       }
-
-       sge = &sqp->s_sge.sge;
-       while (sqp->s_len) {
-               u32 len = sqp->s_len;
-
-               if (len > sge->length)
-                       len = sge->length;
-               if (len > sge->sge_length)
-                       len = sge->sge_length;
-               BUG_ON(len == 0);
-               ipath_copy_sge(&qp->r_sge, sge->vaddr, len);
-               sge->vaddr += len;
-               sge->length -= len;
-               sge->sge_length -= len;
-               if (sge->sge_length == 0) {
-                       if (--sqp->s_sge.num_sge)
-                               *sge = *sqp->s_sge.sg_list++;
-               } else if (sge->length == 0 && sge->mr != NULL) {
-                       if (++sge->n >= IPATH_SEGSZ) {
-                               if (++sge->m >= sge->mr->mapsz)
-                                       break;
-                               sge->n = 0;
-                       }
-                       sge->vaddr =
-                               sge->mr->map[sge->m]->segs[sge->n].vaddr;
-                       sge->length =
-                               sge->mr->map[sge->m]->segs[sge->n].length;
-               }
-               sqp->s_len -= len;
-       }
-
-       if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
-               goto send_comp;
-
-       if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
-               wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
-       else
-               wc.opcode = IB_WC_RECV;
-       wc.wr_id = qp->r_wr_id;
-       wc.status = IB_WC_SUCCESS;
-       wc.byte_len = wqe->length;
-       wc.qp = &qp->ibqp;
-       wc.src_qp = qp->remote_qpn;
-       wc.slid = qp->remote_ah_attr.dlid;
-       wc.sl = qp->remote_ah_attr.sl;
-       wc.port_num = 1;
-       /* Signal completion event if the solicited bit is set. */
-       ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
-                      wqe->wr.send_flags & IB_SEND_SOLICITED);
-
-send_comp:
-       spin_lock_irqsave(&sqp->s_lock, flags);
-flush_send:
-       sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
-       ipath_send_complete(sqp, wqe, send_status);
-       goto again;
-
-rnr_nak:
-       /* Handle RNR NAK */
-       if (qp->ibqp.qp_type == IB_QPT_UC)
-               goto send_comp;
-       /*
-        * Note: we don't need the s_lock held since the BUSY flag
-        * makes this single threaded.
-        */
-       if (sqp->s_rnr_retry == 0) {
-               send_status = IB_WC_RNR_RETRY_EXC_ERR;
-               goto serr;
-       }
-       if (sqp->s_rnr_retry_cnt < 7)
-               sqp->s_rnr_retry--;
-       spin_lock_irqsave(&sqp->s_lock, flags);
-       if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_RECV_OK))
-               goto clr_busy;
-       sqp->s_flags |= IPATH_S_WAITING;
-       dev->n_rnr_naks++;
-       sqp->s_rnr_timeout = ib_ipath_rnr_table[qp->r_min_rnr_timer];
-       ipath_insert_rnr_queue(sqp);
-       goto clr_busy;
-
-inv_err:
-       send_status = IB_WC_REM_INV_REQ_ERR;
-       wc.status = IB_WC_LOC_QP_OP_ERR;
-       goto err;
-
-acc_err:
-       send_status = IB_WC_REM_ACCESS_ERR;
-       wc.status = IB_WC_LOC_PROT_ERR;
-err:
-       /* responder goes to error state */
-       ipath_rc_error(qp, wc.status);
-
-serr:
-       spin_lock_irqsave(&sqp->s_lock, flags);
-       ipath_send_complete(sqp, wqe, send_status);
-       if (sqp->ibqp.qp_type == IB_QPT_RC) {
-               int lastwqe = ipath_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
-
-               sqp->s_flags &= ~IPATH_S_BUSY;
-               spin_unlock_irqrestore(&sqp->s_lock, flags);
-               if (lastwqe) {
-                       struct ib_event ev;
-
-                       ev.device = sqp->ibqp.device;
-                       ev.element.qp = &sqp->ibqp;
-                       ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
-                       sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context);
-               }
-               goto done;
-       }
-clr_busy:
-       sqp->s_flags &= ~IPATH_S_BUSY;
-unlock:
-       spin_unlock_irqrestore(&sqp->s_lock, flags);
-done:
-       if (qp && atomic_dec_and_test(&qp->refcount))
-               wake_up(&qp->wait);
-}
-
-static void want_buffer(struct ipath_devdata *dd, struct ipath_qp *qp)
-{
-       if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA) ||
-           qp->ibqp.qp_type == IB_QPT_SMI) {
-               unsigned long flags;
-
-               spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-               dd->ipath_sendctrl |= INFINIPATH_S_PIOINTBUFAVAIL;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-                                dd->ipath_sendctrl);
-               ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-               spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-       }
-}
-
-/**
- * ipath_no_bufs_available - tell the layer driver we need buffers
- * @qp: the QP that caused the problem
- * @dev: the device we ran out of buffers on
- *
- * Called when we run out of PIO buffers.
- * If we are now in the error state, return zero to flush the
- * send work request.
- */
-static int ipath_no_bufs_available(struct ipath_qp *qp,
-                                   struct ipath_ibdev *dev)
-{
-       unsigned long flags;
-       int ret = 1;
-
-       /*
-        * Note that as soon as want_buffer() is called and
-        * possibly before it returns, ipath_ib_piobufavail()
-        * could be called. Therefore, put QP on the piowait list before
-        * enabling the PIO avail interrupt.
-        */
-       spin_lock_irqsave(&qp->s_lock, flags);
-       if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) {
-               dev->n_piowait++;
-               qp->s_flags |= IPATH_S_WAITING;
-               qp->s_flags &= ~IPATH_S_BUSY;
-               spin_lock(&dev->pending_lock);
-               if (list_empty(&qp->piowait))
-                       list_add_tail(&qp->piowait, &dev->piowait);
-               spin_unlock(&dev->pending_lock);
-       } else
-               ret = 0;
-       spin_unlock_irqrestore(&qp->s_lock, flags);
-       if (ret)
-               want_buffer(dev->dd, qp);
-       return ret;
-}
-
-/**
- * ipath_make_grh - construct a GRH header
- * @dev: a pointer to the ipath device
- * @hdr: a pointer to the GRH header being constructed
- * @grh: the global route address to send to
- * @hwords: the number of 32 bit words of header being sent
- * @nwords: the number of 32 bit words of data being sent
- *
- * Return the size of the header in 32 bit words.
- */
-u32 ipath_make_grh(struct ipath_ibdev *dev, struct ib_grh *hdr,
-                  struct ib_global_route *grh, u32 hwords, u32 nwords)
-{
-       hdr->version_tclass_flow =
-               cpu_to_be32((6 << 28) |
-                           (grh->traffic_class << 20) |
-                           grh->flow_label);
-       hdr->paylen = cpu_to_be16((hwords - 2 + nwords + SIZE_OF_CRC) << 2);
-       /* next_hdr is defined by C8-7 in ch. 8.4.1 */
-       hdr->next_hdr = 0x1B;
-       hdr->hop_limit = grh->hop_limit;
-       /* The SGID is 32-bit aligned. */
-       hdr->sgid.global.subnet_prefix = dev->gid_prefix;
-       hdr->sgid.global.interface_id = dev->dd->ipath_guid;
-       hdr->dgid = grh->dgid;
-
-       /* GRH header size in 32-bit words. */
-       return sizeof(struct ib_grh) / sizeof(u32);
-}
-
-void ipath_make_ruc_header(struct ipath_ibdev *dev, struct ipath_qp *qp,
-                          struct ipath_other_headers *ohdr,
-                          u32 bth0, u32 bth2)
-{
-       u16 lrh0;
-       u32 nwords;
-       u32 extra_bytes;
-
-       /* Construct the header. */
-       extra_bytes = -qp->s_cur_size & 3;
-       nwords = (qp->s_cur_size + extra_bytes) >> 2;
-       lrh0 = IPATH_LRH_BTH;
-       if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
-               qp->s_hdrwords += ipath_make_grh(dev, &qp->s_hdr.u.l.grh,
-                                                &qp->remote_ah_attr.grh,
-                                                qp->s_hdrwords, nwords);
-               lrh0 = IPATH_LRH_GRH;
-       }
-       lrh0 |= qp->remote_ah_attr.sl << 4;
-       qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
-       qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
-       qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
-       qp->s_hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid |
-                                      qp->remote_ah_attr.src_path_bits);
-       bth0 |= ipath_get_pkey(dev->dd, qp->s_pkey_index);
-       bth0 |= extra_bytes << 20;
-       ohdr->bth[0] = cpu_to_be32(bth0 | (1 << 22));
-       ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
-       ohdr->bth[2] = cpu_to_be32(bth2);
-}
-
-/**
- * ipath_do_send - perform a send on a QP
- * @data: contains a pointer to the QP
- *
- * Process entries in the send work queue until credit or queue is
- * exhausted.  Only allow one CPU to send a packet per QP (tasklet).
- * Otherwise, two threads could send packets out of order.
- */
-void ipath_do_send(unsigned long data)
-{
-       struct ipath_qp *qp = (struct ipath_qp *)data;
-       struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-       int (*make_req)(struct ipath_qp *qp);
-       unsigned long flags;
-
-       if ((qp->ibqp.qp_type == IB_QPT_RC ||
-            qp->ibqp.qp_type == IB_QPT_UC) &&
-           qp->remote_ah_attr.dlid == dev->dd->ipath_lid) {
-               ipath_ruc_loopback(qp);
-               goto bail;
-       }
-
-       if (qp->ibqp.qp_type == IB_QPT_RC)
-              make_req = ipath_make_rc_req;
-       else if (qp->ibqp.qp_type == IB_QPT_UC)
-              make_req = ipath_make_uc_req;
-       else
-              make_req = ipath_make_ud_req;
-
-       spin_lock_irqsave(&qp->s_lock, flags);
-
-       /* Return if we are already busy processing a work request. */
-       if ((qp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) ||
-           !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND)) {
-               spin_unlock_irqrestore(&qp->s_lock, flags);
-               goto bail;
-       }
-
-       qp->s_flags |= IPATH_S_BUSY;
-
-       spin_unlock_irqrestore(&qp->s_lock, flags);
-
-again:
-       /* Check for a constructed packet to be sent. */
-       if (qp->s_hdrwords != 0) {
-               /*
-                * If no PIO bufs are available, return.  An interrupt will
-                * call ipath_ib_piobufavail() when one is available.
-                */
-               if (ipath_verbs_send(qp, &qp->s_hdr, qp->s_hdrwords,
-                                    qp->s_cur_sge, qp->s_cur_size)) {
-                       if (ipath_no_bufs_available(qp, dev))
-                               goto bail;
-               }
-               dev->n_unicast_xmit++;
-               /* Record that we sent the packet and s_hdr is empty. */
-               qp->s_hdrwords = 0;
-       }
-
-       if (make_req(qp))
-               goto again;
-
-bail:;
-}
-
-/*
- * This should be called with s_lock held.
- */
-void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe,
-                        enum ib_wc_status status)
-{
-       u32 old_last, last;
-
-       if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND))
-               return;
-
-       /* See ch. 11.2.4.1 and 10.7.3.1 */
-       if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
-           (wqe->wr.send_flags & IB_SEND_SIGNALED) ||
-           status != IB_WC_SUCCESS) {
-               struct ib_wc wc;
-
-               memset(&wc, 0, sizeof wc);
-               wc.wr_id = wqe->wr.wr_id;
-               wc.status = status;
-               wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
-               wc.qp = &qp->ibqp;
-               if (status == IB_WC_SUCCESS)
-                       wc.byte_len = wqe->length;
-               ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc,
-                              status != IB_WC_SUCCESS);
-       }
-
-       old_last = last = qp->s_last;
-       if (++last >= qp->s_size)
-               last = 0;
-       qp->s_last = last;
-       if (qp->s_cur == old_last)
-               qp->s_cur = last;
-       if (qp->s_tail == old_last)
-               qp->s_tail = last;
-       if (qp->state == IB_QPS_SQD && last == qp->s_cur)
-               qp->s_draining = 0;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_sdma.c b/drivers/staging/rdma/ipath/ipath_sdma.c
deleted file mode 100644 (file)
index 1ffc06a..0000000
+++ /dev/null
@@ -1,818 +0,0 @@
-/*
- * Copyright (c) 2007, 2008 QLogic Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/spinlock.h>
-#include <linux/gfp.h>
-
-#include "ipath_kernel.h"
-#include "ipath_verbs.h"
-#include "ipath_common.h"
-
-#define SDMA_DESCQ_SZ PAGE_SIZE /* 256 entries per 4KB page */
-
-static void vl15_watchdog_enq(struct ipath_devdata *dd)
-{
-       /* ipath_sdma_lock must already be held */
-       if (atomic_inc_return(&dd->ipath_sdma_vl15_count) == 1) {
-               unsigned long interval = (HZ + 19) / 20;
-               dd->ipath_sdma_vl15_timer.expires = jiffies + interval;
-               add_timer(&dd->ipath_sdma_vl15_timer);
-       }
-}
-
-static void vl15_watchdog_deq(struct ipath_devdata *dd)
-{
-       /* ipath_sdma_lock must already be held */
-       if (atomic_dec_return(&dd->ipath_sdma_vl15_count) != 0) {
-               unsigned long interval = (HZ + 19) / 20;
-               mod_timer(&dd->ipath_sdma_vl15_timer, jiffies + interval);
-       } else {
-               del_timer(&dd->ipath_sdma_vl15_timer);
-       }
-}
-
-static void vl15_watchdog_timeout(unsigned long opaque)
-{
-       struct ipath_devdata *dd = (struct ipath_devdata *)opaque;
-
-       if (atomic_read(&dd->ipath_sdma_vl15_count) != 0) {
-               ipath_dbg("vl15 watchdog timeout - clearing\n");
-               ipath_cancel_sends(dd, 1);
-               ipath_hol_down(dd);
-       } else {
-               ipath_dbg("vl15 watchdog timeout - "
-                         "condition already cleared\n");
-       }
-}
-
-static void unmap_desc(struct ipath_devdata *dd, unsigned head)
-{
-       __le64 *descqp = &dd->ipath_sdma_descq[head].qw[0];
-       u64 desc[2];
-       dma_addr_t addr;
-       size_t len;
-
-       desc[0] = le64_to_cpu(descqp[0]);
-       desc[1] = le64_to_cpu(descqp[1]);
-
-       addr = (desc[1] << 32) | (desc[0] >> 32);
-       len = (desc[0] >> 14) & (0x7ffULL << 2);
-       dma_unmap_single(&dd->pcidev->dev, addr, len, DMA_TO_DEVICE);
-}
-
-/*
- * ipath_sdma_lock should be locked before calling this.
- */
-int ipath_sdma_make_progress(struct ipath_devdata *dd)
-{
-       struct list_head *lp = NULL;
-       struct ipath_sdma_txreq *txp = NULL;
-       u16 dmahead;
-       u16 start_idx = 0;
-       int progress = 0;
-
-       if (!list_empty(&dd->ipath_sdma_activelist)) {
-               lp = dd->ipath_sdma_activelist.next;
-               txp = list_entry(lp, struct ipath_sdma_txreq, list);
-               start_idx = txp->start_idx;
-       }
-
-       /*
-        * Read the SDMA head register in order to know that the
-        * interrupt clear has been written to the chip.
-        * Otherwise, we may not get an interrupt for the last
-        * descriptor in the queue.
-        */
-       dmahead = (u16)ipath_read_kreg32(dd, dd->ipath_kregs->kr_senddmahead);
-       /* sanity check return value for error handling (chip reset, etc.) */
-       if (dmahead >= dd->ipath_sdma_descq_cnt)
-               goto done;
-
-       while (dd->ipath_sdma_descq_head != dmahead) {
-               if (txp && txp->flags & IPATH_SDMA_TXREQ_F_FREEDESC &&
-                   dd->ipath_sdma_descq_head == start_idx) {
-                       unmap_desc(dd, dd->ipath_sdma_descq_head);
-                       start_idx++;
-                       if (start_idx == dd->ipath_sdma_descq_cnt)
-                               start_idx = 0;
-               }
-
-               /* increment free count and head */
-               dd->ipath_sdma_descq_removed++;
-               if (++dd->ipath_sdma_descq_head == dd->ipath_sdma_descq_cnt)
-                       dd->ipath_sdma_descq_head = 0;
-
-               if (txp && txp->next_descq_idx == dd->ipath_sdma_descq_head) {
-                       /* move to notify list */
-                       if (txp->flags & IPATH_SDMA_TXREQ_F_VL15)
-                               vl15_watchdog_deq(dd);
-                       list_move_tail(lp, &dd->ipath_sdma_notifylist);
-                       if (!list_empty(&dd->ipath_sdma_activelist)) {
-                               lp = dd->ipath_sdma_activelist.next;
-                               txp = list_entry(lp, struct ipath_sdma_txreq,
-                                                list);
-                               start_idx = txp->start_idx;
-                       } else {
-                               lp = NULL;
-                               txp = NULL;
-                       }
-               }
-               progress = 1;
-       }
-
-       if (progress)
-               tasklet_hi_schedule(&dd->ipath_sdma_notify_task);
-
-done:
-       return progress;
-}
-
-static void ipath_sdma_notify(struct ipath_devdata *dd, struct list_head *list)
-{
-       struct ipath_sdma_txreq *txp, *txp_next;
-
-       list_for_each_entry_safe(txp, txp_next, list, list) {
-               list_del_init(&txp->list);
-
-               if (txp->callback)
-                       (*txp->callback)(txp->callback_cookie,
-                                        txp->callback_status);
-       }
-}
-
-static void sdma_notify_taskbody(struct ipath_devdata *dd)
-{
-       unsigned long flags;
-       struct list_head list;
-
-       INIT_LIST_HEAD(&list);
-
-       spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-
-       list_splice_init(&dd->ipath_sdma_notifylist, &list);
-
-       spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-
-       ipath_sdma_notify(dd, &list);
-
-       /*
-        * The IB verbs layer needs to see the callback before getting
-        * the call to ipath_ib_piobufavail() because the callback
-        * handles releasing resources the next send will need.
-        * Otherwise, we could do these calls in
-        * ipath_sdma_make_progress().
-        */
-       ipath_ib_piobufavail(dd->verbs_dev);
-}
-
-static void sdma_notify_task(unsigned long opaque)
-{
-       struct ipath_devdata *dd = (struct ipath_devdata *)opaque;
-
-       if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
-               sdma_notify_taskbody(dd);
-}
-
-static void dump_sdma_state(struct ipath_devdata *dd)
-{
-       unsigned long reg;
-
-       reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmastatus);
-       ipath_cdbg(VERBOSE, "kr_senddmastatus: 0x%016lx\n", reg);
-
-       reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendctrl);
-       ipath_cdbg(VERBOSE, "kr_sendctrl: 0x%016lx\n", reg);
-
-       reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmabufmask0);
-       ipath_cdbg(VERBOSE, "kr_senddmabufmask0: 0x%016lx\n", reg);
-
-       reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmabufmask1);
-       ipath_cdbg(VERBOSE, "kr_senddmabufmask1: 0x%016lx\n", reg);
-
-       reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmabufmask2);
-       ipath_cdbg(VERBOSE, "kr_senddmabufmask2: 0x%016lx\n", reg);
-
-       reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmatail);
-       ipath_cdbg(VERBOSE, "kr_senddmatail: 0x%016lx\n", reg);
-
-       reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmahead);
-       ipath_cdbg(VERBOSE, "kr_senddmahead: 0x%016lx\n", reg);
-}
-
-static void sdma_abort_task(unsigned long opaque)
-{
-       struct ipath_devdata *dd = (struct ipath_devdata *) opaque;
-       u64 status;
-       unsigned long flags;
-
-       if (test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
-               return;
-
-       spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-
-       status = dd->ipath_sdma_status & IPATH_SDMA_ABORT_MASK;
-
-       /* nothing to do */
-       if (status == IPATH_SDMA_ABORT_NONE)
-               goto unlock;
-
-       /* ipath_sdma_abort() is done, waiting for interrupt */
-       if (status == IPATH_SDMA_ABORT_DISARMED) {
-               if (time_before(jiffies, dd->ipath_sdma_abort_intr_timeout))
-                       goto resched_noprint;
-               /* give up, intr got lost somewhere */
-               ipath_dbg("give up waiting for SDMADISABLED intr\n");
-               __set_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status);
-               status = IPATH_SDMA_ABORT_ABORTED;
-       }
-
-       /* everything is stopped, time to clean up and restart */
-       if (status == IPATH_SDMA_ABORT_ABORTED) {
-               struct ipath_sdma_txreq *txp, *txpnext;
-               u64 hwstatus;
-               int notify = 0;
-
-               hwstatus = ipath_read_kreg64(dd,
-                               dd->ipath_kregs->kr_senddmastatus);
-
-               if ((hwstatus & (IPATH_SDMA_STATUS_SCORE_BOARD_DRAIN_IN_PROG |
-                                IPATH_SDMA_STATUS_ABORT_IN_PROG             |
-                                IPATH_SDMA_STATUS_INTERNAL_SDMA_ENABLE)) ||
-                   !(hwstatus & IPATH_SDMA_STATUS_SCB_EMPTY)) {
-                       if (dd->ipath_sdma_reset_wait > 0) {
-                               /* not done shutting down sdma */
-                               --dd->ipath_sdma_reset_wait;
-                               goto resched;
-                       }
-                       ipath_cdbg(VERBOSE, "gave up waiting for quiescent "
-                               "status after SDMA reset, continuing\n");
-                       dump_sdma_state(dd);
-               }
-
-               /* dequeue all "sent" requests */
-               list_for_each_entry_safe(txp, txpnext,
-                                        &dd->ipath_sdma_activelist, list) {
-                       txp->callback_status = IPATH_SDMA_TXREQ_S_ABORTED;
-                       if (txp->flags & IPATH_SDMA_TXREQ_F_VL15)
-                               vl15_watchdog_deq(dd);
-                       list_move_tail(&txp->list, &dd->ipath_sdma_notifylist);
-                       notify = 1;
-               }
-               if (notify)
-                       tasklet_hi_schedule(&dd->ipath_sdma_notify_task);
-
-               /* reset our notion of head and tail */
-               dd->ipath_sdma_descq_tail = 0;
-               dd->ipath_sdma_descq_head = 0;
-               dd->ipath_sdma_head_dma[0] = 0;
-               dd->ipath_sdma_generation = 0;
-               dd->ipath_sdma_descq_removed = dd->ipath_sdma_descq_added;
-
-               /* Reset SendDmaLenGen */
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmalengen,
-                       (u64) dd->ipath_sdma_descq_cnt | (1ULL << 18));
-
-               /* done with sdma state for a bit */
-               spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-
-               /*
-                * Don't restart sdma here (with the exception
-                * below). Wait until link is up to ACTIVE.  VL15 MADs
-                * used to bring the link up use PIO, and multiple link
-                * transitions otherwise cause the sdma engine to be
-                * stopped and started multiple times.
-                * The disable is done here, including the shadow,
-                * so the state is kept consistent.
-                * See ipath_restart_sdma() for the actual starting
-                * of sdma.
-                */
-               spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-               dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-                                dd->ipath_sendctrl);
-               ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-               spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-
-               /* make sure I see next message */
-               dd->ipath_sdma_abort_jiffies = 0;
-
-               /*
-                * Not everything that takes SDMA offline is a link
-                * status change.  If the link was up, restart SDMA.
-                */
-               if (dd->ipath_flags & IPATH_LINKACTIVE)
-                       ipath_restart_sdma(dd);
-
-               goto done;
-       }
-
-resched:
-       /*
-        * for now, keep spinning
-        * JAG - this is bad to just have default be a loop without
-        * state change
-        */
-       if (time_after(jiffies, dd->ipath_sdma_abort_jiffies)) {
-               ipath_dbg("looping with status 0x%08lx\n",
-                         dd->ipath_sdma_status);
-               dd->ipath_sdma_abort_jiffies = jiffies + 5 * HZ;
-       }
-resched_noprint:
-       spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-       if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
-               tasklet_hi_schedule(&dd->ipath_sdma_abort_task);
-       return;
-
-unlock:
-       spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-done:
-       return;
-}
-
-/*
- * This is called from interrupt context.
- */
-void ipath_sdma_intr(struct ipath_devdata *dd)
-{
-       unsigned long flags;
-
-       spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-
-       (void) ipath_sdma_make_progress(dd);
-
-       spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-}
-
-static int alloc_sdma(struct ipath_devdata *dd)
-{
-       int ret = 0;
-
-       /* Allocate memory for SendDMA descriptor FIFO */
-       dd->ipath_sdma_descq = dma_alloc_coherent(&dd->pcidev->dev,
-               SDMA_DESCQ_SZ, &dd->ipath_sdma_descq_phys, GFP_KERNEL);
-
-       if (!dd->ipath_sdma_descq) {
-               ipath_dev_err(dd, "failed to allocate SendDMA descriptor "
-                       "FIFO memory\n");
-               ret = -ENOMEM;
-               goto done;
-       }
-
-       dd->ipath_sdma_descq_cnt =
-               SDMA_DESCQ_SZ / sizeof(struct ipath_sdma_desc);
-
-       /* Allocate memory for DMA of head register to memory */
-       dd->ipath_sdma_head_dma = dma_alloc_coherent(&dd->pcidev->dev,
-               PAGE_SIZE, &dd->ipath_sdma_head_phys, GFP_KERNEL);
-       if (!dd->ipath_sdma_head_dma) {
-               ipath_dev_err(dd, "failed to allocate SendDMA head memory\n");
-               ret = -ENOMEM;
-               goto cleanup_descq;
-       }
-       dd->ipath_sdma_head_dma[0] = 0;
-
-       setup_timer(&dd->ipath_sdma_vl15_timer, vl15_watchdog_timeout,
-                       (unsigned long)dd);
-
-       atomic_set(&dd->ipath_sdma_vl15_count, 0);
-
-       goto done;
-
-cleanup_descq:
-       dma_free_coherent(&dd->pcidev->dev, SDMA_DESCQ_SZ,
-               (void *)dd->ipath_sdma_descq, dd->ipath_sdma_descq_phys);
-       dd->ipath_sdma_descq = NULL;
-       dd->ipath_sdma_descq_phys = 0;
-done:
-       return ret;
-}
-
-int setup_sdma(struct ipath_devdata *dd)
-{
-       int ret = 0;
-       unsigned i, n;
-       u64 tmp64;
-       u64 senddmabufmask[3] = { 0 };
-       unsigned long flags;
-
-       ret = alloc_sdma(dd);
-       if (ret)
-               goto done;
-
-       if (!dd->ipath_sdma_descq) {
-               ipath_dev_err(dd, "SendDMA memory not allocated\n");
-               goto done;
-       }
-
-       /*
-        * Set initial status as if we had been up, then gone down.
-        * This lets initial start on transition to ACTIVE be the
-        * same as restart after link flap.
-        */
-       dd->ipath_sdma_status = IPATH_SDMA_ABORT_ABORTED;
-       dd->ipath_sdma_abort_jiffies = 0;
-       dd->ipath_sdma_generation = 0;
-       dd->ipath_sdma_descq_tail = 0;
-       dd->ipath_sdma_descq_head = 0;
-       dd->ipath_sdma_descq_removed = 0;
-       dd->ipath_sdma_descq_added = 0;
-
-       /* Set SendDmaBase */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabase,
-                        dd->ipath_sdma_descq_phys);
-       /* Set SendDmaLenGen */
-       tmp64 = dd->ipath_sdma_descq_cnt;
-       tmp64 |= 1<<18; /* enable generation checking */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmalengen, tmp64);
-       /* Set SendDmaTail */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmatail,
-                        dd->ipath_sdma_descq_tail);
-       /* Set SendDmaHeadAddr */
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmaheadaddr,
-                        dd->ipath_sdma_head_phys);
-
-       /*
-        * Reserve all the former "kernel" piobufs, using high number range
-        * so we get as many 4K buffers as possible
-        */
-       n = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
-       i = dd->ipath_lastport_piobuf + dd->ipath_pioreserved;
-       ipath_chg_pioavailkernel(dd, i, n - i , 0);
-       for (; i < n; ++i) {
-               unsigned word = i / 64;
-               unsigned bit = i & 63;
-               BUG_ON(word >= 3);
-               senddmabufmask[word] |= 1ULL << bit;
-       }
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask0,
-                        senddmabufmask[0]);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask1,
-                        senddmabufmask[1]);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask2,
-                        senddmabufmask[2]);
-
-       INIT_LIST_HEAD(&dd->ipath_sdma_activelist);
-       INIT_LIST_HEAD(&dd->ipath_sdma_notifylist);
-
-       tasklet_init(&dd->ipath_sdma_notify_task, sdma_notify_task,
-                    (unsigned long) dd);
-       tasklet_init(&dd->ipath_sdma_abort_task, sdma_abort_task,
-                    (unsigned long) dd);
-
-       /*
-        * No use to turn on SDMA here, as link is probably not ACTIVE
-        * Just mark it RUNNING and enable the interrupt, and let the
-        * ipath_restart_sdma() on link transition to ACTIVE actually
-        * enable it.
-        */
-       spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-       dd->ipath_sendctrl |= INFINIPATH_S_SDMAINTENABLE;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
-       ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-       __set_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status);
-       spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-
-done:
-       return ret;
-}
-
-void teardown_sdma(struct ipath_devdata *dd)
-{
-       struct ipath_sdma_txreq *txp, *txpnext;
-       unsigned long flags;
-       dma_addr_t sdma_head_phys = 0;
-       dma_addr_t sdma_descq_phys = 0;
-       void *sdma_descq = NULL;
-       void *sdma_head_dma = NULL;
-
-       spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-       __clear_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status);
-       __set_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status);
-       __set_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status);
-       spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-
-       tasklet_kill(&dd->ipath_sdma_abort_task);
-       tasklet_kill(&dd->ipath_sdma_notify_task);
-
-       /* turn off sdma */
-       spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-       dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-               dd->ipath_sendctrl);
-       ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-       spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-
-       spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-       /* dequeue all "sent" requests */
-       list_for_each_entry_safe(txp, txpnext, &dd->ipath_sdma_activelist,
-                                list) {
-               txp->callback_status = IPATH_SDMA_TXREQ_S_SHUTDOWN;
-               if (txp->flags & IPATH_SDMA_TXREQ_F_VL15)
-                       vl15_watchdog_deq(dd);
-               list_move_tail(&txp->list, &dd->ipath_sdma_notifylist);
-       }
-       spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-
-       sdma_notify_taskbody(dd);
-
-       del_timer_sync(&dd->ipath_sdma_vl15_timer);
-
-       spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-
-       dd->ipath_sdma_abort_jiffies = 0;
-
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabase, 0);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmalengen, 0);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmatail, 0);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmaheadaddr, 0);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask0, 0);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask1, 0);
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask2, 0);
-
-       if (dd->ipath_sdma_head_dma) {
-               sdma_head_dma = (void *) dd->ipath_sdma_head_dma;
-               sdma_head_phys = dd->ipath_sdma_head_phys;
-               dd->ipath_sdma_head_dma = NULL;
-               dd->ipath_sdma_head_phys = 0;
-       }
-
-       if (dd->ipath_sdma_descq) {
-               sdma_descq = dd->ipath_sdma_descq;
-               sdma_descq_phys = dd->ipath_sdma_descq_phys;
-               dd->ipath_sdma_descq = NULL;
-               dd->ipath_sdma_descq_phys = 0;
-       }
-
-       spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-
-       if (sdma_head_dma)
-               dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
-                                 sdma_head_dma, sdma_head_phys);
-
-       if (sdma_descq)
-               dma_free_coherent(&dd->pcidev->dev, SDMA_DESCQ_SZ,
-                                 sdma_descq, sdma_descq_phys);
-}
-
-/*
- * [Re]start SDMA, if we use it, and it's not already OK.
- * This is called on transition to link ACTIVE, either the first or
- * subsequent times.
- */
-void ipath_restart_sdma(struct ipath_devdata *dd)
-{
-       unsigned long flags;
-       int needed = 1;
-
-       if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA))
-               goto bail;
-
-       /*
-        * First, make sure we should, which is to say,
-        * check that we are "RUNNING" (not in teardown)
-        * and not "SHUTDOWN"
-        */
-       spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-       if (!test_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status)
-               || test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
-                       needed = 0;
-       else {
-               __clear_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status);
-               __clear_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status);
-               __clear_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status);
-       }
-       spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-       if (!needed) {
-               ipath_dbg("invalid attempt to restart SDMA, status 0x%08lx\n",
-                       dd->ipath_sdma_status);
-               goto bail;
-       }
-       spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-       /*
-        * First clear, just to be safe. Enable is only done
-        * in chip on 0->1 transition
-        */
-       dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
-       ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-       dd->ipath_sendctrl |= INFINIPATH_S_SDMAENABLE;
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
-       ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-       spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-
-       /* notify upper layers */
-       ipath_ib_piobufavail(dd->verbs_dev);
-
-bail:
-       return;
-}
-
-static inline void make_sdma_desc(struct ipath_devdata *dd,
-       u64 *sdmadesc, u64 addr, u64 dwlen, u64 dwoffset)
-{
-       WARN_ON(addr & 3);
-       /* SDmaPhyAddr[47:32] */
-       sdmadesc[1] = addr >> 32;
-       /* SDmaPhyAddr[31:0] */
-       sdmadesc[0] = (addr & 0xfffffffcULL) << 32;
-       /* SDmaGeneration[1:0] */
-       sdmadesc[0] |= (dd->ipath_sdma_generation & 3ULL) << 30;
-       /* SDmaDwordCount[10:0] */
-       sdmadesc[0] |= (dwlen & 0x7ffULL) << 16;
-       /* SDmaBufOffset[12:2] */
-       sdmadesc[0] |= dwoffset & 0x7ffULL;
-}
-
-/*
- * This function queues one IB packet onto the send DMA queue per call.
- * The caller is responsible for checking:
- * 1) The number of send DMA descriptor entries is less than the size of
- *    the descriptor queue.
- * 2) The IB SGE addresses and lengths are 32-bit aligned
- *    (except possibly the last SGE's length)
- * 3) The SGE addresses are suitable for passing to dma_map_single().
- */
-int ipath_sdma_verbs_send(struct ipath_devdata *dd,
-       struct ipath_sge_state *ss, u32 dwords,
-       struct ipath_verbs_txreq *tx)
-{
-
-       unsigned long flags;
-       struct ipath_sge *sge;
-       int ret = 0;
-       u16 tail;
-       __le64 *descqp;
-       u64 sdmadesc[2];
-       u32 dwoffset;
-       dma_addr_t addr;
-
-       if ((tx->map_len + (dwords<<2)) > dd->ipath_ibmaxlen) {
-               ipath_dbg("packet size %X > ibmax %X, fail\n",
-                       tx->map_len + (dwords<<2), dd->ipath_ibmaxlen);
-               ret = -EMSGSIZE;
-               goto fail;
-       }
-
-       spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-
-retry:
-       if (unlikely(test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status))) {
-               ret = -EBUSY;
-               goto unlock;
-       }
-
-       if (tx->txreq.sg_count > ipath_sdma_descq_freecnt(dd)) {
-               if (ipath_sdma_make_progress(dd))
-                       goto retry;
-               ret = -ENOBUFS;
-               goto unlock;
-       }
-
-       addr = dma_map_single(&dd->pcidev->dev, tx->txreq.map_addr,
-                             tx->map_len, DMA_TO_DEVICE);
-       if (dma_mapping_error(&dd->pcidev->dev, addr))
-               goto ioerr;
-
-       dwoffset = tx->map_len >> 2;
-       make_sdma_desc(dd, sdmadesc, (u64) addr, dwoffset, 0);
-
-       /* SDmaFirstDesc */
-       sdmadesc[0] |= 1ULL << 12;
-       if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_USELARGEBUF)
-               sdmadesc[0] |= 1ULL << 14;      /* SDmaUseLargeBuf */
-
-       /* write to the descq */
-       tail = dd->ipath_sdma_descq_tail;
-       descqp = &dd->ipath_sdma_descq[tail].qw[0];
-       *descqp++ = cpu_to_le64(sdmadesc[0]);
-       *descqp++ = cpu_to_le64(sdmadesc[1]);
-
-       if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEDESC)
-               tx->txreq.start_idx = tail;
-
-       /* increment the tail */
-       if (++tail == dd->ipath_sdma_descq_cnt) {
-               tail = 0;
-               descqp = &dd->ipath_sdma_descq[0].qw[0];
-               ++dd->ipath_sdma_generation;
-       }
-
-       sge = &ss->sge;
-       while (dwords) {
-               u32 dw;
-               u32 len;
-
-               len = dwords << 2;
-               if (len > sge->length)
-                       len = sge->length;
-               if (len > sge->sge_length)
-                       len = sge->sge_length;
-               BUG_ON(len == 0);
-               dw = (len + 3) >> 2;
-               addr = dma_map_single(&dd->pcidev->dev, sge->vaddr, dw << 2,
-                                     DMA_TO_DEVICE);
-               if (dma_mapping_error(&dd->pcidev->dev, addr))
-                       goto unmap;
-               make_sdma_desc(dd, sdmadesc, (u64) addr, dw, dwoffset);
-               /* SDmaUseLargeBuf has to be set in every descriptor */
-               if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_USELARGEBUF)
-                       sdmadesc[0] |= 1ULL << 14;
-               /* write to the descq */
-               *descqp++ = cpu_to_le64(sdmadesc[0]);
-               *descqp++ = cpu_to_le64(sdmadesc[1]);
-
-               /* increment the tail */
-               if (++tail == dd->ipath_sdma_descq_cnt) {
-                       tail = 0;
-                       descqp = &dd->ipath_sdma_descq[0].qw[0];
-                       ++dd->ipath_sdma_generation;
-               }
-               sge->vaddr += len;
-               sge->length -= len;
-               sge->sge_length -= len;
-               if (sge->sge_length == 0) {
-                       if (--ss->num_sge)
-                               *sge = *ss->sg_list++;
-               } else if (sge->length == 0 && sge->mr != NULL) {
-                       if (++sge->n >= IPATH_SEGSZ) {
-                               if (++sge->m >= sge->mr->mapsz)
-                                       break;
-                               sge->n = 0;
-                       }
-                       sge->vaddr =
-                               sge->mr->map[sge->m]->segs[sge->n].vaddr;
-                       sge->length =
-                               sge->mr->map[sge->m]->segs[sge->n].length;
-               }
-
-               dwoffset += dw;
-               dwords -= dw;
-       }
-
-       if (!tail)
-               descqp = &dd->ipath_sdma_descq[dd->ipath_sdma_descq_cnt].qw[0];
-       descqp -= 2;
-       /* SDmaLastDesc */
-       descqp[0] |= cpu_to_le64(1ULL << 11);
-       if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_INTREQ) {
-               /* SDmaIntReq */
-               descqp[0] |= cpu_to_le64(1ULL << 15);
-       }
-
-       /* Commit writes to memory and advance the tail on the chip */
-       wmb();
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmatail, tail);
-
-       tx->txreq.next_descq_idx = tail;
-       tx->txreq.callback_status = IPATH_SDMA_TXREQ_S_OK;
-       dd->ipath_sdma_descq_tail = tail;
-       dd->ipath_sdma_descq_added += tx->txreq.sg_count;
-       list_add_tail(&tx->txreq.list, &dd->ipath_sdma_activelist);
-       if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_VL15)
-               vl15_watchdog_enq(dd);
-       goto unlock;
-
-unmap:
-       while (tail != dd->ipath_sdma_descq_tail) {
-               if (!tail)
-                       tail = dd->ipath_sdma_descq_cnt - 1;
-               else
-                       tail--;
-               unmap_desc(dd, tail);
-       }
-ioerr:
-       ret = -EIO;
-unlock:
-       spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-fail:
-       return ret;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_srq.c b/drivers/staging/rdma/ipath/ipath_srq.c
deleted file mode 100644 (file)
index 2627198..0000000
+++ /dev/null
@@ -1,380 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/err.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-
-#include "ipath_verbs.h"
-
-/**
- * ipath_post_srq_receive - post a receive on a shared receive queue
- * @ibsrq: the SRQ to post the receive on
- * @wr: the list of work requests to post
- * @bad_wr: the first WR to cause a problem is put here
- *
- * This may be called from interrupt context.
- */
-int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
-                          struct ib_recv_wr **bad_wr)
-{
-       struct ipath_srq *srq = to_isrq(ibsrq);
-       struct ipath_rwq *wq;
-       unsigned long flags;
-       int ret;
-
-       for (; wr; wr = wr->next) {
-               struct ipath_rwqe *wqe;
-               u32 next;
-               int i;
-
-               if ((unsigned) wr->num_sge > srq->rq.max_sge) {
-                       *bad_wr = wr;
-                       ret = -EINVAL;
-                       goto bail;
-               }
-
-               spin_lock_irqsave(&srq->rq.lock, flags);
-               wq = srq->rq.wq;
-               next = wq->head + 1;
-               if (next >= srq->rq.size)
-                       next = 0;
-               if (next == wq->tail) {
-                       spin_unlock_irqrestore(&srq->rq.lock, flags);
-                       *bad_wr = wr;
-                       ret = -ENOMEM;
-                       goto bail;
-               }
-
-               wqe = get_rwqe_ptr(&srq->rq, wq->head);
-               wqe->wr_id = wr->wr_id;
-               wqe->num_sge = wr->num_sge;
-               for (i = 0; i < wr->num_sge; i++)
-                       wqe->sg_list[i] = wr->sg_list[i];
-               /* Make sure queue entry is written before the head index. */
-               smp_wmb();
-               wq->head = next;
-               spin_unlock_irqrestore(&srq->rq.lock, flags);
-       }
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_create_srq - create a shared receive queue
- * @ibpd: the protection domain of the SRQ to create
- * @srq_init_attr: the attributes of the SRQ
- * @udata: data from libipathverbs when creating a user SRQ
- */
-struct ib_srq *ipath_create_srq(struct ib_pd *ibpd,
-                               struct ib_srq_init_attr *srq_init_attr,
-                               struct ib_udata *udata)
-{
-       struct ipath_ibdev *dev = to_idev(ibpd->device);
-       struct ipath_srq *srq;
-       u32 sz;
-       struct ib_srq *ret;
-
-       if (srq_init_attr->srq_type != IB_SRQT_BASIC) {
-               ret = ERR_PTR(-ENOSYS);
-               goto done;
-       }
-
-       if (srq_init_attr->attr.max_wr == 0) {
-               ret = ERR_PTR(-EINVAL);
-               goto done;
-       }
-
-       if ((srq_init_attr->attr.max_sge > ib_ipath_max_srq_sges) ||
-           (srq_init_attr->attr.max_wr > ib_ipath_max_srq_wrs)) {
-               ret = ERR_PTR(-EINVAL);
-               goto done;
-       }
-
-       srq = kmalloc(sizeof(*srq), GFP_KERNEL);
-       if (!srq) {
-               ret = ERR_PTR(-ENOMEM);
-               goto done;
-       }
-
-       /*
-        * Need to use vmalloc() if we want to support large #s of entries.
-        */
-       srq->rq.size = srq_init_attr->attr.max_wr + 1;
-       srq->rq.max_sge = srq_init_attr->attr.max_sge;
-       sz = sizeof(struct ib_sge) * srq->rq.max_sge +
-               sizeof(struct ipath_rwqe);
-       srq->rq.wq = vmalloc_user(sizeof(struct ipath_rwq) + srq->rq.size * sz);
-       if (!srq->rq.wq) {
-               ret = ERR_PTR(-ENOMEM);
-               goto bail_srq;
-       }
-
-       /*
-        * Return the address of the RWQ as the offset to mmap.
-        * See ipath_mmap() for details.
-        */
-       if (udata && udata->outlen >= sizeof(__u64)) {
-               int err;
-               u32 s = sizeof(struct ipath_rwq) + srq->rq.size * sz;
-
-               srq->ip =
-                   ipath_create_mmap_info(dev, s,
-                                          ibpd->uobject->context,
-                                          srq->rq.wq);
-               if (!srq->ip) {
-                       ret = ERR_PTR(-ENOMEM);
-                       goto bail_wq;
-               }
-
-               err = ib_copy_to_udata(udata, &srq->ip->offset,
-                                      sizeof(srq->ip->offset));
-               if (err) {
-                       ret = ERR_PTR(err);
-                       goto bail_ip;
-               }
-       } else
-               srq->ip = NULL;
-
-       /*
-        * ib_create_srq() will initialize srq->ibsrq.
-        */
-       spin_lock_init(&srq->rq.lock);
-       srq->rq.wq->head = 0;
-       srq->rq.wq->tail = 0;
-       srq->limit = srq_init_attr->attr.srq_limit;
-
-       spin_lock(&dev->n_srqs_lock);
-       if (dev->n_srqs_allocated == ib_ipath_max_srqs) {
-               spin_unlock(&dev->n_srqs_lock);
-               ret = ERR_PTR(-ENOMEM);
-               goto bail_ip;
-       }
-
-       dev->n_srqs_allocated++;
-       spin_unlock(&dev->n_srqs_lock);
-
-       if (srq->ip) {
-               spin_lock_irq(&dev->pending_lock);
-               list_add(&srq->ip->pending_mmaps, &dev->pending_mmaps);
-               spin_unlock_irq(&dev->pending_lock);
-       }
-
-       ret = &srq->ibsrq;
-       goto done;
-
-bail_ip:
-       kfree(srq->ip);
-bail_wq:
-       vfree(srq->rq.wq);
-bail_srq:
-       kfree(srq);
-done:
-       return ret;
-}
-
-/**
- * ipath_modify_srq - modify a shared receive queue
- * @ibsrq: the SRQ to modify
- * @attr: the new attributes of the SRQ
- * @attr_mask: indicates which attributes to modify
- * @udata: user data for ipathverbs.so
- */
-int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
-                    enum ib_srq_attr_mask attr_mask,
-                    struct ib_udata *udata)
-{
-       struct ipath_srq *srq = to_isrq(ibsrq);
-       struct ipath_rwq *wq;
-       int ret = 0;
-
-       if (attr_mask & IB_SRQ_MAX_WR) {
-               struct ipath_rwq *owq;
-               struct ipath_rwqe *p;
-               u32 sz, size, n, head, tail;
-
-               /* Check that the requested sizes are below the limits. */
-               if ((attr->max_wr > ib_ipath_max_srq_wrs) ||
-                   ((attr_mask & IB_SRQ_LIMIT) ?
-                    attr->srq_limit : srq->limit) > attr->max_wr) {
-                       ret = -EINVAL;
-                       goto bail;
-               }
-
-               sz = sizeof(struct ipath_rwqe) +
-                       srq->rq.max_sge * sizeof(struct ib_sge);
-               size = attr->max_wr + 1;
-               wq = vmalloc_user(sizeof(struct ipath_rwq) + size * sz);
-               if (!wq) {
-                       ret = -ENOMEM;
-                       goto bail;
-               }
-
-               /* Check that we can write the offset to mmap. */
-               if (udata && udata->inlen >= sizeof(__u64)) {
-                       __u64 offset_addr;
-                       __u64 offset = 0;
-
-                       ret = ib_copy_from_udata(&offset_addr, udata,
-                                                sizeof(offset_addr));
-                       if (ret)
-                               goto bail_free;
-                       udata->outbuf =
-                               (void __user *) (unsigned long) offset_addr;
-                       ret = ib_copy_to_udata(udata, &offset,
-                                              sizeof(offset));
-                       if (ret)
-                               goto bail_free;
-               }
-
-               spin_lock_irq(&srq->rq.lock);
-               /*
-                * validate head pointer value and compute
-                * the number of remaining WQEs.
-                */
-               owq = srq->rq.wq;
-               head = owq->head;
-               if (head >= srq->rq.size)
-                       head = 0;
-               tail = owq->tail;
-               if (tail >= srq->rq.size)
-                       tail = 0;
-               n = head;
-               if (n < tail)
-                       n += srq->rq.size - tail;
-               else
-                       n -= tail;
-               if (size <= n) {
-                       ret = -EINVAL;
-                       goto bail_unlock;
-               }
-               n = 0;
-               p = wq->wq;
-               while (tail != head) {
-                       struct ipath_rwqe *wqe;
-                       int i;
-
-                       wqe = get_rwqe_ptr(&srq->rq, tail);
-                       p->wr_id = wqe->wr_id;
-                       p->num_sge = wqe->num_sge;
-                       for (i = 0; i < wqe->num_sge; i++)
-                               p->sg_list[i] = wqe->sg_list[i];
-                       n++;
-                       p = (struct ipath_rwqe *)((char *) p + sz);
-                       if (++tail >= srq->rq.size)
-                               tail = 0;
-               }
-               srq->rq.wq = wq;
-               srq->rq.size = size;
-               wq->head = n;
-               wq->tail = 0;
-               if (attr_mask & IB_SRQ_LIMIT)
-                       srq->limit = attr->srq_limit;
-               spin_unlock_irq(&srq->rq.lock);
-
-               vfree(owq);
-
-               if (srq->ip) {
-                       struct ipath_mmap_info *ip = srq->ip;
-                       struct ipath_ibdev *dev = to_idev(srq->ibsrq.device);
-                       u32 s = sizeof(struct ipath_rwq) + size * sz;
-
-                       ipath_update_mmap_info(dev, ip, s, wq);
-
-                       /*
-                        * Return the offset to mmap.
-                        * See ipath_mmap() for details.
-                        */
-                       if (udata && udata->inlen >= sizeof(__u64)) {
-                               ret = ib_copy_to_udata(udata, &ip->offset,
-                                                      sizeof(ip->offset));
-                               if (ret)
-                                       goto bail;
-                       }
-
-                       spin_lock_irq(&dev->pending_lock);
-                       if (list_empty(&ip->pending_mmaps))
-                               list_add(&ip->pending_mmaps,
-                                        &dev->pending_mmaps);
-                       spin_unlock_irq(&dev->pending_lock);
-               }
-       } else if (attr_mask & IB_SRQ_LIMIT) {
-               spin_lock_irq(&srq->rq.lock);
-               if (attr->srq_limit >= srq->rq.size)
-                       ret = -EINVAL;
-               else
-                       srq->limit = attr->srq_limit;
-               spin_unlock_irq(&srq->rq.lock);
-       }
-       goto bail;
-
-bail_unlock:
-       spin_unlock_irq(&srq->rq.lock);
-bail_free:
-       vfree(wq);
-bail:
-       return ret;
-}
-
-int ipath_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
-{
-       struct ipath_srq *srq = to_isrq(ibsrq);
-
-       attr->max_wr = srq->rq.size - 1;
-       attr->max_sge = srq->rq.max_sge;
-       attr->srq_limit = srq->limit;
-       return 0;
-}
-
-/**
- * ipath_destroy_srq - destroy a shared receive queue
- * @ibsrq: the SRQ to destroy
- */
-int ipath_destroy_srq(struct ib_srq *ibsrq)
-{
-       struct ipath_srq *srq = to_isrq(ibsrq);
-       struct ipath_ibdev *dev = to_idev(ibsrq->device);
-
-       spin_lock(&dev->n_srqs_lock);
-       dev->n_srqs_allocated--;
-       spin_unlock(&dev->n_srqs_lock);
-       if (srq->ip)
-               kref_put(&srq->ip->ref, ipath_release_mmap_info);
-       else
-               vfree(srq->rq.wq);
-       kfree(srq);
-
-       return 0;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_stats.c b/drivers/staging/rdma/ipath/ipath_stats.c
deleted file mode 100644 (file)
index f63e143..0000000
+++ /dev/null
@@ -1,347 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "ipath_kernel.h"
-
-struct infinipath_stats ipath_stats;
-
-/**
- * ipath_snap_cntr - snapshot a chip counter
- * @dd: the infinipath device
- * @creg: the counter to snapshot
- *
- * called from add_timer and user counter read calls, to deal with
- * counters that wrap in "human time".  The words sent and received, and
- * the packets sent and received are all that we worry about.  For now,
- * at least, we don't worry about error counters, because if they wrap
- * that quickly, we probably don't care.  We may eventually just make this
- * handle all the counters.  word counters can wrap in about 20 seconds
- * of full bandwidth traffic, packet counters in a few hours.
- */
-
-u64 ipath_snap_cntr(struct ipath_devdata *dd, ipath_creg creg)
-{
-       u32 val, reg64 = 0;
-       u64 val64;
-       unsigned long t0, t1;
-       u64 ret;
-
-       t0 = jiffies;
-       /* If fast increment counters are only 32 bits, snapshot them,
-        * and maintain them as 64bit values in the driver */
-       if (!(dd->ipath_flags & IPATH_32BITCOUNTERS) &&
-           (creg == dd->ipath_cregs->cr_wordsendcnt ||
-            creg == dd->ipath_cregs->cr_wordrcvcnt ||
-            creg == dd->ipath_cregs->cr_pktsendcnt ||
-            creg == dd->ipath_cregs->cr_pktrcvcnt)) {
-               val64 = ipath_read_creg(dd, creg);
-               val = val64 == ~0ULL ? ~0U : 0;
-               reg64 = 1;
-       } else                  /* val64 just to keep gcc quiet... */
-               val64 = val = ipath_read_creg32(dd, creg);
-       /*
-        * See if a second has passed.  This is just a way to detect things
-        * that are quite broken.  Normally this should take just a few
-        * cycles (the check is for long enough that we don't care if we get
-        * pre-empted.)  An Opteron HT O read timeout is 4 seconds with
-        * normal NB values
-        */
-       t1 = jiffies;
-       if (time_before(t0 + HZ, t1) && val == -1) {
-               ipath_dev_err(dd, "Error!  Read counter 0x%x timed out\n",
-                             creg);
-               ret = 0ULL;
-               goto bail;
-       }
-       if (reg64) {
-               ret = val64;
-               goto bail;
-       }
-
-       if (creg == dd->ipath_cregs->cr_wordsendcnt) {
-               if (val != dd->ipath_lastsword) {
-                       dd->ipath_sword += val - dd->ipath_lastsword;
-                       dd->ipath_lastsword = val;
-               }
-               val64 = dd->ipath_sword;
-       } else if (creg == dd->ipath_cregs->cr_wordrcvcnt) {
-               if (val != dd->ipath_lastrword) {
-                       dd->ipath_rword += val - dd->ipath_lastrword;
-                       dd->ipath_lastrword = val;
-               }
-               val64 = dd->ipath_rword;
-       } else if (creg == dd->ipath_cregs->cr_pktsendcnt) {
-               if (val != dd->ipath_lastspkts) {
-                       dd->ipath_spkts += val - dd->ipath_lastspkts;
-                       dd->ipath_lastspkts = val;
-               }
-               val64 = dd->ipath_spkts;
-       } else if (creg == dd->ipath_cregs->cr_pktrcvcnt) {
-               if (val != dd->ipath_lastrpkts) {
-                       dd->ipath_rpkts += val - dd->ipath_lastrpkts;
-                       dd->ipath_lastrpkts = val;
-               }
-               val64 = dd->ipath_rpkts;
-       } else if (creg == dd->ipath_cregs->cr_ibsymbolerrcnt) {
-               if (dd->ibdeltainprog)
-                       val64 -= val64 - dd->ibsymsnap;
-               val64 -= dd->ibsymdelta;
-       } else if (creg == dd->ipath_cregs->cr_iblinkerrrecovcnt) {
-               if (dd->ibdeltainprog)
-                       val64 -= val64 - dd->iblnkerrsnap;
-               val64 -= dd->iblnkerrdelta;
-       } else
-               val64 = (u64) val;
-
-       ret = val64;
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_qcheck - print delta of egrfull/hdrqfull errors for kernel ports
- * @dd: the infinipath device
- *
- * print the delta of egrfull/hdrqfull errors for kernel ports no more than
- * every 5 seconds.  User processes are printed at close, but kernel doesn't
- * close, so...  Separate routine so may call from other places someday, and
- * so function name when printed by _IPATH_INFO is meaningfull
- */
-static void ipath_qcheck(struct ipath_devdata *dd)
-{
-       static u64 last_tot_hdrqfull;
-       struct ipath_portdata *pd = dd->ipath_pd[0];
-       size_t blen = 0;
-       char buf[128];
-       u32 hdrqtail;
-
-       *buf = 0;
-       if (pd->port_hdrqfull != dd->ipath_p0_hdrqfull) {
-               blen = snprintf(buf, sizeof buf, "port 0 hdrqfull %u",
-                               pd->port_hdrqfull -
-                               dd->ipath_p0_hdrqfull);
-               dd->ipath_p0_hdrqfull = pd->port_hdrqfull;
-       }
-       if (ipath_stats.sps_etidfull != dd->ipath_last_tidfull) {
-               blen += snprintf(buf + blen, sizeof buf - blen,
-                                "%srcvegrfull %llu",
-                                blen ? ", " : "",
-                                (unsigned long long)
-                                (ipath_stats.sps_etidfull -
-                                 dd->ipath_last_tidfull));
-               dd->ipath_last_tidfull = ipath_stats.sps_etidfull;
-       }
-
-       /*
-        * this is actually the number of hdrq full interrupts, not actual
-        * events, but at the moment that's mostly what I'm interested in.
-        * Actual count, etc. is in the counters, if needed.  For production
-        * users this won't ordinarily be printed.
-        */
-
-       if ((ipath_debug & (__IPATH_PKTDBG | __IPATH_DBG)) &&
-           ipath_stats.sps_hdrqfull != last_tot_hdrqfull) {
-               blen += snprintf(buf + blen, sizeof buf - blen,
-                                "%shdrqfull %llu (all ports)",
-                                blen ? ", " : "",
-                                (unsigned long long)
-                                (ipath_stats.sps_hdrqfull -
-                                 last_tot_hdrqfull));
-               last_tot_hdrqfull = ipath_stats.sps_hdrqfull;
-       }
-       if (blen)
-               ipath_dbg("%s\n", buf);
-
-       hdrqtail = ipath_get_hdrqtail(pd);
-       if (pd->port_head != hdrqtail) {
-               if (dd->ipath_lastport0rcv_cnt ==
-                   ipath_stats.sps_port0pkts) {
-                       ipath_cdbg(PKT, "missing rcv interrupts? "
-                                  "port0 hd=%x tl=%x; port0pkts %llx; write"
-                                  " hd (w/intr)\n",
-                                  pd->port_head, hdrqtail,
-                                  (unsigned long long)
-                                  ipath_stats.sps_port0pkts);
-                       ipath_write_ureg(dd, ur_rcvhdrhead, hdrqtail |
-                               dd->ipath_rhdrhead_intr_off, pd->port_port);
-               }
-               dd->ipath_lastport0rcv_cnt = ipath_stats.sps_port0pkts;
-       }
-}
-
-static void ipath_chk_errormask(struct ipath_devdata *dd)
-{
-       static u32 fixed;
-       u32 ctrl;
-       unsigned long errormask;
-       unsigned long hwerrs;
-
-       if (!dd->ipath_errormask || !(dd->ipath_flags & IPATH_INITTED))
-               return;
-
-       errormask = ipath_read_kreg64(dd, dd->ipath_kregs->kr_errormask);
-
-       if (errormask == dd->ipath_errormask)
-               return;
-       fixed++;
-
-       hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
-       ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
-
-       ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
-               dd->ipath_errormask);
-
-       if ((hwerrs & dd->ipath_hwerrmask) ||
-               (ctrl & INFINIPATH_C_FREEZEMODE)) {
-               /* force re-interrupt of pending events, just in case */
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, 0ULL);
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, 0ULL);
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL);
-               dev_info(&dd->pcidev->dev,
-                       "errormask fixed(%u) %lx -> %lx, ctrl %x hwerr %lx\n",
-                       fixed, errormask, (unsigned long)dd->ipath_errormask,
-                       ctrl, hwerrs);
-       } else
-               ipath_dbg("errormask fixed(%u) %lx -> %lx, no freeze\n",
-                       fixed, errormask,
-                       (unsigned long)dd->ipath_errormask);
-}
-
-
-/**
- * ipath_get_faststats - get word counters from chip before they overflow
- * @opaque - contains a pointer to the infinipath device ipath_devdata
- *
- * called from add_timer
- */
-void ipath_get_faststats(unsigned long opaque)
-{
-       struct ipath_devdata *dd = (struct ipath_devdata *) opaque;
-       int i;
-       static unsigned cnt;
-       unsigned long flags;
-       u64 traffic_wds;
-
-       /*
-        * don't access the chip while running diags, or memory diags can
-        * fail
-        */
-       if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_INITTED) ||
-           ipath_diag_inuse)
-               /* but re-arm the timer, for diags case; won't hurt other */
-               goto done;
-
-       /*
-        * We now try to maintain a "active timer", based on traffic
-        * exceeding a threshold, so we need to check the word-counts
-        * even if they are 64-bit.
-        */
-       traffic_wds = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt) +
-               ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
-       spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
-       traffic_wds -= dd->ipath_traffic_wds;
-       dd->ipath_traffic_wds += traffic_wds;
-       if (traffic_wds  >= IPATH_TRAFFIC_ACTIVE_THRESHOLD)
-               atomic_add(5, &dd->ipath_active_time); /* S/B #define */
-       spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
-
-       if (dd->ipath_flags & IPATH_32BITCOUNTERS) {
-               ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
-               ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
-       }
-
-       ipath_qcheck(dd);
-
-       /*
-        * deal with repeat error suppression.  Doesn't really matter if
-        * last error was almost a full interval ago, or just a few usecs
-        * ago; still won't get more than 2 per interval.  We may want
-        * longer intervals for this eventually, could do with mod, counter
-        * or separate timer.  Also see code in ipath_handle_errors() and
-        * ipath_handle_hwerrors().
-        */
-
-       if (dd->ipath_lasterror)
-               dd->ipath_lasterror = 0;
-       if (dd->ipath_lasthwerror)
-               dd->ipath_lasthwerror = 0;
-       if (dd->ipath_maskederrs
-           && time_after(jiffies, dd->ipath_unmasktime)) {
-               char ebuf[256];
-               int iserr;
-               iserr = ipath_decode_err(dd, ebuf, sizeof ebuf,
-                                        dd->ipath_maskederrs);
-               if (dd->ipath_maskederrs &
-                   ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
-                     INFINIPATH_E_PKTERRS))
-                       ipath_dev_err(dd, "Re-enabling masked errors "
-                                     "(%s)\n", ebuf);
-               else {
-                       /*
-                        * rcvegrfull and rcvhdrqfull are "normal", for some
-                        * types of processes (mostly benchmarks) that send
-                        * huge numbers of messages, while not processing
-                        * them.  So only complain about these at debug
-                        * level.
-                        */
-                       if (iserr)
-                               ipath_dbg(
-                                       "Re-enabling queue full errors (%s)\n",
-                                       ebuf);
-                       else
-                               ipath_cdbg(ERRPKT, "Re-enabling packet"
-                                       " problem interrupt (%s)\n", ebuf);
-               }
-
-               /* re-enable masked errors */
-               dd->ipath_errormask |= dd->ipath_maskederrs;
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
-                                dd->ipath_errormask);
-               dd->ipath_maskederrs = 0;
-       }
-
-       /* limit qfull messages to ~one per minute per port */
-       if ((++cnt & 0x10)) {
-               for (i = (int) dd->ipath_cfgports; --i >= 0; ) {
-                       struct ipath_portdata *pd = dd->ipath_pd[i];
-
-                       if (pd && pd->port_lastrcvhdrqtail != -1)
-                               pd->port_lastrcvhdrqtail = -1;
-               }
-       }
-
-       ipath_chk_errormask(dd);
-done:
-       mod_timer(&dd->ipath_stats_timer, jiffies + HZ * 5);
-}
diff --git a/drivers/staging/rdma/ipath/ipath_sysfs.c b/drivers/staging/rdma/ipath/ipath_sysfs.c
deleted file mode 100644 (file)
index b12b1f6..0000000
+++ /dev/null
@@ -1,1237 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/ctype.h>
-#include <linux/stat.h>
-
-#include "ipath_kernel.h"
-#include "ipath_verbs.h"
-#include "ipath_common.h"
-
-/**
- * ipath_parse_ushort - parse an unsigned short value in an arbitrary base
- * @str: the string containing the number
- * @valp: where to put the result
- *
- * returns the number of bytes consumed, or negative value on error
- */
-int ipath_parse_ushort(const char *str, unsigned short *valp)
-{
-       unsigned long val;
-       char *end;
-       int ret;
-
-       if (!isdigit(str[0])) {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       val = simple_strtoul(str, &end, 0);
-
-       if (val > 0xffff) {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       *valp = val;
-
-       ret = end + 1 - str;
-       if (ret == 0)
-               ret = -EINVAL;
-
-bail:
-       return ret;
-}
-
-static ssize_t show_version(struct device_driver *dev, char *buf)
-{
-       /* The string printed here is already newline-terminated. */
-       return scnprintf(buf, PAGE_SIZE, "%s", ib_ipath_version);
-}
-
-static ssize_t show_num_units(struct device_driver *dev, char *buf)
-{
-       return scnprintf(buf, PAGE_SIZE, "%d\n",
-                        ipath_count_units(NULL, NULL, NULL));
-}
-
-static ssize_t show_status(struct device *dev,
-                          struct device_attribute *attr,
-                          char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       ssize_t ret;
-
-       if (!dd->ipath_statusp) {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       ret = scnprintf(buf, PAGE_SIZE, "0x%llx\n",
-                       (unsigned long long) *(dd->ipath_statusp));
-
-bail:
-       return ret;
-}
-
-static const char *ipath_status_str[] = {
-       "Initted",
-       "Disabled",
-       "Admin_Disabled",
-       "", /* This used to be the old "OIB_SMA" status. */
-       "", /* This used to be the old "SMA" status. */
-       "Present",
-       "IB_link_up",
-       "IB_configured",
-       "NoIBcable",
-       "Fatal_Hardware_Error",
-       NULL,
-};
-
-static ssize_t show_status_str(struct device *dev,
-                              struct device_attribute *attr,
-                              char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int i, any;
-       u64 s;
-       ssize_t ret;
-
-       if (!dd->ipath_statusp) {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       s = *(dd->ipath_statusp);
-       *buf = '\0';
-       for (any = i = 0; s && ipath_status_str[i]; i++) {
-               if (s & 1) {
-                       if (any && strlcat(buf, " ", PAGE_SIZE) >=
-                           PAGE_SIZE)
-                               /* overflow */
-                               break;
-                       if (strlcat(buf, ipath_status_str[i],
-                                   PAGE_SIZE) >= PAGE_SIZE)
-                               break;
-                       any = 1;
-               }
-               s >>= 1;
-       }
-       if (any)
-               strlcat(buf, "\n", PAGE_SIZE);
-
-       ret = strlen(buf);
-
-bail:
-       return ret;
-}
-
-static ssize_t show_boardversion(struct device *dev,
-                              struct device_attribute *attr,
-                              char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       /* The string printed here is already newline-terminated. */
-       return scnprintf(buf, PAGE_SIZE, "%s", dd->ipath_boardversion);
-}
-
-static ssize_t show_localbus_info(struct device *dev,
-                              struct device_attribute *attr,
-                              char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       /* The string printed here is already newline-terminated. */
-       return scnprintf(buf, PAGE_SIZE, "%s", dd->ipath_lbus_info);
-}
-
-static ssize_t show_lmc(struct device *dev,
-                       struct device_attribute *attr,
-                       char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-
-       return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_lmc);
-}
-
-static ssize_t store_lmc(struct device *dev,
-                        struct device_attribute *attr,
-                        const char *buf,
-                        size_t count)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       u16 lmc = 0;
-       int ret;
-
-       ret = ipath_parse_ushort(buf, &lmc);
-       if (ret < 0)
-               goto invalid;
-
-       if (lmc > 7) {
-               ret = -EINVAL;
-               goto invalid;
-       }
-
-       ipath_set_lid(dd, dd->ipath_lid, lmc);
-
-       goto bail;
-invalid:
-       ipath_dev_err(dd, "attempt to set invalid LMC %u\n", lmc);
-bail:
-       return ret;
-}
-
-static ssize_t show_lid(struct device *dev,
-                       struct device_attribute *attr,
-                       char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-
-       return scnprintf(buf, PAGE_SIZE, "0x%x\n", dd->ipath_lid);
-}
-
-static ssize_t store_lid(struct device *dev,
-                        struct device_attribute *attr,
-                         const char *buf,
-                         size_t count)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       u16 lid = 0;
-       int ret;
-
-       ret = ipath_parse_ushort(buf, &lid);
-       if (ret < 0)
-               goto invalid;
-
-       if (lid == 0 || lid >= IPATH_MULTICAST_LID_BASE) {
-               ret = -EINVAL;
-               goto invalid;
-       }
-
-       ipath_set_lid(dd, lid, dd->ipath_lmc);
-
-       goto bail;
-invalid:
-       ipath_dev_err(dd, "attempt to set invalid LID 0x%x\n", lid);
-bail:
-       return ret;
-}
-
-static ssize_t show_mlid(struct device *dev,
-                        struct device_attribute *attr,
-                        char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-
-       return scnprintf(buf, PAGE_SIZE, "0x%x\n", dd->ipath_mlid);
-}
-
-static ssize_t store_mlid(struct device *dev,
-                        struct device_attribute *attr,
-                         const char *buf,
-                         size_t count)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       u16 mlid;
-       int ret;
-
-       ret = ipath_parse_ushort(buf, &mlid);
-       if (ret < 0 || mlid < IPATH_MULTICAST_LID_BASE)
-               goto invalid;
-
-       dd->ipath_mlid = mlid;
-
-       goto bail;
-invalid:
-       ipath_dev_err(dd, "attempt to set invalid MLID\n");
-bail:
-       return ret;
-}
-
-static ssize_t show_guid(struct device *dev,
-                        struct device_attribute *attr,
-                        char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       u8 *guid;
-
-       guid = (u8 *) & (dd->ipath_guid);
-
-       return scnprintf(buf, PAGE_SIZE,
-                        "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x\n",
-                        guid[0], guid[1], guid[2], guid[3],
-                        guid[4], guid[5], guid[6], guid[7]);
-}
-
-static ssize_t store_guid(struct device *dev,
-                        struct device_attribute *attr,
-                         const char *buf,
-                         size_t count)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       ssize_t ret;
-       unsigned short guid[8];
-       __be64 new_guid;
-       u8 *ng;
-       int i;
-
-       if (sscanf(buf, "%hx:%hx:%hx:%hx:%hx:%hx:%hx:%hx",
-                  &guid[0], &guid[1], &guid[2], &guid[3],
-                  &guid[4], &guid[5], &guid[6], &guid[7]) != 8)
-               goto invalid;
-
-       ng = (u8 *) &new_guid;
-
-       for (i = 0; i < 8; i++) {
-               if (guid[i] > 0xff)
-                       goto invalid;
-               ng[i] = guid[i];
-       }
-
-       if (new_guid == 0)
-               goto invalid;
-
-       dd->ipath_guid = new_guid;
-       dd->ipath_nguid = 1;
-       if (dd->verbs_dev)
-               dd->verbs_dev->ibdev.node_guid = new_guid;
-
-       ret = strlen(buf);
-       goto bail;
-
-invalid:
-       ipath_dev_err(dd, "attempt to set invalid GUID\n");
-       ret = -EINVAL;
-
-bail:
-       return ret;
-}
-
-static ssize_t show_nguid(struct device *dev,
-                         struct device_attribute *attr,
-                         char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-
-       return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_nguid);
-}
-
-static ssize_t show_nports(struct device *dev,
-                          struct device_attribute *attr,
-                          char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-
-       /* Return the number of user ports available. */
-       return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_cfgports - 1);
-}
-
-static ssize_t show_serial(struct device *dev,
-                          struct device_attribute *attr,
-                          char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-
-       buf[sizeof dd->ipath_serial] = '\0';
-       memcpy(buf, dd->ipath_serial, sizeof dd->ipath_serial);
-       strcat(buf, "\n");
-       return strlen(buf);
-}
-
-static ssize_t show_unit(struct device *dev,
-                        struct device_attribute *attr,
-                        char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-
-       return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_unit);
-}
-
-static ssize_t show_jint_max_packets(struct device *dev,
-                                    struct device_attribute *attr,
-                                    char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-
-       return scnprintf(buf, PAGE_SIZE, "%hu\n", dd->ipath_jint_max_packets);
-}
-
-static ssize_t store_jint_max_packets(struct device *dev,
-                                     struct device_attribute *attr,
-                                     const char *buf,
-                                     size_t count)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       u16 v = 0;
-       int ret;
-
-       ret = ipath_parse_ushort(buf, &v);
-       if (ret < 0)
-               ipath_dev_err(dd, "invalid jint_max_packets.\n");
-       else
-               dd->ipath_f_config_jint(dd, dd->ipath_jint_idle_ticks, v);
-
-       return ret;
-}
-
-static ssize_t show_jint_idle_ticks(struct device *dev,
-                                   struct device_attribute *attr,
-                                   char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-
-       return scnprintf(buf, PAGE_SIZE, "%hu\n", dd->ipath_jint_idle_ticks);
-}
-
-static ssize_t store_jint_idle_ticks(struct device *dev,
-                                    struct device_attribute *attr,
-                                    const char *buf,
-                                    size_t count)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       u16 v = 0;
-       int ret;
-
-       ret = ipath_parse_ushort(buf, &v);
-       if (ret < 0)
-               ipath_dev_err(dd, "invalid jint_idle_ticks.\n");
-       else
-               dd->ipath_f_config_jint(dd, v, dd->ipath_jint_max_packets);
-
-       return ret;
-}
-
-#define DEVICE_COUNTER(name, attr) \
-       static ssize_t show_counter_##name(struct device *dev, \
-                                          struct device_attribute *attr, \
-                                          char *buf) \
-       { \
-               struct ipath_devdata *dd = dev_get_drvdata(dev); \
-               return scnprintf(\
-                       buf, PAGE_SIZE, "%llu\n", (unsigned long long) \
-                       ipath_snap_cntr( \
-                               dd, offsetof(struct infinipath_counters, \
-                                            attr) / sizeof(u64)));     \
-       } \
-       static DEVICE_ATTR(name, S_IRUGO, show_counter_##name, NULL);
-
-DEVICE_COUNTER(ib_link_downeds, IBLinkDownedCnt);
-DEVICE_COUNTER(ib_link_err_recoveries, IBLinkErrRecoveryCnt);
-DEVICE_COUNTER(ib_status_changes, IBStatusChangeCnt);
-DEVICE_COUNTER(ib_symbol_errs, IBSymbolErrCnt);
-DEVICE_COUNTER(lb_flow_stalls, LBFlowStallCnt);
-DEVICE_COUNTER(lb_ints, LBIntCnt);
-DEVICE_COUNTER(rx_bad_formats, RxBadFormatCnt);
-DEVICE_COUNTER(rx_buf_ovfls, RxBufOvflCnt);
-DEVICE_COUNTER(rx_data_pkts, RxDataPktCnt);
-DEVICE_COUNTER(rx_dropped_pkts, RxDroppedPktCnt);
-DEVICE_COUNTER(rx_dwords, RxDwordCnt);
-DEVICE_COUNTER(rx_ebps, RxEBPCnt);
-DEVICE_COUNTER(rx_flow_ctrl_errs, RxFlowCtrlErrCnt);
-DEVICE_COUNTER(rx_flow_pkts, RxFlowPktCnt);
-DEVICE_COUNTER(rx_icrc_errs, RxICRCErrCnt);
-DEVICE_COUNTER(rx_len_errs, RxLenErrCnt);
-DEVICE_COUNTER(rx_link_problems, RxLinkProblemCnt);
-DEVICE_COUNTER(rx_lpcrc_errs, RxLPCRCErrCnt);
-DEVICE_COUNTER(rx_max_min_len_errs, RxMaxMinLenErrCnt);
-DEVICE_COUNTER(rx_p0_hdr_egr_ovfls, RxP0HdrEgrOvflCnt);
-DEVICE_COUNTER(rx_p1_hdr_egr_ovfls, RxP1HdrEgrOvflCnt);
-DEVICE_COUNTER(rx_p2_hdr_egr_ovfls, RxP2HdrEgrOvflCnt);
-DEVICE_COUNTER(rx_p3_hdr_egr_ovfls, RxP3HdrEgrOvflCnt);
-DEVICE_COUNTER(rx_p4_hdr_egr_ovfls, RxP4HdrEgrOvflCnt);
-DEVICE_COUNTER(rx_p5_hdr_egr_ovfls, RxP5HdrEgrOvflCnt);
-DEVICE_COUNTER(rx_p6_hdr_egr_ovfls, RxP6HdrEgrOvflCnt);
-DEVICE_COUNTER(rx_p7_hdr_egr_ovfls, RxP7HdrEgrOvflCnt);
-DEVICE_COUNTER(rx_p8_hdr_egr_ovfls, RxP8HdrEgrOvflCnt);
-DEVICE_COUNTER(rx_pkey_mismatches, RxPKeyMismatchCnt);
-DEVICE_COUNTER(rx_tid_full_errs, RxTIDFullErrCnt);
-DEVICE_COUNTER(rx_tid_valid_errs, RxTIDValidErrCnt);
-DEVICE_COUNTER(rx_vcrc_errs, RxVCRCErrCnt);
-DEVICE_COUNTER(tx_data_pkts, TxDataPktCnt);
-DEVICE_COUNTER(tx_dropped_pkts, TxDroppedPktCnt);
-DEVICE_COUNTER(tx_dwords, TxDwordCnt);
-DEVICE_COUNTER(tx_flow_pkts, TxFlowPktCnt);
-DEVICE_COUNTER(tx_flow_stalls, TxFlowStallCnt);
-DEVICE_COUNTER(tx_len_errs, TxLenErrCnt);
-DEVICE_COUNTER(tx_max_min_len_errs, TxMaxMinLenErrCnt);
-DEVICE_COUNTER(tx_underruns, TxUnderrunCnt);
-DEVICE_COUNTER(tx_unsup_vl_errs, TxUnsupVLErrCnt);
-
-static struct attribute *dev_counter_attributes[] = {
-       &dev_attr_ib_link_downeds.attr,
-       &dev_attr_ib_link_err_recoveries.attr,
-       &dev_attr_ib_status_changes.attr,
-       &dev_attr_ib_symbol_errs.attr,
-       &dev_attr_lb_flow_stalls.attr,
-       &dev_attr_lb_ints.attr,
-       &dev_attr_rx_bad_formats.attr,
-       &dev_attr_rx_buf_ovfls.attr,
-       &dev_attr_rx_data_pkts.attr,
-       &dev_attr_rx_dropped_pkts.attr,
-       &dev_attr_rx_dwords.attr,
-       &dev_attr_rx_ebps.attr,
-       &dev_attr_rx_flow_ctrl_errs.attr,
-       &dev_attr_rx_flow_pkts.attr,
-       &dev_attr_rx_icrc_errs.attr,
-       &dev_attr_rx_len_errs.attr,
-       &dev_attr_rx_link_problems.attr,
-       &dev_attr_rx_lpcrc_errs.attr,
-       &dev_attr_rx_max_min_len_errs.attr,
-       &dev_attr_rx_p0_hdr_egr_ovfls.attr,
-       &dev_attr_rx_p1_hdr_egr_ovfls.attr,
-       &dev_attr_rx_p2_hdr_egr_ovfls.attr,
-       &dev_attr_rx_p3_hdr_egr_ovfls.attr,
-       &dev_attr_rx_p4_hdr_egr_ovfls.attr,
-       &dev_attr_rx_p5_hdr_egr_ovfls.attr,
-       &dev_attr_rx_p6_hdr_egr_ovfls.attr,
-       &dev_attr_rx_p7_hdr_egr_ovfls.attr,
-       &dev_attr_rx_p8_hdr_egr_ovfls.attr,
-       &dev_attr_rx_pkey_mismatches.attr,
-       &dev_attr_rx_tid_full_errs.attr,
-       &dev_attr_rx_tid_valid_errs.attr,
-       &dev_attr_rx_vcrc_errs.attr,
-       &dev_attr_tx_data_pkts.attr,
-       &dev_attr_tx_dropped_pkts.attr,
-       &dev_attr_tx_dwords.attr,
-       &dev_attr_tx_flow_pkts.attr,
-       &dev_attr_tx_flow_stalls.attr,
-       &dev_attr_tx_len_errs.attr,
-       &dev_attr_tx_max_min_len_errs.attr,
-       &dev_attr_tx_underruns.attr,
-       &dev_attr_tx_unsup_vl_errs.attr,
-       NULL
-};
-
-static struct attribute_group dev_counter_attr_group = {
-       .name = "counters",
-       .attrs = dev_counter_attributes
-};
-
-static ssize_t store_reset(struct device *dev,
-                        struct device_attribute *attr,
-                         const char *buf,
-                         size_t count)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int ret;
-
-       if (count < 5 || memcmp(buf, "reset", 5)) {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       if (dd->ipath_flags & IPATH_DISABLED) {
-               /*
-                * post-reset init would re-enable interrupts, etc.
-                * so don't allow reset on disabled devices.  Not
-                * perfect error, but about the best choice.
-                */
-               dev_info(dev,"Unit %d is disabled, can't reset\n",
-                        dd->ipath_unit);
-               ret = -EINVAL;
-               goto bail;
-       }
-       ret = ipath_reset_device(dd->ipath_unit);
-bail:
-       return ret<0 ? ret : count;
-}
-
-static ssize_t store_link_state(struct device *dev,
-                        struct device_attribute *attr,
-                         const char *buf,
-                         size_t count)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int ret, r;
-       u16 state;
-
-       ret = ipath_parse_ushort(buf, &state);
-       if (ret < 0)
-               goto invalid;
-
-       r = ipath_set_linkstate(dd, state);
-       if (r < 0) {
-               ret = r;
-               goto bail;
-       }
-
-       goto bail;
-invalid:
-       ipath_dev_err(dd, "attempt to set invalid link state\n");
-bail:
-       return ret;
-}
-
-static ssize_t show_mtu(struct device *dev,
-                        struct device_attribute *attr,
-                        char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_ibmtu);
-}
-
-static ssize_t store_mtu(struct device *dev,
-                        struct device_attribute *attr,
-                         const char *buf,
-                         size_t count)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       ssize_t ret;
-       u16 mtu = 0;
-       int r;
-
-       ret = ipath_parse_ushort(buf, &mtu);
-       if (ret < 0)
-               goto invalid;
-
-       r = ipath_set_mtu(dd, mtu);
-       if (r < 0)
-               ret = r;
-
-       goto bail;
-invalid:
-       ipath_dev_err(dd, "attempt to set invalid MTU\n");
-bail:
-       return ret;
-}
-
-static ssize_t show_enabled(struct device *dev,
-                        struct device_attribute *attr,
-                        char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       return scnprintf(buf, PAGE_SIZE, "%u\n",
-                        (dd->ipath_flags & IPATH_DISABLED) ? 0 : 1);
-}
-
-static ssize_t store_enabled(struct device *dev,
-                        struct device_attribute *attr,
-                         const char *buf,
-                         size_t count)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       ssize_t ret;
-       u16 enable = 0;
-
-       ret = ipath_parse_ushort(buf, &enable);
-       if (ret < 0) {
-               ipath_dev_err(dd, "attempt to use non-numeric on enable\n");
-               goto bail;
-       }
-
-       if (enable) {
-               if (!(dd->ipath_flags & IPATH_DISABLED))
-                       goto bail;
-
-               dev_info(dev, "Enabling unit %d\n", dd->ipath_unit);
-               /* same as post-reset */
-               ret = ipath_init_chip(dd, 1);
-               if (ret)
-                       ipath_dev_err(dd, "Failed to enable unit %d\n",
-                                     dd->ipath_unit);
-               else {
-                       dd->ipath_flags &= ~IPATH_DISABLED;
-                       *dd->ipath_statusp &= ~IPATH_STATUS_ADMIN_DISABLED;
-               }
-       } else if (!(dd->ipath_flags & IPATH_DISABLED)) {
-               dev_info(dev, "Disabling unit %d\n", dd->ipath_unit);
-               ipath_shutdown_device(dd);
-               dd->ipath_flags |= IPATH_DISABLED;
-               *dd->ipath_statusp |= IPATH_STATUS_ADMIN_DISABLED;
-       }
-
-bail:
-       return ret;
-}
-
-static ssize_t store_rx_pol_inv(struct device *dev,
-                         struct device_attribute *attr,
-                         const char *buf,
-                         size_t count)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int ret, r;
-       u16 val;
-
-       ret = ipath_parse_ushort(buf, &val);
-       if (ret < 0)
-               goto invalid;
-
-       r = ipath_set_rx_pol_inv(dd, val);
-       if (r < 0) {
-               ret = r;
-               goto bail;
-       }
-
-       goto bail;
-invalid:
-       ipath_dev_err(dd, "attempt to set invalid Rx Polarity invert\n");
-bail:
-       return ret;
-}
-
-static ssize_t store_led_override(struct device *dev,
-                         struct device_attribute *attr,
-                         const char *buf,
-                         size_t count)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int ret;
-       u16 val;
-
-       ret = ipath_parse_ushort(buf, &val);
-       if (ret > 0)
-               ipath_set_led_override(dd, val);
-       else
-               ipath_dev_err(dd, "attempt to set invalid LED override\n");
-       return ret;
-}
-
-static ssize_t show_logged_errs(struct device *dev,
-                               struct device_attribute *attr,
-                               char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int idx, count;
-
-       /* force consistency with actual EEPROM */
-       if (ipath_update_eeprom_log(dd) != 0)
-               return -ENXIO;
-
-       count = 0;
-       for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) {
-               count += scnprintf(buf + count, PAGE_SIZE - count, "%d%c",
-                       dd->ipath_eep_st_errs[idx],
-                       idx == (IPATH_EEP_LOG_CNT - 1) ? '\n' : ' ');
-       }
-
-       return count;
-}
-
-/*
- * New sysfs entries to control various IB config. These all turn into
- * accesses via ipath_f_get/set_ib_cfg.
- *
- * Get/Set heartbeat enable. Or of 1=enabled, 2=auto
- */
-static ssize_t show_hrtbt_enb(struct device *dev,
-                        struct device_attribute *attr,
-                        char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int ret;
-
-       ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_HRTBT);
-       if (ret >= 0)
-               ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
-       return ret;
-}
-
-static ssize_t store_hrtbt_enb(struct device *dev,
-                         struct device_attribute *attr,
-                         const char *buf,
-                         size_t count)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int ret, r;
-       u16 val;
-
-       ret = ipath_parse_ushort(buf, &val);
-       if (ret >= 0 && val > 3)
-               ret = -EINVAL;
-       if (ret < 0) {
-               ipath_dev_err(dd, "attempt to set invalid Heartbeat enable\n");
-               goto bail;
-       }
-
-       /*
-        * Set the "intentional" heartbeat enable per either of
-        * "Enable" and "Auto", as these are normally set together.
-        * This bit is consulted when leaving loopback mode,
-        * because entering loopback mode overrides it and automatically
-        * disables heartbeat.
-        */
-       r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT, val);
-       if (r < 0)
-               ret = r;
-       else if (val == IPATH_IB_HRTBT_OFF)
-               dd->ipath_flags |= IPATH_NO_HRTBT;
-       else
-               dd->ipath_flags &= ~IPATH_NO_HRTBT;
-
-bail:
-       return ret;
-}
-
-/*
- * Get/Set Link-widths enabled. Or of 1=1x, 2=4x (this is human/IB centric,
- * _not_ the particular encoding of any given chip)
- */
-static ssize_t show_lwid_enb(struct device *dev,
-                        struct device_attribute *attr,
-                        char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int ret;
-
-       ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_LWID_ENB);
-       if (ret >= 0)
-               ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
-       return ret;
-}
-
-static ssize_t store_lwid_enb(struct device *dev,
-                         struct device_attribute *attr,
-                         const char *buf,
-                         size_t count)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int ret, r;
-       u16 val;
-
-       ret = ipath_parse_ushort(buf, &val);
-       if (ret >= 0 && (val == 0 || val > 3))
-               ret = -EINVAL;
-       if (ret < 0) {
-               ipath_dev_err(dd,
-                       "attempt to set invalid Link Width (enable)\n");
-               goto bail;
-       }
-
-       r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LWID_ENB, val);
-       if (r < 0)
-               ret = r;
-
-bail:
-       return ret;
-}
-
-/* Get current link width */
-static ssize_t show_lwid(struct device *dev,
-                        struct device_attribute *attr,
-                        char *buf)
-
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int ret;
-
-       ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_LWID);
-       if (ret >= 0)
-               ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
-       return ret;
-}
-
-/*
- * Get/Set Link-speeds enabled. Or of 1=SDR 2=DDR.
- */
-static ssize_t show_spd_enb(struct device *dev,
-                        struct device_attribute *attr,
-                        char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int ret;
-
-       ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_SPD_ENB);
-       if (ret >= 0)
-               ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
-       return ret;
-}
-
-static ssize_t store_spd_enb(struct device *dev,
-                         struct device_attribute *attr,
-                         const char *buf,
-                         size_t count)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int ret, r;
-       u16 val;
-
-       ret = ipath_parse_ushort(buf, &val);
-       if (ret >= 0 && (val == 0 || val > (IPATH_IB_SDR | IPATH_IB_DDR)))
-               ret = -EINVAL;
-       if (ret < 0) {
-               ipath_dev_err(dd,
-                       "attempt to set invalid Link Speed (enable)\n");
-               goto bail;
-       }
-
-       r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_SPD_ENB, val);
-       if (r < 0)
-               ret = r;
-
-bail:
-       return ret;
-}
-
-/* Get current link speed */
-static ssize_t show_spd(struct device *dev,
-                        struct device_attribute *attr,
-                        char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int ret;
-
-       ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_SPD);
-       if (ret >= 0)
-               ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
-       return ret;
-}
-
-/*
- * Get/Set RX polarity-invert enable. 0=no, 1=yes.
- */
-static ssize_t show_rx_polinv_enb(struct device *dev,
-                        struct device_attribute *attr,
-                        char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int ret;
-
-       ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_RXPOL_ENB);
-       if (ret >= 0)
-               ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
-       return ret;
-}
-
-static ssize_t store_rx_polinv_enb(struct device *dev,
-                         struct device_attribute *attr,
-                         const char *buf,
-                         size_t count)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int ret, r;
-       u16 val;
-
-       ret = ipath_parse_ushort(buf, &val);
-       if (ret >= 0 && val > 1) {
-               ipath_dev_err(dd,
-                       "attempt to set invalid Rx Polarity (enable)\n");
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_RXPOL_ENB, val);
-       if (r < 0)
-               ret = r;
-
-bail:
-       return ret;
-}
-
-/*
- * Get/Set RX lane-reversal enable. 0=no, 1=yes.
- */
-static ssize_t show_lanerev_enb(struct device *dev,
-                        struct device_attribute *attr,
-                        char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int ret;
-
-       ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_LREV_ENB);
-       if (ret >= 0)
-               ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
-       return ret;
-}
-
-static ssize_t store_lanerev_enb(struct device *dev,
-                         struct device_attribute *attr,
-                         const char *buf,
-                         size_t count)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int ret, r;
-       u16 val;
-
-       ret = ipath_parse_ushort(buf, &val);
-       if (ret >= 0 && val > 1) {
-               ret = -EINVAL;
-               ipath_dev_err(dd,
-                       "attempt to set invalid Lane reversal (enable)\n");
-               goto bail;
-       }
-
-       r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LREV_ENB, val);
-       if (r < 0)
-               ret = r;
-
-bail:
-       return ret;
-}
-
-static DRIVER_ATTR(num_units, S_IRUGO, show_num_units, NULL);
-static DRIVER_ATTR(version, S_IRUGO, show_version, NULL);
-
-static struct attribute *driver_attributes[] = {
-       &driver_attr_num_units.attr,
-       &driver_attr_version.attr,
-       NULL
-};
-
-static struct attribute_group driver_attr_group = {
-       .attrs = driver_attributes
-};
-
-static ssize_t store_tempsense(struct device *dev,
-                              struct device_attribute *attr,
-                              const char *buf,
-                              size_t count)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int ret, stat;
-       u16 val;
-
-       ret = ipath_parse_ushort(buf, &val);
-       if (ret <= 0) {
-               ipath_dev_err(dd, "attempt to set invalid tempsense config\n");
-               goto bail;
-       }
-       /* If anything but the highest limit, enable T_CRIT_A "interrupt" */
-       stat = ipath_tempsense_write(dd, 9, (val == 0x7f7f) ? 0x80 : 0);
-       if (stat) {
-               ipath_dev_err(dd, "Unable to set tempsense config\n");
-               ret = -1;
-               goto bail;
-       }
-       stat = ipath_tempsense_write(dd, 0xB, (u8) (val & 0xFF));
-       if (stat) {
-               ipath_dev_err(dd, "Unable to set local Tcrit\n");
-               ret = -1;
-               goto bail;
-       }
-       stat = ipath_tempsense_write(dd, 0xD, (u8) (val >> 8));
-       if (stat) {
-               ipath_dev_err(dd, "Unable to set remote Tcrit\n");
-               ret = -1;
-               goto bail;
-       }
-
-bail:
-       return ret;
-}
-
-/*
- * dump tempsense regs. in decimal, to ease shell-scripts.
- */
-static ssize_t show_tempsense(struct device *dev,
-                             struct device_attribute *attr,
-                             char *buf)
-{
-       struct ipath_devdata *dd = dev_get_drvdata(dev);
-       int ret;
-       int idx;
-       u8 regvals[8];
-
-       ret = -ENXIO;
-       for (idx = 0; idx < 8; ++idx) {
-               if (idx == 6)
-                       continue;
-               ret = ipath_tempsense_read(dd, idx);
-               if (ret < 0)
-                       break;
-               regvals[idx] = ret;
-       }
-       if (idx == 8)
-               ret = scnprintf(buf, PAGE_SIZE, "%d %d %02X %02X %d %d\n",
-                       *(signed char *)(regvals),
-                       *(signed char *)(regvals + 1),
-                       regvals[2], regvals[3],
-                       *(signed char *)(regvals + 5),
-                       *(signed char *)(regvals + 7));
-       return ret;
-}
-
-const struct attribute_group *ipath_driver_attr_groups[] = {
-       &driver_attr_group,
-       NULL,
-};
-
-static DEVICE_ATTR(guid, S_IWUSR | S_IRUGO, show_guid, store_guid);
-static DEVICE_ATTR(lmc, S_IWUSR | S_IRUGO, show_lmc, store_lmc);
-static DEVICE_ATTR(lid, S_IWUSR | S_IRUGO, show_lid, store_lid);
-static DEVICE_ATTR(link_state, S_IWUSR, NULL, store_link_state);
-static DEVICE_ATTR(mlid, S_IWUSR | S_IRUGO, show_mlid, store_mlid);
-static DEVICE_ATTR(mtu, S_IWUSR | S_IRUGO, show_mtu, store_mtu);
-static DEVICE_ATTR(enabled, S_IWUSR | S_IRUGO, show_enabled, store_enabled);
-static DEVICE_ATTR(nguid, S_IRUGO, show_nguid, NULL);
-static DEVICE_ATTR(nports, S_IRUGO, show_nports, NULL);
-static DEVICE_ATTR(reset, S_IWUSR, NULL, store_reset);
-static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL);
-static DEVICE_ATTR(status, S_IRUGO, show_status, NULL);
-static DEVICE_ATTR(status_str, S_IRUGO, show_status_str, NULL);
-static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL);
-static DEVICE_ATTR(unit, S_IRUGO, show_unit, NULL);
-static DEVICE_ATTR(rx_pol_inv, S_IWUSR, NULL, store_rx_pol_inv);
-static DEVICE_ATTR(led_override, S_IWUSR, NULL, store_led_override);
-static DEVICE_ATTR(logged_errors, S_IRUGO, show_logged_errs, NULL);
-static DEVICE_ATTR(localbus_info, S_IRUGO, show_localbus_info, NULL);
-static DEVICE_ATTR(jint_max_packets, S_IWUSR | S_IRUGO,
-                  show_jint_max_packets, store_jint_max_packets);
-static DEVICE_ATTR(jint_idle_ticks, S_IWUSR | S_IRUGO,
-                  show_jint_idle_ticks, store_jint_idle_ticks);
-static DEVICE_ATTR(tempsense, S_IWUSR | S_IRUGO,
-                  show_tempsense, store_tempsense);
-
-static struct attribute *dev_attributes[] = {
-       &dev_attr_guid.attr,
-       &dev_attr_lmc.attr,
-       &dev_attr_lid.attr,
-       &dev_attr_link_state.attr,
-       &dev_attr_mlid.attr,
-       &dev_attr_mtu.attr,
-       &dev_attr_nguid.attr,
-       &dev_attr_nports.attr,
-       &dev_attr_serial.attr,
-       &dev_attr_status.attr,
-       &dev_attr_status_str.attr,
-       &dev_attr_boardversion.attr,
-       &dev_attr_unit.attr,
-       &dev_attr_enabled.attr,
-       &dev_attr_rx_pol_inv.attr,
-       &dev_attr_led_override.attr,
-       &dev_attr_logged_errors.attr,
-       &dev_attr_tempsense.attr,
-       &dev_attr_localbus_info.attr,
-       NULL
-};
-
-static struct attribute_group dev_attr_group = {
-       .attrs = dev_attributes
-};
-
-static DEVICE_ATTR(hrtbt_enable, S_IWUSR | S_IRUGO, show_hrtbt_enb,
-                  store_hrtbt_enb);
-static DEVICE_ATTR(link_width_enable, S_IWUSR | S_IRUGO, show_lwid_enb,
-                  store_lwid_enb);
-static DEVICE_ATTR(link_width, S_IRUGO, show_lwid, NULL);
-static DEVICE_ATTR(link_speed_enable, S_IWUSR | S_IRUGO, show_spd_enb,
-                  store_spd_enb);
-static DEVICE_ATTR(link_speed, S_IRUGO, show_spd, NULL);
-static DEVICE_ATTR(rx_pol_inv_enable, S_IWUSR | S_IRUGO, show_rx_polinv_enb,
-                  store_rx_polinv_enb);
-static DEVICE_ATTR(rx_lane_rev_enable, S_IWUSR | S_IRUGO, show_lanerev_enb,
-                  store_lanerev_enb);
-
-static struct attribute *dev_ibcfg_attributes[] = {
-       &dev_attr_hrtbt_enable.attr,
-       &dev_attr_link_width_enable.attr,
-       &dev_attr_link_width.attr,
-       &dev_attr_link_speed_enable.attr,
-       &dev_attr_link_speed.attr,
-       &dev_attr_rx_pol_inv_enable.attr,
-       &dev_attr_rx_lane_rev_enable.attr,
-       NULL
-};
-
-static struct attribute_group dev_ibcfg_attr_group = {
-       .attrs = dev_ibcfg_attributes
-};
-
-/**
- * ipath_expose_reset - create a device reset file
- * @dev: the device structure
- *
- * Only expose a file that lets us reset the device after someone
- * enters diag mode.  A device reset is quite likely to crash the
- * machine entirely, so we don't want to normally make it
- * available.
- *
- * Called with ipath_mutex held.
- */
-int ipath_expose_reset(struct device *dev)
-{
-       static int exposed;
-       int ret;
-
-       if (!exposed) {
-               ret = device_create_file(dev, &dev_attr_reset);
-               exposed = 1;
-       } else {
-               ret = 0;
-       }
-
-       return ret;
-}
-
-int ipath_device_create_group(struct device *dev, struct ipath_devdata *dd)
-{
-       int ret;
-
-       ret = sysfs_create_group(&dev->kobj, &dev_attr_group);
-       if (ret)
-               goto bail;
-
-       ret = sysfs_create_group(&dev->kobj, &dev_counter_attr_group);
-       if (ret)
-               goto bail_attrs;
-
-       if (dd->ipath_flags & IPATH_HAS_MULT_IB_SPEED) {
-               ret = device_create_file(dev, &dev_attr_jint_idle_ticks);
-               if (ret)
-                       goto bail_counter;
-               ret = device_create_file(dev, &dev_attr_jint_max_packets);
-               if (ret)
-                       goto bail_idle;
-
-               ret = sysfs_create_group(&dev->kobj, &dev_ibcfg_attr_group);
-               if (ret)
-                       goto bail_max;
-       }
-
-       return 0;
-
-bail_max:
-       device_remove_file(dev, &dev_attr_jint_max_packets);
-bail_idle:
-       device_remove_file(dev, &dev_attr_jint_idle_ticks);
-bail_counter:
-       sysfs_remove_group(&dev->kobj, &dev_counter_attr_group);
-bail_attrs:
-       sysfs_remove_group(&dev->kobj, &dev_attr_group);
-bail:
-       return ret;
-}
-
-void ipath_device_remove_group(struct device *dev, struct ipath_devdata *dd)
-{
-       sysfs_remove_group(&dev->kobj, &dev_counter_attr_group);
-
-       if (dd->ipath_flags & IPATH_HAS_MULT_IB_SPEED) {
-               sysfs_remove_group(&dev->kobj, &dev_ibcfg_attr_group);
-               device_remove_file(dev, &dev_attr_jint_idle_ticks);
-               device_remove_file(dev, &dev_attr_jint_max_packets);
-       }
-
-       sysfs_remove_group(&dev->kobj, &dev_attr_group);
-
-       device_remove_file(dev, &dev_attr_reset);
-}
diff --git a/drivers/staging/rdma/ipath/ipath_uc.c b/drivers/staging/rdma/ipath/ipath_uc.c
deleted file mode 100644 (file)
index 0246b30..0000000
+++ /dev/null
@@ -1,547 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "ipath_verbs.h"
-#include "ipath_kernel.h"
-
-/* cut down ridiculously long IB macro names */
-#define OP(x) IB_OPCODE_UC_##x
-
-/**
- * ipath_make_uc_req - construct a request packet (SEND, RDMA write)
- * @qp: a pointer to the QP
- *
- * Return 1 if constructed; otherwise, return 0.
- */
-int ipath_make_uc_req(struct ipath_qp *qp)
-{
-       struct ipath_other_headers *ohdr;
-       struct ipath_swqe *wqe;
-       unsigned long flags;
-       u32 hwords;
-       u32 bth0;
-       u32 len;
-       u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
-       int ret = 0;
-
-       spin_lock_irqsave(&qp->s_lock, flags);
-
-       if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) {
-               if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND))
-                       goto bail;
-               /* We are in the error state, flush the work request. */
-               if (qp->s_last == qp->s_head)
-                       goto bail;
-               /* If DMAs are in progress, we can't flush immediately. */
-               if (atomic_read(&qp->s_dma_busy)) {
-                       qp->s_flags |= IPATH_S_WAIT_DMA;
-                       goto bail;
-               }
-               wqe = get_swqe_ptr(qp, qp->s_last);
-               ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
-               goto done;
-       }
-
-       ohdr = &qp->s_hdr.u.oth;
-       if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
-               ohdr = &qp->s_hdr.u.l.oth;
-
-       /* header size in 32-bit words LRH+BTH = (8+12)/4. */
-       hwords = 5;
-       bth0 = 1 << 22; /* Set M bit */
-
-       /* Get the next send request. */
-       wqe = get_swqe_ptr(qp, qp->s_cur);
-       qp->s_wqe = NULL;
-       switch (qp->s_state) {
-       default:
-               if (!(ib_ipath_state_ops[qp->state] &
-                   IPATH_PROCESS_NEXT_SEND_OK))
-                       goto bail;
-               /* Check if send work queue is empty. */
-               if (qp->s_cur == qp->s_head)
-                       goto bail;
-               /*
-                * Start a new request.
-                */
-               qp->s_psn = wqe->psn = qp->s_next_psn;
-               qp->s_sge.sge = wqe->sg_list[0];
-               qp->s_sge.sg_list = wqe->sg_list + 1;
-               qp->s_sge.num_sge = wqe->wr.num_sge;
-               qp->s_len = len = wqe->length;
-               switch (wqe->wr.opcode) {
-               case IB_WR_SEND:
-               case IB_WR_SEND_WITH_IMM:
-                       if (len > pmtu) {
-                               qp->s_state = OP(SEND_FIRST);
-                               len = pmtu;
-                               break;
-                       }
-                       if (wqe->wr.opcode == IB_WR_SEND)
-                               qp->s_state = OP(SEND_ONLY);
-                       else {
-                               qp->s_state =
-                                       OP(SEND_ONLY_WITH_IMMEDIATE);
-                               /* Immediate data comes after the BTH */
-                               ohdr->u.imm_data = wqe->wr.ex.imm_data;
-                               hwords += 1;
-                       }
-                       if (wqe->wr.send_flags & IB_SEND_SOLICITED)
-                               bth0 |= 1 << 23;
-                       qp->s_wqe = wqe;
-                       if (++qp->s_cur >= qp->s_size)
-                               qp->s_cur = 0;
-                       break;
-
-               case IB_WR_RDMA_WRITE:
-               case IB_WR_RDMA_WRITE_WITH_IMM:
-                       ohdr->u.rc.reth.vaddr =
-                               cpu_to_be64(wqe->rdma_wr.remote_addr);
-                       ohdr->u.rc.reth.rkey =
-                               cpu_to_be32(wqe->rdma_wr.rkey);
-                       ohdr->u.rc.reth.length = cpu_to_be32(len);
-                       hwords += sizeof(struct ib_reth) / 4;
-                       if (len > pmtu) {
-                               qp->s_state = OP(RDMA_WRITE_FIRST);
-                               len = pmtu;
-                               break;
-                       }
-                       if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
-                               qp->s_state = OP(RDMA_WRITE_ONLY);
-                       else {
-                               qp->s_state =
-                                       OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
-                               /* Immediate data comes after the RETH */
-                               ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
-                               hwords += 1;
-                               if (wqe->wr.send_flags & IB_SEND_SOLICITED)
-                                       bth0 |= 1 << 23;
-                       }
-                       qp->s_wqe = wqe;
-                       if (++qp->s_cur >= qp->s_size)
-                               qp->s_cur = 0;
-                       break;
-
-               default:
-                       goto bail;
-               }
-               break;
-
-       case OP(SEND_FIRST):
-               qp->s_state = OP(SEND_MIDDLE);
-               /* FALLTHROUGH */
-       case OP(SEND_MIDDLE):
-               len = qp->s_len;
-               if (len > pmtu) {
-                       len = pmtu;
-                       break;
-               }
-               if (wqe->wr.opcode == IB_WR_SEND)
-                       qp->s_state = OP(SEND_LAST);
-               else {
-                       qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
-                       /* Immediate data comes after the BTH */
-                       ohdr->u.imm_data = wqe->wr.ex.imm_data;
-                       hwords += 1;
-               }
-               if (wqe->wr.send_flags & IB_SEND_SOLICITED)
-                       bth0 |= 1 << 23;
-               qp->s_wqe = wqe;
-               if (++qp->s_cur >= qp->s_size)
-                       qp->s_cur = 0;
-               break;
-
-       case OP(RDMA_WRITE_FIRST):
-               qp->s_state = OP(RDMA_WRITE_MIDDLE);
-               /* FALLTHROUGH */
-       case OP(RDMA_WRITE_MIDDLE):
-               len = qp->s_len;
-               if (len > pmtu) {
-                       len = pmtu;
-                       break;
-               }
-               if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
-                       qp->s_state = OP(RDMA_WRITE_LAST);
-               else {
-                       qp->s_state =
-                               OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
-                       /* Immediate data comes after the BTH */
-                       ohdr->u.imm_data = wqe->wr.ex.imm_data;
-                       hwords += 1;
-                       if (wqe->wr.send_flags & IB_SEND_SOLICITED)
-                               bth0 |= 1 << 23;
-               }
-               qp->s_wqe = wqe;
-               if (++qp->s_cur >= qp->s_size)
-                       qp->s_cur = 0;
-               break;
-       }
-       qp->s_len -= len;
-       qp->s_hdrwords = hwords;
-       qp->s_cur_sge = &qp->s_sge;
-       qp->s_cur_size = len;
-       ipath_make_ruc_header(to_idev(qp->ibqp.device),
-                             qp, ohdr, bth0 | (qp->s_state << 24),
-                             qp->s_next_psn++ & IPATH_PSN_MASK);
-done:
-       ret = 1;
-       goto unlock;
-
-bail:
-       qp->s_flags &= ~IPATH_S_BUSY;
-unlock:
-       spin_unlock_irqrestore(&qp->s_lock, flags);
-       return ret;
-}
-
-/**
- * ipath_uc_rcv - handle an incoming UC packet
- * @dev: the device the packet came in on
- * @hdr: the header of the packet
- * @has_grh: true if the packet has a GRH
- * @data: the packet data
- * @tlen: the length of the packet
- * @qp: the QP for this packet.
- *
- * This is called from ipath_qp_rcv() to process an incoming UC packet
- * for the given QP.
- * Called at interrupt level.
- */
-void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
-                 int has_grh, void *data, u32 tlen, struct ipath_qp *qp)
-{
-       struct ipath_other_headers *ohdr;
-       int opcode;
-       u32 hdrsize;
-       u32 psn;
-       u32 pad;
-       struct ib_wc wc;
-       u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
-       struct ib_reth *reth;
-       int header_in_data;
-
-       /* Validate the SLID. See Ch. 9.6.1.5 */
-       if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid))
-               goto done;
-
-       /* Check for GRH */
-       if (!has_grh) {
-               ohdr = &hdr->u.oth;
-               hdrsize = 8 + 12;       /* LRH + BTH */
-               psn = be32_to_cpu(ohdr->bth[2]);
-               header_in_data = 0;
-       } else {
-               ohdr = &hdr->u.l.oth;
-               hdrsize = 8 + 40 + 12;  /* LRH + GRH + BTH */
-               /*
-                * The header with GRH is 60 bytes and the
-                * core driver sets the eager header buffer
-                * size to 56 bytes so the last 4 bytes of
-                * the BTH header (PSN) is in the data buffer.
-                */
-               header_in_data = dev->dd->ipath_rcvhdrentsize == 16;
-               if (header_in_data) {
-                       psn = be32_to_cpu(((__be32 *) data)[0]);
-                       data += sizeof(__be32);
-               } else
-                       psn = be32_to_cpu(ohdr->bth[2]);
-       }
-       /*
-        * The opcode is in the low byte when its in network order
-        * (top byte when in host order).
-        */
-       opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
-
-       memset(&wc, 0, sizeof wc);
-
-       /* Compare the PSN verses the expected PSN. */
-       if (unlikely(ipath_cmp24(psn, qp->r_psn) != 0)) {
-               /*
-                * Handle a sequence error.
-                * Silently drop any current message.
-                */
-               qp->r_psn = psn;
-       inv:
-               qp->r_state = OP(SEND_LAST);
-               switch (opcode) {
-               case OP(SEND_FIRST):
-               case OP(SEND_ONLY):
-               case OP(SEND_ONLY_WITH_IMMEDIATE):
-                       goto send_first;
-
-               case OP(RDMA_WRITE_FIRST):
-               case OP(RDMA_WRITE_ONLY):
-               case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
-                       goto rdma_first;
-
-               default:
-                       dev->n_pkt_drops++;
-                       goto done;
-               }
-       }
-
-       /* Check for opcode sequence errors. */
-       switch (qp->r_state) {
-       case OP(SEND_FIRST):
-       case OP(SEND_MIDDLE):
-               if (opcode == OP(SEND_MIDDLE) ||
-                   opcode == OP(SEND_LAST) ||
-                   opcode == OP(SEND_LAST_WITH_IMMEDIATE))
-                       break;
-               goto inv;
-
-       case OP(RDMA_WRITE_FIRST):
-       case OP(RDMA_WRITE_MIDDLE):
-               if (opcode == OP(RDMA_WRITE_MIDDLE) ||
-                   opcode == OP(RDMA_WRITE_LAST) ||
-                   opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
-                       break;
-               goto inv;
-
-       default:
-               if (opcode == OP(SEND_FIRST) ||
-                   opcode == OP(SEND_ONLY) ||
-                   opcode == OP(SEND_ONLY_WITH_IMMEDIATE) ||
-                   opcode == OP(RDMA_WRITE_FIRST) ||
-                   opcode == OP(RDMA_WRITE_ONLY) ||
-                   opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
-                       break;
-               goto inv;
-       }
-
-       /* OK, process the packet. */
-       switch (opcode) {
-       case OP(SEND_FIRST):
-       case OP(SEND_ONLY):
-       case OP(SEND_ONLY_WITH_IMMEDIATE):
-       send_first:
-               if (qp->r_flags & IPATH_R_REUSE_SGE) {
-                       qp->r_flags &= ~IPATH_R_REUSE_SGE;
-                       qp->r_sge = qp->s_rdma_read_sge;
-               } else if (!ipath_get_rwqe(qp, 0)) {
-                       dev->n_pkt_drops++;
-                       goto done;
-               }
-               /* Save the WQE so we can reuse it in case of an error. */
-               qp->s_rdma_read_sge = qp->r_sge;
-               qp->r_rcv_len = 0;
-               if (opcode == OP(SEND_ONLY))
-                       goto send_last;
-               else if (opcode == OP(SEND_ONLY_WITH_IMMEDIATE))
-                       goto send_last_imm;
-               /* FALLTHROUGH */
-       case OP(SEND_MIDDLE):
-               /* Check for invalid length PMTU or posted rwqe len. */
-               if (unlikely(tlen != (hdrsize + pmtu + 4))) {
-                       qp->r_flags |= IPATH_R_REUSE_SGE;
-                       dev->n_pkt_drops++;
-                       goto done;
-               }
-               qp->r_rcv_len += pmtu;
-               if (unlikely(qp->r_rcv_len > qp->r_len)) {
-                       qp->r_flags |= IPATH_R_REUSE_SGE;
-                       dev->n_pkt_drops++;
-                       goto done;
-               }
-               ipath_copy_sge(&qp->r_sge, data, pmtu);
-               break;
-
-       case OP(SEND_LAST_WITH_IMMEDIATE):
-       send_last_imm:
-               if (header_in_data) {
-                       wc.ex.imm_data = *(__be32 *) data;
-                       data += sizeof(__be32);
-               } else {
-                       /* Immediate data comes after BTH */
-                       wc.ex.imm_data = ohdr->u.imm_data;
-               }
-               hdrsize += 4;
-               wc.wc_flags = IB_WC_WITH_IMM;
-               /* FALLTHROUGH */
-       case OP(SEND_LAST):
-       send_last:
-               /* Get the number of bytes the message was padded by. */
-               pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
-               /* Check for invalid length. */
-               /* XXX LAST len should be >= 1 */
-               if (unlikely(tlen < (hdrsize + pad + 4))) {
-                       qp->r_flags |= IPATH_R_REUSE_SGE;
-                       dev->n_pkt_drops++;
-                       goto done;
-               }
-               /* Don't count the CRC. */
-               tlen -= (hdrsize + pad + 4);
-               wc.byte_len = tlen + qp->r_rcv_len;
-               if (unlikely(wc.byte_len > qp->r_len)) {
-                       qp->r_flags |= IPATH_R_REUSE_SGE;
-                       dev->n_pkt_drops++;
-                       goto done;
-               }
-               wc.opcode = IB_WC_RECV;
-       last_imm:
-               ipath_copy_sge(&qp->r_sge, data, tlen);
-               wc.wr_id = qp->r_wr_id;
-               wc.status = IB_WC_SUCCESS;
-               wc.qp = &qp->ibqp;
-               wc.src_qp = qp->remote_qpn;
-               wc.slid = qp->remote_ah_attr.dlid;
-               wc.sl = qp->remote_ah_attr.sl;
-               /* Signal completion event if the solicited bit is set. */
-               ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
-                              (ohdr->bth[0] &
-                               cpu_to_be32(1 << 23)) != 0);
-               break;
-
-       case OP(RDMA_WRITE_FIRST):
-       case OP(RDMA_WRITE_ONLY):
-       case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): /* consume RWQE */
-       rdma_first:
-               /* RETH comes after BTH */
-               if (!header_in_data)
-                       reth = &ohdr->u.rc.reth;
-               else {
-                       reth = (struct ib_reth *)data;
-                       data += sizeof(*reth);
-               }
-               hdrsize += sizeof(*reth);
-               qp->r_len = be32_to_cpu(reth->length);
-               qp->r_rcv_len = 0;
-               if (qp->r_len != 0) {
-                       u32 rkey = be32_to_cpu(reth->rkey);
-                       u64 vaddr = be64_to_cpu(reth->vaddr);
-                       int ok;
-
-                       /* Check rkey */
-                       ok = ipath_rkey_ok(qp, &qp->r_sge, qp->r_len,
-                                          vaddr, rkey,
-                                          IB_ACCESS_REMOTE_WRITE);
-                       if (unlikely(!ok)) {
-                               dev->n_pkt_drops++;
-                               goto done;
-                       }
-               } else {
-                       qp->r_sge.sg_list = NULL;
-                       qp->r_sge.sge.mr = NULL;
-                       qp->r_sge.sge.vaddr = NULL;
-                       qp->r_sge.sge.length = 0;
-                       qp->r_sge.sge.sge_length = 0;
-               }
-               if (unlikely(!(qp->qp_access_flags &
-                              IB_ACCESS_REMOTE_WRITE))) {
-                       dev->n_pkt_drops++;
-                       goto done;
-               }
-               if (opcode == OP(RDMA_WRITE_ONLY))
-                       goto rdma_last;
-               else if (opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
-                       goto rdma_last_imm;
-               /* FALLTHROUGH */
-       case OP(RDMA_WRITE_MIDDLE):
-               /* Check for invalid length PMTU or posted rwqe len. */
-               if (unlikely(tlen != (hdrsize + pmtu + 4))) {
-                       dev->n_pkt_drops++;
-                       goto done;
-               }
-               qp->r_rcv_len += pmtu;
-               if (unlikely(qp->r_rcv_len > qp->r_len)) {
-                       dev->n_pkt_drops++;
-                       goto done;
-               }
-               ipath_copy_sge(&qp->r_sge, data, pmtu);
-               break;
-
-       case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
-       rdma_last_imm:
-               if (header_in_data) {
-                       wc.ex.imm_data = *(__be32 *) data;
-                       data += sizeof(__be32);
-               } else {
-                       /* Immediate data comes after BTH */
-                       wc.ex.imm_data = ohdr->u.imm_data;
-               }
-               hdrsize += 4;
-               wc.wc_flags = IB_WC_WITH_IMM;
-
-               /* Get the number of bytes the message was padded by. */
-               pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
-               /* Check for invalid length. */
-               /* XXX LAST len should be >= 1 */
-               if (unlikely(tlen < (hdrsize + pad + 4))) {
-                       dev->n_pkt_drops++;
-                       goto done;
-               }
-               /* Don't count the CRC. */
-               tlen -= (hdrsize + pad + 4);
-               if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) {
-                       dev->n_pkt_drops++;
-                       goto done;
-               }
-               if (qp->r_flags & IPATH_R_REUSE_SGE)
-                       qp->r_flags &= ~IPATH_R_REUSE_SGE;
-               else if (!ipath_get_rwqe(qp, 1)) {
-                       dev->n_pkt_drops++;
-                       goto done;
-               }
-               wc.byte_len = qp->r_len;
-               wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
-               goto last_imm;
-
-       case OP(RDMA_WRITE_LAST):
-       rdma_last:
-               /* Get the number of bytes the message was padded by. */
-               pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
-               /* Check for invalid length. */
-               /* XXX LAST len should be >= 1 */
-               if (unlikely(tlen < (hdrsize + pad + 4))) {
-                       dev->n_pkt_drops++;
-                       goto done;
-               }
-               /* Don't count the CRC. */
-               tlen -= (hdrsize + pad + 4);
-               if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) {
-                       dev->n_pkt_drops++;
-                       goto done;
-               }
-               ipath_copy_sge(&qp->r_sge, data, tlen);
-               break;
-
-       default:
-               /* Drop packet for unknown opcodes. */
-               dev->n_pkt_drops++;
-               goto done;
-       }
-       qp->r_psn++;
-       qp->r_state = opcode;
-done:
-       return;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_ud.c b/drivers/staging/rdma/ipath/ipath_ud.c
deleted file mode 100644 (file)
index 385d941..0000000
+++ /dev/null
@@ -1,579 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <rdma/ib_smi.h>
-
-#include "ipath_verbs.h"
-#include "ipath_kernel.h"
-
-/**
- * ipath_ud_loopback - handle send on loopback QPs
- * @sqp: the sending QP
- * @swqe: the send work request
- *
- * This is called from ipath_make_ud_req() to forward a WQE addressed
- * to the same HCA.
- * Note that the receive interrupt handler may be calling ipath_ud_rcv()
- * while this is being called.
- */
-static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
-{
-       struct ipath_ibdev *dev = to_idev(sqp->ibqp.device);
-       struct ipath_qp *qp;
-       struct ib_ah_attr *ah_attr;
-       unsigned long flags;
-       struct ipath_rq *rq;
-       struct ipath_srq *srq;
-       struct ipath_sge_state rsge;
-       struct ipath_sge *sge;
-       struct ipath_rwq *wq;
-       struct ipath_rwqe *wqe;
-       void (*handler)(struct ib_event *, void *);
-       struct ib_wc wc;
-       u32 tail;
-       u32 rlen;
-       u32 length;
-
-       qp = ipath_lookup_qpn(&dev->qp_table, swqe->ud_wr.remote_qpn);
-       if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
-               dev->n_pkt_drops++;
-               goto done;
-       }
-
-       /*
-        * Check that the qkey matches (except for QP0, see 9.6.1.4.1).
-        * Qkeys with the high order bit set mean use the
-        * qkey from the QP context instead of the WR (see 10.2.5).
-        */
-       if (unlikely(qp->ibqp.qp_num &&
-                    ((int) swqe->ud_wr.remote_qkey < 0 ?
-                     sqp->qkey : swqe->ud_wr.remote_qkey) != qp->qkey)) {
-               /* XXX OK to lose a count once in a while. */
-               dev->qkey_violations++;
-               dev->n_pkt_drops++;
-               goto drop;
-       }
-
-       /*
-        * A GRH is expected to precede the data even if not
-        * present on the wire.
-        */
-       length = swqe->length;
-       memset(&wc, 0, sizeof wc);
-       wc.byte_len = length + sizeof(struct ib_grh);
-
-       if (swqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
-               wc.wc_flags = IB_WC_WITH_IMM;
-               wc.ex.imm_data = swqe->wr.ex.imm_data;
-       }
-
-       /*
-        * This would be a lot simpler if we could call ipath_get_rwqe()
-        * but that uses state that the receive interrupt handler uses
-        * so we would need to lock out receive interrupts while doing
-        * local loopback.
-        */
-       if (qp->ibqp.srq) {
-               srq = to_isrq(qp->ibqp.srq);
-               handler = srq->ibsrq.event_handler;
-               rq = &srq->rq;
-       } else {
-               srq = NULL;
-               handler = NULL;
-               rq = &qp->r_rq;
-       }
-
-       /*
-        * Get the next work request entry to find where to put the data.
-        * Note that it is safe to drop the lock after changing rq->tail
-        * since ipath_post_receive() won't fill the empty slot.
-        */
-       spin_lock_irqsave(&rq->lock, flags);
-       wq = rq->wq;
-       tail = wq->tail;
-       /* Validate tail before using it since it is user writable. */
-       if (tail >= rq->size)
-               tail = 0;
-       if (unlikely(tail == wq->head)) {
-               spin_unlock_irqrestore(&rq->lock, flags);
-               dev->n_pkt_drops++;
-               goto drop;
-       }
-       wqe = get_rwqe_ptr(rq, tail);
-       rsge.sg_list = qp->r_ud_sg_list;
-       if (!ipath_init_sge(qp, wqe, &rlen, &rsge)) {
-               spin_unlock_irqrestore(&rq->lock, flags);
-               dev->n_pkt_drops++;
-               goto drop;
-       }
-       /* Silently drop packets which are too big. */
-       if (wc.byte_len > rlen) {
-               spin_unlock_irqrestore(&rq->lock, flags);
-               dev->n_pkt_drops++;
-               goto drop;
-       }
-       if (++tail >= rq->size)
-               tail = 0;
-       wq->tail = tail;
-       wc.wr_id = wqe->wr_id;
-       if (handler) {
-               u32 n;
-
-               /*
-                * validate head pointer value and compute
-                * the number of remaining WQEs.
-                */
-               n = wq->head;
-               if (n >= rq->size)
-                       n = 0;
-               if (n < tail)
-                       n += rq->size - tail;
-               else
-                       n -= tail;
-               if (n < srq->limit) {
-                       struct ib_event ev;
-
-                       srq->limit = 0;
-                       spin_unlock_irqrestore(&rq->lock, flags);
-                       ev.device = qp->ibqp.device;
-                       ev.element.srq = qp->ibqp.srq;
-                       ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
-                       handler(&ev, srq->ibsrq.srq_context);
-               } else
-                       spin_unlock_irqrestore(&rq->lock, flags);
-       } else
-               spin_unlock_irqrestore(&rq->lock, flags);
-
-       ah_attr = &to_iah(swqe->ud_wr.ah)->attr;
-       if (ah_attr->ah_flags & IB_AH_GRH) {
-               ipath_copy_sge(&rsge, &ah_attr->grh, sizeof(struct ib_grh));
-               wc.wc_flags |= IB_WC_GRH;
-       } else
-               ipath_skip_sge(&rsge, sizeof(struct ib_grh));
-       sge = swqe->sg_list;
-       while (length) {
-               u32 len = sge->length;
-
-               if (len > length)
-                       len = length;
-               if (len > sge->sge_length)
-                       len = sge->sge_length;
-               BUG_ON(len == 0);
-               ipath_copy_sge(&rsge, sge->vaddr, len);
-               sge->vaddr += len;
-               sge->length -= len;
-               sge->sge_length -= len;
-               if (sge->sge_length == 0) {
-                       if (--swqe->wr.num_sge)
-                               sge++;
-               } else if (sge->length == 0 && sge->mr != NULL) {
-                       if (++sge->n >= IPATH_SEGSZ) {
-                               if (++sge->m >= sge->mr->mapsz)
-                                       break;
-                               sge->n = 0;
-                       }
-                       sge->vaddr =
-                               sge->mr->map[sge->m]->segs[sge->n].vaddr;
-                       sge->length =
-                               sge->mr->map[sge->m]->segs[sge->n].length;
-               }
-               length -= len;
-       }
-       wc.status = IB_WC_SUCCESS;
-       wc.opcode = IB_WC_RECV;
-       wc.qp = &qp->ibqp;
-       wc.src_qp = sqp->ibqp.qp_num;
-       /* XXX do we know which pkey matched? Only needed for GSI. */
-       wc.pkey_index = 0;
-       wc.slid = dev->dd->ipath_lid |
-               (ah_attr->src_path_bits &
-                ((1 << dev->dd->ipath_lmc) - 1));
-       wc.sl = ah_attr->sl;
-       wc.dlid_path_bits =
-               ah_attr->dlid & ((1 << dev->dd->ipath_lmc) - 1);
-       wc.port_num = 1;
-       /* Signal completion event if the solicited bit is set. */
-       ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
-                      swqe->ud_wr.wr.send_flags & IB_SEND_SOLICITED);
-drop:
-       if (atomic_dec_and_test(&qp->refcount))
-               wake_up(&qp->wait);
-done:;
-}
-
-/**
- * ipath_make_ud_req - construct a UD request packet
- * @qp: the QP
- *
- * Return 1 if constructed; otherwise, return 0.
- */
-int ipath_make_ud_req(struct ipath_qp *qp)
-{
-       struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-       struct ipath_other_headers *ohdr;
-       struct ib_ah_attr *ah_attr;
-       struct ipath_swqe *wqe;
-       unsigned long flags;
-       u32 nwords;
-       u32 extra_bytes;
-       u32 bth0;
-       u16 lrh0;
-       u16 lid;
-       int ret = 0;
-       int next_cur;
-
-       spin_lock_irqsave(&qp->s_lock, flags);
-
-       if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_NEXT_SEND_OK)) {
-               if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND))
-                       goto bail;
-               /* We are in the error state, flush the work request. */
-               if (qp->s_last == qp->s_head)
-                       goto bail;
-               /* If DMAs are in progress, we can't flush immediately. */
-               if (atomic_read(&qp->s_dma_busy)) {
-                       qp->s_flags |= IPATH_S_WAIT_DMA;
-                       goto bail;
-               }
-               wqe = get_swqe_ptr(qp, qp->s_last);
-               ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
-               goto done;
-       }
-
-       if (qp->s_cur == qp->s_head)
-               goto bail;
-
-       wqe = get_swqe_ptr(qp, qp->s_cur);
-       next_cur = qp->s_cur + 1;
-       if (next_cur >= qp->s_size)
-               next_cur = 0;
-
-       /* Construct the header. */
-       ah_attr = &to_iah(wqe->ud_wr.ah)->attr;
-       if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE) {
-               if (ah_attr->dlid != IPATH_PERMISSIVE_LID)
-                       dev->n_multicast_xmit++;
-               else
-                       dev->n_unicast_xmit++;
-       } else {
-               dev->n_unicast_xmit++;
-               lid = ah_attr->dlid & ~((1 << dev->dd->ipath_lmc) - 1);
-               if (unlikely(lid == dev->dd->ipath_lid)) {
-                       /*
-                        * If DMAs are in progress, we can't generate
-                        * a completion for the loopback packet since
-                        * it would be out of order.
-                        * XXX Instead of waiting, we could queue a
-                        * zero length descriptor so we get a callback.
-                        */
-                       if (atomic_read(&qp->s_dma_busy)) {
-                               qp->s_flags |= IPATH_S_WAIT_DMA;
-                               goto bail;
-                       }
-                       qp->s_cur = next_cur;
-                       spin_unlock_irqrestore(&qp->s_lock, flags);
-                       ipath_ud_loopback(qp, wqe);
-                       spin_lock_irqsave(&qp->s_lock, flags);
-                       ipath_send_complete(qp, wqe, IB_WC_SUCCESS);
-                       goto done;
-               }
-       }
-
-       qp->s_cur = next_cur;
-       extra_bytes = -wqe->length & 3;
-       nwords = (wqe->length + extra_bytes) >> 2;
-
-       /* header size in 32-bit words LRH+BTH+DETH = (8+12+8)/4. */
-       qp->s_hdrwords = 7;
-       qp->s_cur_size = wqe->length;
-       qp->s_cur_sge = &qp->s_sge;
-       qp->s_dmult = ah_attr->static_rate;
-       qp->s_wqe = wqe;
-       qp->s_sge.sge = wqe->sg_list[0];
-       qp->s_sge.sg_list = wqe->sg_list + 1;
-       qp->s_sge.num_sge = wqe->ud_wr.wr.num_sge;
-
-       if (ah_attr->ah_flags & IB_AH_GRH) {
-               /* Header size in 32-bit words. */
-               qp->s_hdrwords += ipath_make_grh(dev, &qp->s_hdr.u.l.grh,
-                                                &ah_attr->grh,
-                                                qp->s_hdrwords, nwords);
-               lrh0 = IPATH_LRH_GRH;
-               ohdr = &qp->s_hdr.u.l.oth;
-               /*
-                * Don't worry about sending to locally attached multicast
-                * QPs.  It is unspecified by the spec. what happens.
-                */
-       } else {
-               /* Header size in 32-bit words. */
-               lrh0 = IPATH_LRH_BTH;
-               ohdr = &qp->s_hdr.u.oth;
-       }
-       if (wqe->ud_wr.wr.opcode == IB_WR_SEND_WITH_IMM) {
-               qp->s_hdrwords++;
-               ohdr->u.ud.imm_data = wqe->ud_wr.wr.ex.imm_data;
-               bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24;
-       } else
-               bth0 = IB_OPCODE_UD_SEND_ONLY << 24;
-       lrh0 |= ah_attr->sl << 4;
-       if (qp->ibqp.qp_type == IB_QPT_SMI)
-               lrh0 |= 0xF000; /* Set VL (see ch. 13.5.3.1) */
-       qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
-       qp->s_hdr.lrh[1] = cpu_to_be16(ah_attr->dlid);  /* DEST LID */
-       qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords +
-                                          SIZE_OF_CRC);
-       lid = dev->dd->ipath_lid;
-       if (lid) {
-               lid |= ah_attr->src_path_bits &
-                       ((1 << dev->dd->ipath_lmc) - 1);
-               qp->s_hdr.lrh[3] = cpu_to_be16(lid);
-       } else
-               qp->s_hdr.lrh[3] = IB_LID_PERMISSIVE;
-       if (wqe->ud_wr.wr.send_flags & IB_SEND_SOLICITED)
-               bth0 |= 1 << 23;
-       bth0 |= extra_bytes << 20;
-       bth0 |= qp->ibqp.qp_type == IB_QPT_SMI ? IPATH_DEFAULT_P_KEY :
-               ipath_get_pkey(dev->dd, qp->s_pkey_index);
-       ohdr->bth[0] = cpu_to_be32(bth0);
-       /*
-        * Use the multicast QP if the destination LID is a multicast LID.
-        */
-       ohdr->bth[1] = ah_attr->dlid >= IPATH_MULTICAST_LID_BASE &&
-               ah_attr->dlid != IPATH_PERMISSIVE_LID ?
-               cpu_to_be32(IPATH_MULTICAST_QPN) :
-               cpu_to_be32(wqe->ud_wr.remote_qpn);
-       ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & IPATH_PSN_MASK);
-       /*
-        * Qkeys with the high order bit set mean use the
-        * qkey from the QP context instead of the WR (see 10.2.5).
-        */
-       ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->ud_wr.remote_qkey < 0 ?
-                                        qp->qkey : wqe->ud_wr.remote_qkey);
-       ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
-
-done:
-       ret = 1;
-       goto unlock;
-
-bail:
-       qp->s_flags &= ~IPATH_S_BUSY;
-unlock:
-       spin_unlock_irqrestore(&qp->s_lock, flags);
-       return ret;
-}
-
-/**
- * ipath_ud_rcv - receive an incoming UD packet
- * @dev: the device the packet came in on
- * @hdr: the packet header
- * @has_grh: true if the packet has a GRH
- * @data: the packet data
- * @tlen: the packet length
- * @qp: the QP the packet came on
- *
- * This is called from ipath_qp_rcv() to process an incoming UD packet
- * for the given QP.
- * Called at interrupt level.
- */
-void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
-                 int has_grh, void *data, u32 tlen, struct ipath_qp *qp)
-{
-       struct ipath_other_headers *ohdr;
-       int opcode;
-       u32 hdrsize;
-       u32 pad;
-       struct ib_wc wc;
-       u32 qkey;
-       u32 src_qp;
-       u16 dlid;
-       int header_in_data;
-
-       /* Check for GRH */
-       if (!has_grh) {
-               ohdr = &hdr->u.oth;
-               hdrsize = 8 + 12 + 8;   /* LRH + BTH + DETH */
-               qkey = be32_to_cpu(ohdr->u.ud.deth[0]);
-               src_qp = be32_to_cpu(ohdr->u.ud.deth[1]);
-               header_in_data = 0;
-       } else {
-               ohdr = &hdr->u.l.oth;
-               hdrsize = 8 + 40 + 12 + 8; /* LRH + GRH + BTH + DETH */
-               /*
-                * The header with GRH is 68 bytes and the core driver sets
-                * the eager header buffer size to 56 bytes so the last 12
-                * bytes of the IB header is in the data buffer.
-                */
-               header_in_data = dev->dd->ipath_rcvhdrentsize == 16;
-               if (header_in_data) {
-                       qkey = be32_to_cpu(((__be32 *) data)[1]);
-                       src_qp = be32_to_cpu(((__be32 *) data)[2]);
-                       data += 12;
-               } else {
-                       qkey = be32_to_cpu(ohdr->u.ud.deth[0]);
-                       src_qp = be32_to_cpu(ohdr->u.ud.deth[1]);
-               }
-       }
-       src_qp &= IPATH_QPN_MASK;
-
-       /*
-        * Check that the permissive LID is only used on QP0
-        * and the QKEY matches (see 9.6.1.4.1 and 9.6.1.5.1).
-        */
-       if (qp->ibqp.qp_num) {
-               if (unlikely(hdr->lrh[1] == IB_LID_PERMISSIVE ||
-                            hdr->lrh[3] == IB_LID_PERMISSIVE)) {
-                       dev->n_pkt_drops++;
-                       goto bail;
-               }
-               if (unlikely(qkey != qp->qkey)) {
-                       /* XXX OK to lose a count once in a while. */
-                       dev->qkey_violations++;
-                       dev->n_pkt_drops++;
-                       goto bail;
-               }
-       } else if (hdr->lrh[1] == IB_LID_PERMISSIVE ||
-                  hdr->lrh[3] == IB_LID_PERMISSIVE) {
-               struct ib_smp *smp = (struct ib_smp *) data;
-
-               if (smp->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
-                       dev->n_pkt_drops++;
-                       goto bail;
-               }
-       }
-
-       /*
-        * The opcode is in the low byte when its in network order
-        * (top byte when in host order).
-        */
-       opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
-       if (qp->ibqp.qp_num > 1 &&
-           opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) {
-               if (header_in_data) {
-                       wc.ex.imm_data = *(__be32 *) data;
-                       data += sizeof(__be32);
-               } else
-                       wc.ex.imm_data = ohdr->u.ud.imm_data;
-               wc.wc_flags = IB_WC_WITH_IMM;
-               hdrsize += sizeof(u32);
-       } else if (opcode == IB_OPCODE_UD_SEND_ONLY) {
-               wc.ex.imm_data = 0;
-               wc.wc_flags = 0;
-       } else {
-               dev->n_pkt_drops++;
-               goto bail;
-       }
-
-       /* Get the number of bytes the message was padded by. */
-       pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
-       if (unlikely(tlen < (hdrsize + pad + 4))) {
-               /* Drop incomplete packets. */
-               dev->n_pkt_drops++;
-               goto bail;
-       }
-       tlen -= hdrsize + pad + 4;
-
-       /* Drop invalid MAD packets (see 13.5.3.1). */
-       if (unlikely((qp->ibqp.qp_num == 0 &&
-                     (tlen != 256 ||
-                      (be16_to_cpu(hdr->lrh[0]) >> 12) != 15)) ||
-                    (qp->ibqp.qp_num == 1 &&
-                     (tlen != 256 ||
-                      (be16_to_cpu(hdr->lrh[0]) >> 12) == 15)))) {
-               dev->n_pkt_drops++;
-               goto bail;
-       }
-
-       /*
-        * A GRH is expected to precede the data even if not
-        * present on the wire.
-        */
-       wc.byte_len = tlen + sizeof(struct ib_grh);
-
-       /*
-        * Get the next work request entry to find where to put the data.
-        */
-       if (qp->r_flags & IPATH_R_REUSE_SGE)
-               qp->r_flags &= ~IPATH_R_REUSE_SGE;
-       else if (!ipath_get_rwqe(qp, 0)) {
-               /*
-                * Count VL15 packets dropped due to no receive buffer.
-                * Otherwise, count them as buffer overruns since usually,
-                * the HW will be able to receive packets even if there are
-                * no QPs with posted receive buffers.
-                */
-               if (qp->ibqp.qp_num == 0)
-                       dev->n_vl15_dropped++;
-               else
-                       dev->rcv_errors++;
-               goto bail;
-       }
-       /* Silently drop packets which are too big. */
-       if (wc.byte_len > qp->r_len) {
-               qp->r_flags |= IPATH_R_REUSE_SGE;
-               dev->n_pkt_drops++;
-               goto bail;
-       }
-       if (has_grh) {
-               ipath_copy_sge(&qp->r_sge, &hdr->u.l.grh,
-                              sizeof(struct ib_grh));
-               wc.wc_flags |= IB_WC_GRH;
-       } else
-               ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh));
-       ipath_copy_sge(&qp->r_sge, data,
-                      wc.byte_len - sizeof(struct ib_grh));
-       if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
-               goto bail;
-       wc.wr_id = qp->r_wr_id;
-       wc.status = IB_WC_SUCCESS;
-       wc.opcode = IB_WC_RECV;
-       wc.vendor_err = 0;
-       wc.qp = &qp->ibqp;
-       wc.src_qp = src_qp;
-       /* XXX do we know which pkey matched? Only needed for GSI. */
-       wc.pkey_index = 0;
-       wc.slid = be16_to_cpu(hdr->lrh[3]);
-       wc.sl = (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF;
-       dlid = be16_to_cpu(hdr->lrh[1]);
-       /*
-        * Save the LMC lower bits if the destination LID is a unicast LID.
-        */
-       wc.dlid_path_bits = dlid >= IPATH_MULTICAST_LID_BASE ? 0 :
-               dlid & ((1 << dev->dd->ipath_lmc) - 1);
-       wc.port_num = 1;
-       /* Signal completion event if the solicited bit is set. */
-       ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
-                      (ohdr->bth[0] &
-                       cpu_to_be32(1 << 23)) != 0);
-
-bail:;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_user_pages.c b/drivers/staging/rdma/ipath/ipath_user_pages.c
deleted file mode 100644 (file)
index d29b4da..0000000
+++ /dev/null
@@ -1,228 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/mm.h>
-#include <linux/device.h>
-#include <linux/slab.h>
-
-#include "ipath_kernel.h"
-
-static void __ipath_release_user_pages(struct page **p, size_t num_pages,
-                                  int dirty)
-{
-       size_t i;
-
-       for (i = 0; i < num_pages; i++) {
-               ipath_cdbg(MM, "%lu/%lu put_page %p\n", (unsigned long) i,
-                          (unsigned long) num_pages, p[i]);
-               if (dirty)
-                       set_page_dirty_lock(p[i]);
-               put_page(p[i]);
-       }
-}
-
-/* call with current->mm->mmap_sem held */
-static int __ipath_get_user_pages(unsigned long start_page, size_t num_pages,
-                                 struct page **p)
-{
-       unsigned long lock_limit;
-       size_t got;
-       int ret;
-
-       lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-
-       if (num_pages > lock_limit) {
-               ret = -ENOMEM;
-               goto bail;
-       }
-
-       ipath_cdbg(VERBOSE, "pin %lx pages from vaddr %lx\n",
-                  (unsigned long) num_pages, start_page);
-
-       for (got = 0; got < num_pages; got += ret) {
-               ret = get_user_pages(current, current->mm,
-                                    start_page + got * PAGE_SIZE,
-                                    num_pages - got, 1, 1,
-                                    p + got, NULL);
-               if (ret < 0)
-                       goto bail_release;
-       }
-
-       current->mm->pinned_vm += num_pages;
-
-       ret = 0;
-       goto bail;
-
-bail_release:
-       __ipath_release_user_pages(p, got, 0);
-bail:
-       return ret;
-}
-
-/**
- * ipath_map_page - a safety wrapper around pci_map_page()
- *
- * A dma_addr of all 0's is interpreted by the chip as "disabled".
- * Unfortunately, it can also be a valid dma_addr returned on some
- * architectures.
- *
- * The powerpc iommu assigns dma_addrs in ascending order, so we don't
- * have to bother with retries or mapping a dummy page to insure we
- * don't just get the same mapping again.
- *
- * I'm sure we won't be so lucky with other iommu's, so FIXME.
- */
-dma_addr_t ipath_map_page(struct pci_dev *hwdev, struct page *page,
-       unsigned long offset, size_t size, int direction)
-{
-       dma_addr_t phys;
-
-       phys = pci_map_page(hwdev, page, offset, size, direction);
-
-       if (phys == 0) {
-               pci_unmap_page(hwdev, phys, size, direction);
-               phys = pci_map_page(hwdev, page, offset, size, direction);
-               /*
-                * FIXME: If we get 0 again, we should keep this page,
-                * map another, then free the 0 page.
-                */
-       }
-
-       return phys;
-}
-
-/**
- * ipath_map_single - a safety wrapper around pci_map_single()
- *
- * Same idea as ipath_map_page().
- */
-dma_addr_t ipath_map_single(struct pci_dev *hwdev, void *ptr, size_t size,
-       int direction)
-{
-       dma_addr_t phys;
-
-       phys = pci_map_single(hwdev, ptr, size, direction);
-
-       if (phys == 0) {
-               pci_unmap_single(hwdev, phys, size, direction);
-               phys = pci_map_single(hwdev, ptr, size, direction);
-               /*
-                * FIXME: If we get 0 again, we should keep this page,
-                * map another, then free the 0 page.
-                */
-       }
-
-       return phys;
-}
-
-/**
- * ipath_get_user_pages - lock user pages into memory
- * @start_page: the start page
- * @num_pages: the number of pages
- * @p: the output page structures
- *
- * This function takes a given start page (page aligned user virtual
- * address) and pins it and the following specified number of pages.  For
- * now, num_pages is always 1, but that will probably change at some point
- * (because caller is doing expected sends on a single virtually contiguous
- * buffer, so we can do all pages at once).
- */
-int ipath_get_user_pages(unsigned long start_page, size_t num_pages,
-                        struct page **p)
-{
-       int ret;
-
-       down_write(&current->mm->mmap_sem);
-
-       ret = __ipath_get_user_pages(start_page, num_pages, p);
-
-       up_write(&current->mm->mmap_sem);
-
-       return ret;
-}
-
-void ipath_release_user_pages(struct page **p, size_t num_pages)
-{
-       down_write(&current->mm->mmap_sem);
-
-       __ipath_release_user_pages(p, num_pages, 1);
-
-       current->mm->pinned_vm -= num_pages;
-
-       up_write(&current->mm->mmap_sem);
-}
-
-struct ipath_user_pages_work {
-       struct work_struct work;
-       struct mm_struct *mm;
-       unsigned long num_pages;
-};
-
-static void user_pages_account(struct work_struct *_work)
-{
-       struct ipath_user_pages_work *work =
-               container_of(_work, struct ipath_user_pages_work, work);
-
-       down_write(&work->mm->mmap_sem);
-       work->mm->pinned_vm -= work->num_pages;
-       up_write(&work->mm->mmap_sem);
-       mmput(work->mm);
-       kfree(work);
-}
-
-void ipath_release_user_pages_on_close(struct page **p, size_t num_pages)
-{
-       struct ipath_user_pages_work *work;
-       struct mm_struct *mm;
-
-       __ipath_release_user_pages(p, num_pages, 1);
-
-       mm = get_task_mm(current);
-       if (!mm)
-               return;
-
-       work = kmalloc(sizeof(*work), GFP_KERNEL);
-       if (!work)
-               goto bail_mm;
-
-       INIT_WORK(&work->work, user_pages_account);
-       work->mm = mm;
-       work->num_pages = num_pages;
-
-       queue_work(ib_wq, &work->work);
-       return;
-
-bail_mm:
-       mmput(mm);
-       return;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_user_sdma.c b/drivers/staging/rdma/ipath/ipath_user_sdma.c
deleted file mode 100644 (file)
index 8c12e3c..0000000
+++ /dev/null
@@ -1,874 +0,0 @@
-/*
- * Copyright (c) 2007, 2008 QLogic Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <linux/mm.h>
-#include <linux/types.h>
-#include <linux/device.h>
-#include <linux/dmapool.h>
-#include <linux/slab.h>
-#include <linux/list.h>
-#include <linux/highmem.h>
-#include <linux/io.h>
-#include <linux/uio.h>
-#include <linux/rbtree.h>
-#include <linux/spinlock.h>
-#include <linux/delay.h>
-
-#include "ipath_kernel.h"
-#include "ipath_user_sdma.h"
-
-/* minimum size of header */
-#define IPATH_USER_SDMA_MIN_HEADER_LENGTH      64
-/* expected size of headers (for dma_pool) */
-#define IPATH_USER_SDMA_EXP_HEADER_LENGTH      64
-/* length mask in PBC (lower 11 bits) */
-#define IPATH_PBC_LENGTH_MASK                  ((1 << 11) - 1)
-
-struct ipath_user_sdma_pkt {
-       u8 naddr;               /* dimension of addr (1..3) ... */
-       u32 counter;            /* sdma pkts queued counter for this entry */
-       u64 added;              /* global descq number of entries */
-
-       struct {
-               u32 offset;                     /* offset for kvaddr, addr */
-               u32 length;                     /* length in page */
-               u8  put_page;                   /* should we put_page? */
-               u8  dma_mapped;                 /* is page dma_mapped? */
-               struct page *page;              /* may be NULL (coherent mem) */
-               void *kvaddr;                   /* FIXME: only for pio hack */
-               dma_addr_t addr;
-       } addr[4];   /* max pages, any more and we coalesce */
-       struct list_head list;  /* list element */
-};
-
-struct ipath_user_sdma_queue {
-       /*
-        * pkts sent to dma engine are queued on this
-        * list head.  the type of the elements of this
-        * list are struct ipath_user_sdma_pkt...
-        */
-       struct list_head sent;
-
-       /* headers with expected length are allocated from here... */
-       char header_cache_name[64];
-       struct dma_pool *header_cache;
-
-       /* packets are allocated from the slab cache... */
-       char pkt_slab_name[64];
-       struct kmem_cache *pkt_slab;
-
-       /* as packets go on the queued queue, they are counted... */
-       u32 counter;
-       u32 sent_counter;
-
-       /* dma page table */
-       struct rb_root dma_pages_root;
-
-       /* protect everything above... */
-       struct mutex lock;
-};
-
-struct ipath_user_sdma_queue *
-ipath_user_sdma_queue_create(struct device *dev, int unit, int port, int sport)
-{
-       struct ipath_user_sdma_queue *pq =
-               kmalloc(sizeof(struct ipath_user_sdma_queue), GFP_KERNEL);
-
-       if (!pq)
-               goto done;
-
-       pq->counter = 0;
-       pq->sent_counter = 0;
-       INIT_LIST_HEAD(&pq->sent);
-
-       mutex_init(&pq->lock);
-
-       snprintf(pq->pkt_slab_name, sizeof(pq->pkt_slab_name),
-                "ipath-user-sdma-pkts-%u-%02u.%02u", unit, port, sport);
-       pq->pkt_slab = kmem_cache_create(pq->pkt_slab_name,
-                                        sizeof(struct ipath_user_sdma_pkt),
-                                        0, 0, NULL);
-
-       if (!pq->pkt_slab)
-               goto err_kfree;
-
-       snprintf(pq->header_cache_name, sizeof(pq->header_cache_name),
-                "ipath-user-sdma-headers-%u-%02u.%02u", unit, port, sport);
-       pq->header_cache = dma_pool_create(pq->header_cache_name,
-                                          dev,
-                                          IPATH_USER_SDMA_EXP_HEADER_LENGTH,
-                                          4, 0);
-       if (!pq->header_cache)
-               goto err_slab;
-
-       pq->dma_pages_root = RB_ROOT;
-
-       goto done;
-
-err_slab:
-       kmem_cache_destroy(pq->pkt_slab);
-err_kfree:
-       kfree(pq);
-       pq = NULL;
-
-done:
-       return pq;
-}
-
-static void ipath_user_sdma_init_frag(struct ipath_user_sdma_pkt *pkt,
-                                     int i, size_t offset, size_t len,
-                                     int put_page, int dma_mapped,
-                                     struct page *page,
-                                     void *kvaddr, dma_addr_t dma_addr)
-{
-       pkt->addr[i].offset = offset;
-       pkt->addr[i].length = len;
-       pkt->addr[i].put_page = put_page;
-       pkt->addr[i].dma_mapped = dma_mapped;
-       pkt->addr[i].page = page;
-       pkt->addr[i].kvaddr = kvaddr;
-       pkt->addr[i].addr = dma_addr;
-}
-
-static void ipath_user_sdma_init_header(struct ipath_user_sdma_pkt *pkt,
-                                       u32 counter, size_t offset,
-                                       size_t len, int dma_mapped,
-                                       struct page *page,
-                                       void *kvaddr, dma_addr_t dma_addr)
-{
-       pkt->naddr = 1;
-       pkt->counter = counter;
-       ipath_user_sdma_init_frag(pkt, 0, offset, len, 0, dma_mapped, page,
-                                 kvaddr, dma_addr);
-}
-
-/* we've too many pages in the iovec, coalesce to a single page */
-static int ipath_user_sdma_coalesce(const struct ipath_devdata *dd,
-                                   struct ipath_user_sdma_pkt *pkt,
-                                   const struct iovec *iov,
-                                   unsigned long niov) {
-       int ret = 0;
-       struct page *page = alloc_page(GFP_KERNEL);
-       void *mpage_save;
-       char *mpage;
-       int i;
-       int len = 0;
-       dma_addr_t dma_addr;
-
-       if (!page) {
-               ret = -ENOMEM;
-               goto done;
-       }
-
-       mpage = kmap(page);
-       mpage_save = mpage;
-       for (i = 0; i < niov; i++) {
-               int cfur;
-
-               cfur = copy_from_user(mpage,
-                                     iov[i].iov_base, iov[i].iov_len);
-               if (cfur) {
-                       ret = -EFAULT;
-                       goto free_unmap;
-               }
-
-               mpage += iov[i].iov_len;
-               len += iov[i].iov_len;
-       }
-
-       dma_addr = dma_map_page(&dd->pcidev->dev, page, 0, len,
-                               DMA_TO_DEVICE);
-       if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
-               ret = -ENOMEM;
-               goto free_unmap;
-       }
-
-       ipath_user_sdma_init_frag(pkt, 1, 0, len, 0, 1, page, mpage_save,
-                                 dma_addr);
-       pkt->naddr = 2;
-
-       goto done;
-
-free_unmap:
-       kunmap(page);
-       __free_page(page);
-done:
-       return ret;
-}
-
-/* how many pages in this iovec element? */
-static int ipath_user_sdma_num_pages(const struct iovec *iov)
-{
-       const unsigned long addr  = (unsigned long) iov->iov_base;
-       const unsigned long  len  = iov->iov_len;
-       const unsigned long spage = addr & PAGE_MASK;
-       const unsigned long epage = (addr + len - 1) & PAGE_MASK;
-
-       return 1 + ((epage - spage) >> PAGE_SHIFT);
-}
-
-/* truncate length to page boundary */
-static int ipath_user_sdma_page_length(unsigned long addr, unsigned long len)
-{
-       const unsigned long offset = offset_in_page(addr);
-
-       return ((offset + len) > PAGE_SIZE) ? (PAGE_SIZE - offset) : len;
-}
-
-static void ipath_user_sdma_free_pkt_frag(struct device *dev,
-                                         struct ipath_user_sdma_queue *pq,
-                                         struct ipath_user_sdma_pkt *pkt,
-                                         int frag)
-{
-       const int i = frag;
-
-       if (pkt->addr[i].page) {
-               if (pkt->addr[i].dma_mapped)
-                       dma_unmap_page(dev,
-                                      pkt->addr[i].addr,
-                                      pkt->addr[i].length,
-                                      DMA_TO_DEVICE);
-
-               if (pkt->addr[i].kvaddr)
-                       kunmap(pkt->addr[i].page);
-
-               if (pkt->addr[i].put_page)
-                       put_page(pkt->addr[i].page);
-               else
-                       __free_page(pkt->addr[i].page);
-       } else if (pkt->addr[i].kvaddr)
-               /* free coherent mem from cache... */
-               dma_pool_free(pq->header_cache,
-                             pkt->addr[i].kvaddr, pkt->addr[i].addr);
-}
-
-/* return number of pages pinned... */
-static int ipath_user_sdma_pin_pages(const struct ipath_devdata *dd,
-                                    struct ipath_user_sdma_pkt *pkt,
-                                    unsigned long addr, int tlen, int npages)
-{
-       struct page *pages[2];
-       int j;
-       int ret;
-
-       ret = get_user_pages_fast(addr, npages, 0, pages);
-       if (ret != npages) {
-               int i;
-
-               for (i = 0; i < ret; i++)
-                       put_page(pages[i]);
-
-               ret = -ENOMEM;
-               goto done;
-       }
-
-       for (j = 0; j < npages; j++) {
-               /* map the pages... */
-               const int flen =
-                       ipath_user_sdma_page_length(addr, tlen);
-               dma_addr_t dma_addr =
-                       dma_map_page(&dd->pcidev->dev,
-                                    pages[j], 0, flen, DMA_TO_DEVICE);
-               unsigned long fofs = offset_in_page(addr);
-
-               if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
-                       ret = -ENOMEM;
-                       goto done;
-               }
-
-               ipath_user_sdma_init_frag(pkt, pkt->naddr, fofs, flen, 1, 1,
-                                         pages[j], kmap(pages[j]),
-                                         dma_addr);
-
-               pkt->naddr++;
-               addr += flen;
-               tlen -= flen;
-       }
-
-done:
-       return ret;
-}
-
-static int ipath_user_sdma_pin_pkt(const struct ipath_devdata *dd,
-                                  struct ipath_user_sdma_queue *pq,
-                                  struct ipath_user_sdma_pkt *pkt,
-                                  const struct iovec *iov,
-                                  unsigned long niov)
-{
-       int ret = 0;
-       unsigned long idx;
-
-       for (idx = 0; idx < niov; idx++) {
-               const int npages = ipath_user_sdma_num_pages(iov + idx);
-               const unsigned long addr = (unsigned long) iov[idx].iov_base;
-
-               ret = ipath_user_sdma_pin_pages(dd, pkt,
-                                               addr, iov[idx].iov_len,
-                                               npages);
-               if (ret < 0)
-                       goto free_pkt;
-       }
-
-       goto done;
-
-free_pkt:
-       for (idx = 0; idx < pkt->naddr; idx++)
-               ipath_user_sdma_free_pkt_frag(&dd->pcidev->dev, pq, pkt, idx);
-
-done:
-       return ret;
-}
-
-static int ipath_user_sdma_init_payload(const struct ipath_devdata *dd,
-                                       struct ipath_user_sdma_queue *pq,
-                                       struct ipath_user_sdma_pkt *pkt,
-                                       const struct iovec *iov,
-                                       unsigned long niov, int npages)
-{
-       int ret = 0;
-
-       if (npages >= ARRAY_SIZE(pkt->addr))
-               ret = ipath_user_sdma_coalesce(dd, pkt, iov, niov);
-       else
-               ret = ipath_user_sdma_pin_pkt(dd, pq, pkt, iov, niov);
-
-       return ret;
-}
-
-/* free a packet list -- return counter value of last packet */
-static void ipath_user_sdma_free_pkt_list(struct device *dev,
-                                         struct ipath_user_sdma_queue *pq,
-                                         struct list_head *list)
-{
-       struct ipath_user_sdma_pkt *pkt, *pkt_next;
-
-       list_for_each_entry_safe(pkt, pkt_next, list, list) {
-               int i;
-
-               for (i = 0; i < pkt->naddr; i++)
-                       ipath_user_sdma_free_pkt_frag(dev, pq, pkt, i);
-
-               kmem_cache_free(pq->pkt_slab, pkt);
-       }
-}
-
-/*
- * copy headers, coalesce etc -- pq->lock must be held
- *
- * we queue all the packets to list, returning the
- * number of bytes total.  list must be empty initially,
- * as, if there is an error we clean it...
- */
-static int ipath_user_sdma_queue_pkts(const struct ipath_devdata *dd,
-                                     struct ipath_user_sdma_queue *pq,
-                                     struct list_head *list,
-                                     const struct iovec *iov,
-                                     unsigned long niov,
-                                     int maxpkts)
-{
-       unsigned long idx = 0;
-       int ret = 0;
-       int npkts = 0;
-       struct page *page = NULL;
-       __le32 *pbc;
-       dma_addr_t dma_addr;
-       struct ipath_user_sdma_pkt *pkt = NULL;
-       size_t len;
-       size_t nw;
-       u32 counter = pq->counter;
-       int dma_mapped = 0;
-
-       while (idx < niov && npkts < maxpkts) {
-               const unsigned long addr = (unsigned long) iov[idx].iov_base;
-               const unsigned long idx_save = idx;
-               unsigned pktnw;
-               unsigned pktnwc;
-               int nfrags = 0;
-               int npages = 0;
-               int cfur;
-
-               dma_mapped = 0;
-               len = iov[idx].iov_len;
-               nw = len >> 2;
-               page = NULL;
-
-               pkt = kmem_cache_alloc(pq->pkt_slab, GFP_KERNEL);
-               if (!pkt) {
-                       ret = -ENOMEM;
-                       goto free_list;
-               }
-
-               if (len < IPATH_USER_SDMA_MIN_HEADER_LENGTH ||
-                   len > PAGE_SIZE || len & 3 || addr & 3) {
-                       ret = -EINVAL;
-                       goto free_pkt;
-               }
-
-               if (len == IPATH_USER_SDMA_EXP_HEADER_LENGTH)
-                       pbc = dma_pool_alloc(pq->header_cache, GFP_KERNEL,
-                                            &dma_addr);
-               else
-                       pbc = NULL;
-
-               if (!pbc) {
-                       page = alloc_page(GFP_KERNEL);
-                       if (!page) {
-                               ret = -ENOMEM;
-                               goto free_pkt;
-                       }
-                       pbc = kmap(page);
-               }
-
-               cfur = copy_from_user(pbc, iov[idx].iov_base, len);
-               if (cfur) {
-                       ret = -EFAULT;
-                       goto free_pbc;
-               }
-
-               /*
-                * this assignment is a bit strange.  it's because the
-                * the pbc counts the number of 32 bit words in the full
-                * packet _except_ the first word of the pbc itself...
-                */
-               pktnwc = nw - 1;
-
-               /*
-                * pktnw computation yields the number of 32 bit words
-                * that the caller has indicated in the PBC.  note that
-                * this is one less than the total number of words that
-                * goes to the send DMA engine as the first 32 bit word
-                * of the PBC itself is not counted.  Armed with this count,
-                * we can verify that the packet is consistent with the
-                * iovec lengths.
-                */
-               pktnw = le32_to_cpu(*pbc) & IPATH_PBC_LENGTH_MASK;
-               if (pktnw < pktnwc || pktnw > pktnwc + (PAGE_SIZE >> 2)) {
-                       ret = -EINVAL;
-                       goto free_pbc;
-               }
-
-
-               idx++;
-               while (pktnwc < pktnw && idx < niov) {
-                       const size_t slen = iov[idx].iov_len;
-                       const unsigned long faddr =
-                               (unsigned long) iov[idx].iov_base;
-
-                       if (slen & 3 || faddr & 3 || !slen ||
-                           slen > PAGE_SIZE) {
-                               ret = -EINVAL;
-                               goto free_pbc;
-                       }
-
-                       npages++;
-                       if ((faddr & PAGE_MASK) !=
-                           ((faddr + slen - 1) & PAGE_MASK))
-                               npages++;
-
-                       pktnwc += slen >> 2;
-                       idx++;
-                       nfrags++;
-               }
-
-               if (pktnwc != pktnw) {
-                       ret = -EINVAL;
-                       goto free_pbc;
-               }
-
-               if (page) {
-                       dma_addr = dma_map_page(&dd->pcidev->dev,
-                                               page, 0, len, DMA_TO_DEVICE);
-                       if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
-                               ret = -ENOMEM;
-                               goto free_pbc;
-                       }
-
-                       dma_mapped = 1;
-               }
-
-               ipath_user_sdma_init_header(pkt, counter, 0, len, dma_mapped,
-                                           page, pbc, dma_addr);
-
-               if (nfrags) {
-                       ret = ipath_user_sdma_init_payload(dd, pq, pkt,
-                                                          iov + idx_save + 1,
-                                                          nfrags, npages);
-                       if (ret < 0)
-                               goto free_pbc_dma;
-               }
-
-               counter++;
-               npkts++;
-
-               list_add_tail(&pkt->list, list);
-       }
-
-       ret = idx;
-       goto done;
-
-free_pbc_dma:
-       if (dma_mapped)
-               dma_unmap_page(&dd->pcidev->dev, dma_addr, len, DMA_TO_DEVICE);
-free_pbc:
-       if (page) {
-               kunmap(page);
-               __free_page(page);
-       } else
-               dma_pool_free(pq->header_cache, pbc, dma_addr);
-free_pkt:
-       kmem_cache_free(pq->pkt_slab, pkt);
-free_list:
-       ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, list);
-done:
-       return ret;
-}
-
-static void ipath_user_sdma_set_complete_counter(struct ipath_user_sdma_queue *pq,
-                                                u32 c)
-{
-       pq->sent_counter = c;
-}
-
-/* try to clean out queue -- needs pq->lock */
-static int ipath_user_sdma_queue_clean(const struct ipath_devdata *dd,
-                                      struct ipath_user_sdma_queue *pq)
-{
-       struct list_head free_list;
-       struct ipath_user_sdma_pkt *pkt;
-       struct ipath_user_sdma_pkt *pkt_prev;
-       int ret = 0;
-
-       INIT_LIST_HEAD(&free_list);
-
-       list_for_each_entry_safe(pkt, pkt_prev, &pq->sent, list) {
-               s64 descd = dd->ipath_sdma_descq_removed - pkt->added;
-
-               if (descd < 0)
-                       break;
-
-               list_move_tail(&pkt->list, &free_list);
-
-               /* one more packet cleaned */
-               ret++;
-       }
-
-       if (!list_empty(&free_list)) {
-               u32 counter;
-
-               pkt = list_entry(free_list.prev,
-                                struct ipath_user_sdma_pkt, list);
-               counter = pkt->counter;
-
-               ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list);
-               ipath_user_sdma_set_complete_counter(pq, counter);
-       }
-
-       return ret;
-}
-
-void ipath_user_sdma_queue_destroy(struct ipath_user_sdma_queue *pq)
-{
-       if (!pq)
-               return;
-
-       kmem_cache_destroy(pq->pkt_slab);
-       dma_pool_destroy(pq->header_cache);
-       kfree(pq);
-}
-
-/* clean descriptor queue, returns > 0 if some elements cleaned */
-static int ipath_user_sdma_hwqueue_clean(struct ipath_devdata *dd)
-{
-       int ret;
-       unsigned long flags;
-
-       spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-       ret = ipath_sdma_make_progress(dd);
-       spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-
-       return ret;
-}
-
-/* we're in close, drain packets so that we can cleanup successfully... */
-void ipath_user_sdma_queue_drain(struct ipath_devdata *dd,
-                                struct ipath_user_sdma_queue *pq)
-{
-       int i;
-
-       if (!pq)
-               return;
-
-       for (i = 0; i < 100; i++) {
-               mutex_lock(&pq->lock);
-               if (list_empty(&pq->sent)) {
-                       mutex_unlock(&pq->lock);
-                       break;
-               }
-               ipath_user_sdma_hwqueue_clean(dd);
-               ipath_user_sdma_queue_clean(dd, pq);
-               mutex_unlock(&pq->lock);
-               msleep(10);
-       }
-
-       if (!list_empty(&pq->sent)) {
-               struct list_head free_list;
-
-               printk(KERN_INFO "drain: lists not empty: forcing!\n");
-               INIT_LIST_HEAD(&free_list);
-               mutex_lock(&pq->lock);
-               list_splice_init(&pq->sent, &free_list);
-               ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list);
-               mutex_unlock(&pq->lock);
-       }
-}
-
-static inline __le64 ipath_sdma_make_desc0(struct ipath_devdata *dd,
-                                          u64 addr, u64 dwlen, u64 dwoffset)
-{
-       return cpu_to_le64(/* SDmaPhyAddr[31:0] */
-                          ((addr & 0xfffffffcULL) << 32) |
-                          /* SDmaGeneration[1:0] */
-                          ((dd->ipath_sdma_generation & 3ULL) << 30) |
-                          /* SDmaDwordCount[10:0] */
-                          ((dwlen & 0x7ffULL) << 16) |
-                          /* SDmaBufOffset[12:2] */
-                          (dwoffset & 0x7ffULL));
-}
-
-static inline __le64 ipath_sdma_make_first_desc0(__le64 descq)
-{
-       return descq | cpu_to_le64(1ULL << 12);
-}
-
-static inline __le64 ipath_sdma_make_last_desc0(__le64 descq)
-{
-                                             /* last */  /* dma head */
-       return descq | cpu_to_le64(1ULL << 11 | 1ULL << 13);
-}
-
-static inline __le64 ipath_sdma_make_desc1(u64 addr)
-{
-       /* SDmaPhyAddr[47:32] */
-       return cpu_to_le64(addr >> 32);
-}
-
-static void ipath_user_sdma_send_frag(struct ipath_devdata *dd,
-                                     struct ipath_user_sdma_pkt *pkt, int idx,
-                                     unsigned ofs, u16 tail)
-{
-       const u64 addr = (u64) pkt->addr[idx].addr +
-               (u64) pkt->addr[idx].offset;
-       const u64 dwlen = (u64) pkt->addr[idx].length / 4;
-       __le64 *descqp;
-       __le64 descq0;
-
-       descqp = &dd->ipath_sdma_descq[tail].qw[0];
-
-       descq0 = ipath_sdma_make_desc0(dd, addr, dwlen, ofs);
-       if (idx == 0)
-               descq0 = ipath_sdma_make_first_desc0(descq0);
-       if (idx == pkt->naddr - 1)
-               descq0 = ipath_sdma_make_last_desc0(descq0);
-
-       descqp[0] = descq0;
-       descqp[1] = ipath_sdma_make_desc1(addr);
-}
-
-/* pq->lock must be held, get packets on the wire... */
-static int ipath_user_sdma_push_pkts(struct ipath_devdata *dd,
-                                    struct ipath_user_sdma_queue *pq,
-                                    struct list_head *pktlist)
-{
-       int ret = 0;
-       unsigned long flags;
-       u16 tail;
-
-       if (list_empty(pktlist))
-               return 0;
-
-       if (unlikely(!(dd->ipath_flags & IPATH_LINKACTIVE)))
-               return -ECOMM;
-
-       spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-
-       if (unlikely(dd->ipath_sdma_status & IPATH_SDMA_ABORT_MASK)) {
-               ret = -ECOMM;
-               goto unlock;
-       }
-
-       tail = dd->ipath_sdma_descq_tail;
-       while (!list_empty(pktlist)) {
-               struct ipath_user_sdma_pkt *pkt =
-                       list_entry(pktlist->next, struct ipath_user_sdma_pkt,
-                                  list);
-               int i;
-               unsigned ofs = 0;
-               u16 dtail = tail;
-
-               if (pkt->naddr > ipath_sdma_descq_freecnt(dd))
-                       goto unlock_check_tail;
-
-               for (i = 0; i < pkt->naddr; i++) {
-                       ipath_user_sdma_send_frag(dd, pkt, i, ofs, tail);
-                       ofs += pkt->addr[i].length >> 2;
-
-                       if (++tail == dd->ipath_sdma_descq_cnt) {
-                               tail = 0;
-                               ++dd->ipath_sdma_generation;
-                       }
-               }
-
-               if ((ofs<<2) > dd->ipath_ibmaxlen) {
-                       ipath_dbg("packet size %X > ibmax %X, fail\n",
-                               ofs<<2, dd->ipath_ibmaxlen);
-                       ret = -EMSGSIZE;
-                       goto unlock;
-               }
-
-               /*
-                * if the packet is >= 2KB mtu equivalent, we have to use
-                * the large buffers, and have to mark each descriptor as
-                * part of a large buffer packet.
-                */
-               if (ofs >= IPATH_SMALLBUF_DWORDS) {
-                       for (i = 0; i < pkt->naddr; i++) {
-                               dd->ipath_sdma_descq[dtail].qw[0] |=
-                                       cpu_to_le64(1ULL << 14);
-                               if (++dtail == dd->ipath_sdma_descq_cnt)
-                                       dtail = 0;
-                       }
-               }
-
-               dd->ipath_sdma_descq_added += pkt->naddr;
-               pkt->added = dd->ipath_sdma_descq_added;
-               list_move_tail(&pkt->list, &pq->sent);
-               ret++;
-       }
-
-unlock_check_tail:
-       /* advance the tail on the chip if necessary */
-       if (dd->ipath_sdma_descq_tail != tail) {
-               wmb();
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmatail, tail);
-               dd->ipath_sdma_descq_tail = tail;
-       }
-
-unlock:
-       spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-
-       return ret;
-}
-
-int ipath_user_sdma_writev(struct ipath_devdata *dd,
-                          struct ipath_user_sdma_queue *pq,
-                          const struct iovec *iov,
-                          unsigned long dim)
-{
-       int ret = 0;
-       struct list_head list;
-       int npkts = 0;
-
-       INIT_LIST_HEAD(&list);
-
-       mutex_lock(&pq->lock);
-
-       if (dd->ipath_sdma_descq_added != dd->ipath_sdma_descq_removed) {
-               ipath_user_sdma_hwqueue_clean(dd);
-               ipath_user_sdma_queue_clean(dd, pq);
-       }
-
-       while (dim) {
-               const int mxp = 8;
-
-               ret = ipath_user_sdma_queue_pkts(dd, pq, &list, iov, dim, mxp);
-               if (ret <= 0)
-                       goto done_unlock;
-               else {
-                       dim -= ret;
-                       iov += ret;
-               }
-
-               /* force packets onto the sdma hw queue... */
-               if (!list_empty(&list)) {
-                       /*
-                        * lazily clean hw queue.  the 4 is a guess of about
-                        * how many sdma descriptors a packet will take (it
-                        * doesn't have to be perfect).
-                        */
-                       if (ipath_sdma_descq_freecnt(dd) < ret * 4) {
-                               ipath_user_sdma_hwqueue_clean(dd);
-                               ipath_user_sdma_queue_clean(dd, pq);
-                       }
-
-                       ret = ipath_user_sdma_push_pkts(dd, pq, &list);
-                       if (ret < 0)
-                               goto done_unlock;
-                       else {
-                               npkts += ret;
-                               pq->counter += ret;
-
-                               if (!list_empty(&list))
-                                       goto done_unlock;
-                       }
-               }
-       }
-
-done_unlock:
-       if (!list_empty(&list))
-               ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &list);
-       mutex_unlock(&pq->lock);
-
-       return (ret < 0) ? ret : npkts;
-}
-
-int ipath_user_sdma_make_progress(struct ipath_devdata *dd,
-                                 struct ipath_user_sdma_queue *pq)
-{
-       int ret = 0;
-
-       mutex_lock(&pq->lock);
-       ipath_user_sdma_hwqueue_clean(dd);
-       ret = ipath_user_sdma_queue_clean(dd, pq);
-       mutex_unlock(&pq->lock);
-
-       return ret;
-}
-
-u32 ipath_user_sdma_complete_counter(const struct ipath_user_sdma_queue *pq)
-{
-       return pq->sent_counter;
-}
-
-u32 ipath_user_sdma_inflight_counter(struct ipath_user_sdma_queue *pq)
-{
-       return pq->counter;
-}
-
diff --git a/drivers/staging/rdma/ipath/ipath_user_sdma.h b/drivers/staging/rdma/ipath/ipath_user_sdma.h
deleted file mode 100644 (file)
index fc76316..0000000
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2007, 2008 QLogic Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <linux/device.h>
-
-struct ipath_user_sdma_queue;
-
-struct ipath_user_sdma_queue *
-ipath_user_sdma_queue_create(struct device *dev, int unit, int port, int sport);
-void ipath_user_sdma_queue_destroy(struct ipath_user_sdma_queue *pq);
-
-int ipath_user_sdma_writev(struct ipath_devdata *dd,
-                          struct ipath_user_sdma_queue *pq,
-                          const struct iovec *iov,
-                          unsigned long dim);
-
-int ipath_user_sdma_make_progress(struct ipath_devdata *dd,
-                                 struct ipath_user_sdma_queue *pq);
-
-void ipath_user_sdma_queue_drain(struct ipath_devdata *dd,
-                                struct ipath_user_sdma_queue *pq);
-
-u32 ipath_user_sdma_complete_counter(const struct ipath_user_sdma_queue *pq);
-u32 ipath_user_sdma_inflight_counter(struct ipath_user_sdma_queue *pq);
diff --git a/drivers/staging/rdma/ipath/ipath_verbs.c b/drivers/staging/rdma/ipath/ipath_verbs.c
deleted file mode 100644 (file)
index 1778dee..0000000
+++ /dev/null
@@ -1,2377 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <rdma/ib_mad.h>
-#include <rdma/ib_user_verbs.h>
-#include <linux/io.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/utsname.h>
-#include <linux/rculist.h>
-
-#include "ipath_kernel.h"
-#include "ipath_verbs.h"
-#include "ipath_common.h"
-
-static unsigned int ib_ipath_qp_table_size = 251;
-module_param_named(qp_table_size, ib_ipath_qp_table_size, uint, S_IRUGO);
-MODULE_PARM_DESC(qp_table_size, "QP table size");
-
-unsigned int ib_ipath_lkey_table_size = 12;
-module_param_named(lkey_table_size, ib_ipath_lkey_table_size, uint,
-                  S_IRUGO);
-MODULE_PARM_DESC(lkey_table_size,
-                "LKEY table size in bits (2^n, 1 <= n <= 23)");
-
-static unsigned int ib_ipath_max_pds = 0xFFFF;
-module_param_named(max_pds, ib_ipath_max_pds, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_pds,
-                "Maximum number of protection domains to support");
-
-static unsigned int ib_ipath_max_ahs = 0xFFFF;
-module_param_named(max_ahs, ib_ipath_max_ahs, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support");
-
-unsigned int ib_ipath_max_cqes = 0x2FFFF;
-module_param_named(max_cqes, ib_ipath_max_cqes, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_cqes,
-                "Maximum number of completion queue entries to support");
-
-unsigned int ib_ipath_max_cqs = 0x1FFFF;
-module_param_named(max_cqs, ib_ipath_max_cqs, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_cqs, "Maximum number of completion queues to support");
-
-unsigned int ib_ipath_max_qp_wrs = 0x3FFF;
-module_param_named(max_qp_wrs, ib_ipath_max_qp_wrs, uint,
-                  S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support");
-
-unsigned int ib_ipath_max_qps = 16384;
-module_param_named(max_qps, ib_ipath_max_qps, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support");
-
-unsigned int ib_ipath_max_sges = 0x60;
-module_param_named(max_sges, ib_ipath_max_sges, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_sges, "Maximum number of SGEs to support");
-
-unsigned int ib_ipath_max_mcast_grps = 16384;
-module_param_named(max_mcast_grps, ib_ipath_max_mcast_grps, uint,
-                  S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_mcast_grps,
-                "Maximum number of multicast groups to support");
-
-unsigned int ib_ipath_max_mcast_qp_attached = 16;
-module_param_named(max_mcast_qp_attached, ib_ipath_max_mcast_qp_attached,
-                  uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_mcast_qp_attached,
-                "Maximum number of attached QPs to support");
-
-unsigned int ib_ipath_max_srqs = 1024;
-module_param_named(max_srqs, ib_ipath_max_srqs, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_srqs, "Maximum number of SRQs to support");
-
-unsigned int ib_ipath_max_srq_sges = 128;
-module_param_named(max_srq_sges, ib_ipath_max_srq_sges,
-                  uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_srq_sges, "Maximum number of SRQ SGEs to support");
-
-unsigned int ib_ipath_max_srq_wrs = 0x1FFFF;
-module_param_named(max_srq_wrs, ib_ipath_max_srq_wrs,
-                  uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs support");
-
-static unsigned int ib_ipath_disable_sma;
-module_param_named(disable_sma, ib_ipath_disable_sma, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(disable_sma, "Disable the SMA");
-
-/*
- * Note that it is OK to post send work requests in the SQE and ERR
- * states; ipath_do_send() will process them and generate error
- * completions as per IB 1.2 C10-96.
- */
-const int ib_ipath_state_ops[IB_QPS_ERR + 1] = {
-       [IB_QPS_RESET] = 0,
-       [IB_QPS_INIT] = IPATH_POST_RECV_OK,
-       [IB_QPS_RTR] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK,
-       [IB_QPS_RTS] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
-           IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK |
-           IPATH_PROCESS_NEXT_SEND_OK,
-       [IB_QPS_SQD] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
-           IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK,
-       [IB_QPS_SQE] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
-           IPATH_POST_SEND_OK | IPATH_FLUSH_SEND,
-       [IB_QPS_ERR] = IPATH_POST_RECV_OK | IPATH_FLUSH_RECV |
-           IPATH_POST_SEND_OK | IPATH_FLUSH_SEND,
-};
-
-struct ipath_ucontext {
-       struct ib_ucontext ibucontext;
-};
-
-static inline struct ipath_ucontext *to_iucontext(struct ib_ucontext
-                                                 *ibucontext)
-{
-       return container_of(ibucontext, struct ipath_ucontext, ibucontext);
-}
-
-/*
- * Translate ib_wr_opcode into ib_wc_opcode.
- */
-const enum ib_wc_opcode ib_ipath_wc_opcode[] = {
-       [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
-       [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
-       [IB_WR_SEND] = IB_WC_SEND,
-       [IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
-       [IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
-       [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
-       [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD
-};
-
-/*
- * System image GUID.
- */
-static __be64 sys_image_guid;
-
-/**
- * ipath_copy_sge - copy data to SGE memory
- * @ss: the SGE state
- * @data: the data to copy
- * @length: the length of the data
- */
-void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length)
-{
-       struct ipath_sge *sge = &ss->sge;
-
-       while (length) {
-               u32 len = sge->length;
-
-               if (len > length)
-                       len = length;
-               if (len > sge->sge_length)
-                       len = sge->sge_length;
-               BUG_ON(len == 0);
-               memcpy(sge->vaddr, data, len);
-               sge->vaddr += len;
-               sge->length -= len;
-               sge->sge_length -= len;
-               if (sge->sge_length == 0) {
-                       if (--ss->num_sge)
-                               *sge = *ss->sg_list++;
-               } else if (sge->length == 0 && sge->mr != NULL) {
-                       if (++sge->n >= IPATH_SEGSZ) {
-                               if (++sge->m >= sge->mr->mapsz)
-                                       break;
-                               sge->n = 0;
-                       }
-                       sge->vaddr =
-                               sge->mr->map[sge->m]->segs[sge->n].vaddr;
-                       sge->length =
-                               sge->mr->map[sge->m]->segs[sge->n].length;
-               }
-               data += len;
-               length -= len;
-       }
-}
-
-/**
- * ipath_skip_sge - skip over SGE memory - XXX almost dup of prev func
- * @ss: the SGE state
- * @length: the number of bytes to skip
- */
-void ipath_skip_sge(struct ipath_sge_state *ss, u32 length)
-{
-       struct ipath_sge *sge = &ss->sge;
-
-       while (length) {
-               u32 len = sge->length;
-
-               if (len > length)
-                       len = length;
-               if (len > sge->sge_length)
-                       len = sge->sge_length;
-               BUG_ON(len == 0);
-               sge->vaddr += len;
-               sge->length -= len;
-               sge->sge_length -= len;
-               if (sge->sge_length == 0) {
-                       if (--ss->num_sge)
-                               *sge = *ss->sg_list++;
-               } else if (sge->length == 0 && sge->mr != NULL) {
-                       if (++sge->n >= IPATH_SEGSZ) {
-                               if (++sge->m >= sge->mr->mapsz)
-                                       break;
-                               sge->n = 0;
-                       }
-                       sge->vaddr =
-                               sge->mr->map[sge->m]->segs[sge->n].vaddr;
-                       sge->length =
-                               sge->mr->map[sge->m]->segs[sge->n].length;
-               }
-               length -= len;
-       }
-}
-
-/*
- * Count the number of DMA descriptors needed to send length bytes of data.
- * Don't modify the ipath_sge_state to get the count.
- * Return zero if any of the segments is not aligned.
- */
-static u32 ipath_count_sge(struct ipath_sge_state *ss, u32 length)
-{
-       struct ipath_sge *sg_list = ss->sg_list;
-       struct ipath_sge sge = ss->sge;
-       u8 num_sge = ss->num_sge;
-       u32 ndesc = 1;  /* count the header */
-
-       while (length) {
-               u32 len = sge.length;
-
-               if (len > length)
-                       len = length;
-               if (len > sge.sge_length)
-                       len = sge.sge_length;
-               BUG_ON(len == 0);
-               if (((long) sge.vaddr & (sizeof(u32) - 1)) ||
-                   (len != length && (len & (sizeof(u32) - 1)))) {
-                       ndesc = 0;
-                       break;
-               }
-               ndesc++;
-               sge.vaddr += len;
-               sge.length -= len;
-               sge.sge_length -= len;
-               if (sge.sge_length == 0) {
-                       if (--num_sge)
-                               sge = *sg_list++;
-               } else if (sge.length == 0 && sge.mr != NULL) {
-                       if (++sge.n >= IPATH_SEGSZ) {
-                               if (++sge.m >= sge.mr->mapsz)
-                                       break;
-                               sge.n = 0;
-                       }
-                       sge.vaddr =
-                               sge.mr->map[sge.m]->segs[sge.n].vaddr;
-                       sge.length =
-                               sge.mr->map[sge.m]->segs[sge.n].length;
-               }
-               length -= len;
-       }
-       return ndesc;
-}
-
-/*
- * Copy from the SGEs to the data buffer.
- */
-static void ipath_copy_from_sge(void *data, struct ipath_sge_state *ss,
-                               u32 length)
-{
-       struct ipath_sge *sge = &ss->sge;
-
-       while (length) {
-               u32 len = sge->length;
-
-               if (len > length)
-                       len = length;
-               if (len > sge->sge_length)
-                       len = sge->sge_length;
-               BUG_ON(len == 0);
-               memcpy(data, sge->vaddr, len);
-               sge->vaddr += len;
-               sge->length -= len;
-               sge->sge_length -= len;
-               if (sge->sge_length == 0) {
-                       if (--ss->num_sge)
-                               *sge = *ss->sg_list++;
-               } else if (sge->length == 0 && sge->mr != NULL) {
-                       if (++sge->n >= IPATH_SEGSZ) {
-                               if (++sge->m >= sge->mr->mapsz)
-                                       break;
-                               sge->n = 0;
-                       }
-                       sge->vaddr =
-                               sge->mr->map[sge->m]->segs[sge->n].vaddr;
-                       sge->length =
-                               sge->mr->map[sge->m]->segs[sge->n].length;
-               }
-               data += len;
-               length -= len;
-       }
-}
-
-/**
- * ipath_post_one_send - post one RC, UC, or UD send work request
- * @qp: the QP to post on
- * @wr: the work request to send
- */
-static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr)
-{
-       struct ipath_swqe *wqe;
-       u32 next;
-       int i;
-       int j;
-       int acc;
-       int ret;
-       unsigned long flags;
-       struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
-
-       spin_lock_irqsave(&qp->s_lock, flags);
-
-       if (qp->ibqp.qp_type != IB_QPT_SMI &&
-           !(dd->ipath_flags & IPATH_LINKACTIVE)) {
-               ret = -ENETDOWN;
-               goto bail;
-       }
-
-       /* Check that state is OK to post send. */
-       if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK)))
-               goto bail_inval;
-
-       /* IB spec says that num_sge == 0 is OK. */
-       if (wr->num_sge > qp->s_max_sge)
-               goto bail_inval;
-
-       /*
-        * Don't allow RDMA reads or atomic operations on UC or
-        * undefined operations.
-        * Make sure buffer is large enough to hold the result for atomics.
-        */
-       if (qp->ibqp.qp_type == IB_QPT_UC) {
-               if ((unsigned) wr->opcode >= IB_WR_RDMA_READ)
-                       goto bail_inval;
-       } else if (qp->ibqp.qp_type == IB_QPT_UD) {
-               /* Check UD opcode */
-               if (wr->opcode != IB_WR_SEND &&
-                   wr->opcode != IB_WR_SEND_WITH_IMM)
-                       goto bail_inval;
-               /* Check UD destination address PD */
-               if (qp->ibqp.pd != ud_wr(wr)->ah->pd)
-                       goto bail_inval;
-       } else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD)
-               goto bail_inval;
-       else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP &&
-                  (wr->num_sge == 0 ||
-                   wr->sg_list[0].length < sizeof(u64) ||
-                   wr->sg_list[0].addr & (sizeof(u64) - 1)))
-               goto bail_inval;
-       else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic)
-               goto bail_inval;
-
-       next = qp->s_head + 1;
-       if (next >= qp->s_size)
-               next = 0;
-       if (next == qp->s_last) {
-               ret = -ENOMEM;
-               goto bail;
-       }
-
-       wqe = get_swqe_ptr(qp, qp->s_head);
-
-       if (qp->ibqp.qp_type != IB_QPT_UC &&
-           qp->ibqp.qp_type != IB_QPT_RC)
-               memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr));
-       else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
-                wr->opcode == IB_WR_RDMA_WRITE ||
-                wr->opcode == IB_WR_RDMA_READ)
-               memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr));
-       else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
-                wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
-               memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr));
-       else
-               memcpy(&wqe->wr, wr, sizeof(wqe->wr));
-
-       wqe->length = 0;
-       if (wr->num_sge) {
-               acc = wr->opcode >= IB_WR_RDMA_READ ?
-                       IB_ACCESS_LOCAL_WRITE : 0;
-               for (i = 0, j = 0; i < wr->num_sge; i++) {
-                       u32 length = wr->sg_list[i].length;
-                       int ok;
-
-                       if (length == 0)
-                               continue;
-                       ok = ipath_lkey_ok(qp, &wqe->sg_list[j],
-                                          &wr->sg_list[i], acc);
-                       if (!ok)
-                               goto bail_inval;
-                       wqe->length += length;
-                       j++;
-               }
-               wqe->wr.num_sge = j;
-       }
-       if (qp->ibqp.qp_type == IB_QPT_UC ||
-           qp->ibqp.qp_type == IB_QPT_RC) {
-               if (wqe->length > 0x80000000U)
-                       goto bail_inval;
-       } else if (wqe->length > to_idev(qp->ibqp.device)->dd->ipath_ibmtu)
-               goto bail_inval;
-       wqe->ssn = qp->s_ssn++;
-       qp->s_head = next;
-
-       ret = 0;
-       goto bail;
-
-bail_inval:
-       ret = -EINVAL;
-bail:
-       spin_unlock_irqrestore(&qp->s_lock, flags);
-       return ret;
-}
-
-/**
- * ipath_post_send - post a send on a QP
- * @ibqp: the QP to post the send on
- * @wr: the list of work requests to post
- * @bad_wr: the first bad WR is put here
- *
- * This may be called from interrupt context.
- */
-static int ipath_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-                          struct ib_send_wr **bad_wr)
-{
-       struct ipath_qp *qp = to_iqp(ibqp);
-       int err = 0;
-
-       for (; wr; wr = wr->next) {
-               err = ipath_post_one_send(qp, wr);
-               if (err) {
-                       *bad_wr = wr;
-                       goto bail;
-               }
-       }
-
-       /* Try to do the send work in the caller's context. */
-       ipath_do_send((unsigned long) qp);
-
-bail:
-       return err;
-}
-
-/**
- * ipath_post_receive - post a receive on a QP
- * @ibqp: the QP to post the receive on
- * @wr: the WR to post
- * @bad_wr: the first bad WR is put here
- *
- * This may be called from interrupt context.
- */
-static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-                             struct ib_recv_wr **bad_wr)
-{
-       struct ipath_qp *qp = to_iqp(ibqp);
-       struct ipath_rwq *wq = qp->r_rq.wq;
-       unsigned long flags;
-       int ret;
-
-       /* Check that state is OK to post receive. */
-       if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_RECV_OK) || !wq) {
-               *bad_wr = wr;
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       for (; wr; wr = wr->next) {
-               struct ipath_rwqe *wqe;
-               u32 next;
-               int i;
-
-               if ((unsigned) wr->num_sge > qp->r_rq.max_sge) {
-                       *bad_wr = wr;
-                       ret = -EINVAL;
-                       goto bail;
-               }
-
-               spin_lock_irqsave(&qp->r_rq.lock, flags);
-               next = wq->head + 1;
-               if (next >= qp->r_rq.size)
-                       next = 0;
-               if (next == wq->tail) {
-                       spin_unlock_irqrestore(&qp->r_rq.lock, flags);
-                       *bad_wr = wr;
-                       ret = -ENOMEM;
-                       goto bail;
-               }
-
-               wqe = get_rwqe_ptr(&qp->r_rq, wq->head);
-               wqe->wr_id = wr->wr_id;
-               wqe->num_sge = wr->num_sge;
-               for (i = 0; i < wr->num_sge; i++)
-                       wqe->sg_list[i] = wr->sg_list[i];
-               /* Make sure queue entry is written before the head index. */
-               smp_wmb();
-               wq->head = next;
-               spin_unlock_irqrestore(&qp->r_rq.lock, flags);
-       }
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_qp_rcv - processing an incoming packet on a QP
- * @dev: the device the packet came on
- * @hdr: the packet header
- * @has_grh: true if the packet has a GRH
- * @data: the packet data
- * @tlen: the packet length
- * @qp: the QP the packet came on
- *
- * This is called from ipath_ib_rcv() to process an incoming packet
- * for the given QP.
- * Called at interrupt level.
- */
-static void ipath_qp_rcv(struct ipath_ibdev *dev,
-                        struct ipath_ib_header *hdr, int has_grh,
-                        void *data, u32 tlen, struct ipath_qp *qp)
-{
-       /* Check for valid receive state. */
-       if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
-               dev->n_pkt_drops++;
-               return;
-       }
-
-       switch (qp->ibqp.qp_type) {
-       case IB_QPT_SMI:
-       case IB_QPT_GSI:
-               if (ib_ipath_disable_sma)
-                       break;
-               /* FALLTHROUGH */
-       case IB_QPT_UD:
-               ipath_ud_rcv(dev, hdr, has_grh, data, tlen, qp);
-               break;
-
-       case IB_QPT_RC:
-               ipath_rc_rcv(dev, hdr, has_grh, data, tlen, qp);
-               break;
-
-       case IB_QPT_UC:
-               ipath_uc_rcv(dev, hdr, has_grh, data, tlen, qp);
-               break;
-
-       default:
-               break;
-       }
-}
-
-/**
- * ipath_ib_rcv - process an incoming packet
- * @arg: the device pointer
- * @rhdr: the header of the packet
- * @data: the packet data
- * @tlen: the packet length
- *
- * This is called from ipath_kreceive() to process an incoming packet at
- * interrupt level. Tlen is the length of the header + data + CRC in bytes.
- */
-void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data,
-                 u32 tlen)
-{
-       struct ipath_ib_header *hdr = rhdr;
-       struct ipath_other_headers *ohdr;
-       struct ipath_qp *qp;
-       u32 qp_num;
-       int lnh;
-       u8 opcode;
-       u16 lid;
-
-       if (unlikely(dev == NULL))
-               goto bail;
-
-       if (unlikely(tlen < 24)) {      /* LRH+BTH+CRC */
-               dev->rcv_errors++;
-               goto bail;
-       }
-
-       /* Check for a valid destination LID (see ch. 7.11.1). */
-       lid = be16_to_cpu(hdr->lrh[1]);
-       if (lid < IPATH_MULTICAST_LID_BASE) {
-               lid &= ~((1 << dev->dd->ipath_lmc) - 1);
-               if (unlikely(lid != dev->dd->ipath_lid)) {
-                       dev->rcv_errors++;
-                       goto bail;
-               }
-       }
-
-       /* Check for GRH */
-       lnh = be16_to_cpu(hdr->lrh[0]) & 3;
-       if (lnh == IPATH_LRH_BTH)
-               ohdr = &hdr->u.oth;
-       else if (lnh == IPATH_LRH_GRH)
-               ohdr = &hdr->u.l.oth;
-       else {
-               dev->rcv_errors++;
-               goto bail;
-       }
-
-       opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0x7f;
-       dev->opstats[opcode].n_bytes += tlen;
-       dev->opstats[opcode].n_packets++;
-
-       /* Get the destination QP number. */
-       qp_num = be32_to_cpu(ohdr->bth[1]) & IPATH_QPN_MASK;
-       if (qp_num == IPATH_MULTICAST_QPN) {
-               struct ipath_mcast *mcast;
-               struct ipath_mcast_qp *p;
-
-               if (lnh != IPATH_LRH_GRH) {
-                       dev->n_pkt_drops++;
-                       goto bail;
-               }
-               mcast = ipath_mcast_find(&hdr->u.l.grh.dgid);
-               if (mcast == NULL) {
-                       dev->n_pkt_drops++;
-                       goto bail;
-               }
-               dev->n_multicast_rcv++;
-               list_for_each_entry_rcu(p, &mcast->qp_list, list)
-                       ipath_qp_rcv(dev, hdr, 1, data, tlen, p->qp);
-               /*
-                * Notify ipath_multicast_detach() if it is waiting for us
-                * to finish.
-                */
-               if (atomic_dec_return(&mcast->refcount) <= 1)
-                       wake_up(&mcast->wait);
-       } else {
-               qp = ipath_lookup_qpn(&dev->qp_table, qp_num);
-               if (qp) {
-                       dev->n_unicast_rcv++;
-                       ipath_qp_rcv(dev, hdr, lnh == IPATH_LRH_GRH, data,
-                                    tlen, qp);
-                       /*
-                        * Notify ipath_destroy_qp() if it is waiting
-                        * for us to finish.
-                        */
-                       if (atomic_dec_and_test(&qp->refcount))
-                               wake_up(&qp->wait);
-               } else
-                       dev->n_pkt_drops++;
-       }
-
-bail:;
-}
-
-/**
- * ipath_ib_timer - verbs timer
- * @arg: the device pointer
- *
- * This is called from ipath_do_rcv_timer() at interrupt level to check for
- * QPs which need retransmits and to collect performance numbers.
- */
-static void ipath_ib_timer(struct ipath_ibdev *dev)
-{
-       struct ipath_qp *resend = NULL;
-       struct ipath_qp *rnr = NULL;
-       struct list_head *last;
-       struct ipath_qp *qp;
-       unsigned long flags;
-
-       if (dev == NULL)
-               return;
-
-       spin_lock_irqsave(&dev->pending_lock, flags);
-       /* Start filling the next pending queue. */
-       if (++dev->pending_index >= ARRAY_SIZE(dev->pending))
-               dev->pending_index = 0;
-       /* Save any requests still in the new queue, they have timed out. */
-       last = &dev->pending[dev->pending_index];
-       while (!list_empty(last)) {
-               qp = list_entry(last->next, struct ipath_qp, timerwait);
-               list_del_init(&qp->timerwait);
-               qp->timer_next = resend;
-               resend = qp;
-               atomic_inc(&qp->refcount);
-       }
-       last = &dev->rnrwait;
-       if (!list_empty(last)) {
-               qp = list_entry(last->next, struct ipath_qp, timerwait);
-               if (--qp->s_rnr_timeout == 0) {
-                       do {
-                               list_del_init(&qp->timerwait);
-                               qp->timer_next = rnr;
-                               rnr = qp;
-                               atomic_inc(&qp->refcount);
-                               if (list_empty(last))
-                                       break;
-                               qp = list_entry(last->next, struct ipath_qp,
-                                               timerwait);
-                       } while (qp->s_rnr_timeout == 0);
-               }
-       }
-       /*
-        * We should only be in the started state if pma_sample_start != 0
-        */
-       if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_STARTED &&
-           --dev->pma_sample_start == 0) {
-               dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_RUNNING;
-               ipath_snapshot_counters(dev->dd, &dev->ipath_sword,
-                                       &dev->ipath_rword,
-                                       &dev->ipath_spkts,
-                                       &dev->ipath_rpkts,
-                                       &dev->ipath_xmit_wait);
-       }
-       if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_RUNNING) {
-               if (dev->pma_sample_interval == 0) {
-                       u64 ta, tb, tc, td, te;
-
-                       dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_DONE;
-                       ipath_snapshot_counters(dev->dd, &ta, &tb,
-                                               &tc, &td, &te);
-
-                       dev->ipath_sword = ta - dev->ipath_sword;
-                       dev->ipath_rword = tb - dev->ipath_rword;
-                       dev->ipath_spkts = tc - dev->ipath_spkts;
-                       dev->ipath_rpkts = td - dev->ipath_rpkts;
-                       dev->ipath_xmit_wait = te - dev->ipath_xmit_wait;
-               } else {
-                       dev->pma_sample_interval--;
-               }
-       }
-       spin_unlock_irqrestore(&dev->pending_lock, flags);
-
-       /* XXX What if timer fires again while this is running? */
-       while (resend != NULL) {
-               qp = resend;
-               resend = qp->timer_next;
-
-               spin_lock_irqsave(&qp->s_lock, flags);
-               if (qp->s_last != qp->s_tail &&
-                   ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) {
-                       dev->n_timeouts++;
-                       ipath_restart_rc(qp, qp->s_last_psn + 1);
-               }
-               spin_unlock_irqrestore(&qp->s_lock, flags);
-
-               /* Notify ipath_destroy_qp() if it is waiting. */
-               if (atomic_dec_and_test(&qp->refcount))
-                       wake_up(&qp->wait);
-       }
-       while (rnr != NULL) {
-               qp = rnr;
-               rnr = qp->timer_next;
-
-               spin_lock_irqsave(&qp->s_lock, flags);
-               if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)
-                       ipath_schedule_send(qp);
-               spin_unlock_irqrestore(&qp->s_lock, flags);
-
-               /* Notify ipath_destroy_qp() if it is waiting. */
-               if (atomic_dec_and_test(&qp->refcount))
-                       wake_up(&qp->wait);
-       }
-}
-
-static void update_sge(struct ipath_sge_state *ss, u32 length)
-{
-       struct ipath_sge *sge = &ss->sge;
-
-       sge->vaddr += length;
-       sge->length -= length;
-       sge->sge_length -= length;
-       if (sge->sge_length == 0) {
-               if (--ss->num_sge)
-                       *sge = *ss->sg_list++;
-       } else if (sge->length == 0 && sge->mr != NULL) {
-               if (++sge->n >= IPATH_SEGSZ) {
-                       if (++sge->m >= sge->mr->mapsz)
-                               return;
-                       sge->n = 0;
-               }
-               sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
-               sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
-       }
-}
-
-#ifdef __LITTLE_ENDIAN
-static inline u32 get_upper_bits(u32 data, u32 shift)
-{
-       return data >> shift;
-}
-
-static inline u32 set_upper_bits(u32 data, u32 shift)
-{
-       return data << shift;
-}
-
-static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
-{
-       data <<= ((sizeof(u32) - n) * BITS_PER_BYTE);
-       data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
-       return data;
-}
-#else
-static inline u32 get_upper_bits(u32 data, u32 shift)
-{
-       return data << shift;
-}
-
-static inline u32 set_upper_bits(u32 data, u32 shift)
-{
-       return data >> shift;
-}
-
-static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
-{
-       data >>= ((sizeof(u32) - n) * BITS_PER_BYTE);
-       data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
-       return data;
-}
-#endif
-
-static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss,
-                   u32 length, unsigned flush_wc)
-{
-       u32 extra = 0;
-       u32 data = 0;
-       u32 last;
-
-       while (1) {
-               u32 len = ss->sge.length;
-               u32 off;
-
-               if (len > length)
-                       len = length;
-               if (len > ss->sge.sge_length)
-                       len = ss->sge.sge_length;
-               BUG_ON(len == 0);
-               /* If the source address is not aligned, try to align it. */
-               off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1);
-               if (off) {
-                       u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr &
-                                           ~(sizeof(u32) - 1));
-                       u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE);
-                       u32 y;
-
-                       y = sizeof(u32) - off;
-                       if (len > y)
-                               len = y;
-                       if (len + extra >= sizeof(u32)) {
-                               data |= set_upper_bits(v, extra *
-                                                      BITS_PER_BYTE);
-                               len = sizeof(u32) - extra;
-                               if (len == length) {
-                                       last = data;
-                                       break;
-                               }
-                               __raw_writel(data, piobuf);
-                               piobuf++;
-                               extra = 0;
-                               data = 0;
-                       } else {
-                               /* Clear unused upper bytes */
-                               data |= clear_upper_bytes(v, len, extra);
-                               if (len == length) {
-                                       last = data;
-                                       break;
-                               }
-                               extra += len;
-                       }
-               } else if (extra) {
-                       /* Source address is aligned. */
-                       u32 *addr = (u32 *) ss->sge.vaddr;
-                       int shift = extra * BITS_PER_BYTE;
-                       int ushift = 32 - shift;
-                       u32 l = len;
-
-                       while (l >= sizeof(u32)) {
-                               u32 v = *addr;
-
-                               data |= set_upper_bits(v, shift);
-                               __raw_writel(data, piobuf);
-                               data = get_upper_bits(v, ushift);
-                               piobuf++;
-                               addr++;
-                               l -= sizeof(u32);
-                       }
-                       /*
-                        * We still have 'extra' number of bytes leftover.
-                        */
-                       if (l) {
-                               u32 v = *addr;
-
-                               if (l + extra >= sizeof(u32)) {
-                                       data |= set_upper_bits(v, shift);
-                                       len -= l + extra - sizeof(u32);
-                                       if (len == length) {
-                                               last = data;
-                                               break;
-                                       }
-                                       __raw_writel(data, piobuf);
-                                       piobuf++;
-                                       extra = 0;
-                                       data = 0;
-                               } else {
-                                       /* Clear unused upper bytes */
-                                       data |= clear_upper_bytes(v, l,
-                                                                 extra);
-                                       if (len == length) {
-                                               last = data;
-                                               break;
-                                       }
-                                       extra += l;
-                               }
-                       } else if (len == length) {
-                               last = data;
-                               break;
-                       }
-               } else if (len == length) {
-                       u32 w;
-
-                       /*
-                        * Need to round up for the last dword in the
-                        * packet.
-                        */
-                       w = (len + 3) >> 2;
-                       __iowrite32_copy(piobuf, ss->sge.vaddr, w - 1);
-                       piobuf += w - 1;
-                       last = ((u32 *) ss->sge.vaddr)[w - 1];
-                       break;
-               } else {
-                       u32 w = len >> 2;
-
-                       __iowrite32_copy(piobuf, ss->sge.vaddr, w);
-                       piobuf += w;
-
-                       extra = len & (sizeof(u32) - 1);
-                       if (extra) {
-                               u32 v = ((u32 *) ss->sge.vaddr)[w];
-
-                               /* Clear unused upper bytes */
-                               data = clear_upper_bytes(v, extra, 0);
-                       }
-               }
-               update_sge(ss, len);
-               length -= len;
-       }
-       /* Update address before sending packet. */
-       update_sge(ss, length);
-       if (flush_wc) {
-               /* must flush early everything before trigger word */
-               ipath_flush_wc();
-               __raw_writel(last, piobuf);
-               /* be sure trigger word is written */
-               ipath_flush_wc();
-       } else
-               __raw_writel(last, piobuf);
-}
-
-/*
- * Convert IB rate to delay multiplier.
- */
-unsigned ipath_ib_rate_to_mult(enum ib_rate rate)
-{
-       switch (rate) {
-       case IB_RATE_2_5_GBPS: return 8;
-       case IB_RATE_5_GBPS:   return 4;
-       case IB_RATE_10_GBPS:  return 2;
-       case IB_RATE_20_GBPS:  return 1;
-       default:               return 0;
-       }
-}
-
-/*
- * Convert delay multiplier to IB rate
- */
-static enum ib_rate ipath_mult_to_ib_rate(unsigned mult)
-{
-       switch (mult) {
-       case 8:  return IB_RATE_2_5_GBPS;
-       case 4:  return IB_RATE_5_GBPS;
-       case 2:  return IB_RATE_10_GBPS;
-       case 1:  return IB_RATE_20_GBPS;
-       default: return IB_RATE_PORT_CURRENT;
-       }
-}
-
-static inline struct ipath_verbs_txreq *get_txreq(struct ipath_ibdev *dev)
-{
-       struct ipath_verbs_txreq *tx = NULL;
-       unsigned long flags;
-
-       spin_lock_irqsave(&dev->pending_lock, flags);
-       if (!list_empty(&dev->txreq_free)) {
-               struct list_head *l = dev->txreq_free.next;
-
-               list_del(l);
-               tx = list_entry(l, struct ipath_verbs_txreq, txreq.list);
-       }
-       spin_unlock_irqrestore(&dev->pending_lock, flags);
-       return tx;
-}
-
-static inline void put_txreq(struct ipath_ibdev *dev,
-                            struct ipath_verbs_txreq *tx)
-{
-       unsigned long flags;
-
-       spin_lock_irqsave(&dev->pending_lock, flags);
-       list_add(&tx->txreq.list, &dev->txreq_free);
-       spin_unlock_irqrestore(&dev->pending_lock, flags);
-}
-
-static void sdma_complete(void *cookie, int status)
-{
-       struct ipath_verbs_txreq *tx = cookie;
-       struct ipath_qp *qp = tx->qp;
-       struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-       unsigned long flags;
-       enum ib_wc_status ibs = status == IPATH_SDMA_TXREQ_S_OK ?
-               IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR;
-
-       if (atomic_dec_and_test(&qp->s_dma_busy)) {
-               spin_lock_irqsave(&qp->s_lock, flags);
-               if (tx->wqe)
-                       ipath_send_complete(qp, tx->wqe, ibs);
-               if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND &&
-                    qp->s_last != qp->s_head) ||
-                   (qp->s_flags & IPATH_S_WAIT_DMA))
-                       ipath_schedule_send(qp);
-               spin_unlock_irqrestore(&qp->s_lock, flags);
-               wake_up(&qp->wait_dma);
-       } else if (tx->wqe) {
-               spin_lock_irqsave(&qp->s_lock, flags);
-               ipath_send_complete(qp, tx->wqe, ibs);
-               spin_unlock_irqrestore(&qp->s_lock, flags);
-       }
-
-       if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF)
-               kfree(tx->txreq.map_addr);
-       put_txreq(dev, tx);
-
-       if (atomic_dec_and_test(&qp->refcount))
-               wake_up(&qp->wait);
-}
-
-static void decrement_dma_busy(struct ipath_qp *qp)
-{
-       unsigned long flags;
-
-       if (atomic_dec_and_test(&qp->s_dma_busy)) {
-               spin_lock_irqsave(&qp->s_lock, flags);
-               if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND &&
-                    qp->s_last != qp->s_head) ||
-                   (qp->s_flags & IPATH_S_WAIT_DMA))
-                       ipath_schedule_send(qp);
-               spin_unlock_irqrestore(&qp->s_lock, flags);
-               wake_up(&qp->wait_dma);
-       }
-}
-
-/*
- * Compute the number of clock cycles of delay before sending the next packet.
- * The multipliers reflect the number of clocks for the fastest rate so
- * one tick at 4xDDR is 8 ticks at 1xSDR.
- * If the destination port will take longer to receive a packet than
- * the outgoing link can send it, we need to delay sending the next packet
- * by the difference in time it takes the receiver to receive and the sender
- * to send this packet.
- * Note that this delay is always correct for UC and RC but not always
- * optimal for UD. For UD, the destination HCA can be different for each
- * packet, in which case, we could send packets to a different destination
- * while "waiting" for the delay. The overhead for doing this without
- * HW support is more than just paying the cost of delaying some packets
- * unnecessarily.
- */
-static inline unsigned ipath_pkt_delay(u32 plen, u8 snd_mult, u8 rcv_mult)
-{
-       return (rcv_mult > snd_mult) ?
-               (plen * (rcv_mult - snd_mult) + 1) >> 1 : 0;
-}
-
-static int ipath_verbs_send_dma(struct ipath_qp *qp,
-                               struct ipath_ib_header *hdr, u32 hdrwords,
-                               struct ipath_sge_state *ss, u32 len,
-                               u32 plen, u32 dwords)
-{
-       struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-       struct ipath_devdata *dd = dev->dd;
-       struct ipath_verbs_txreq *tx;
-       u32 *piobuf;
-       u32 control;
-       u32 ndesc;
-       int ret;
-
-       tx = qp->s_tx;
-       if (tx) {
-               qp->s_tx = NULL;
-               /* resend previously constructed packet */
-               atomic_inc(&qp->s_dma_busy);
-               ret = ipath_sdma_verbs_send(dd, tx->ss, tx->len, tx);
-               if (ret) {
-                       qp->s_tx = tx;
-                       decrement_dma_busy(qp);
-               }
-               goto bail;
-       }
-
-       tx = get_txreq(dev);
-       if (!tx) {
-               ret = -EBUSY;
-               goto bail;
-       }
-
-       /*
-        * Get the saved delay count we computed for the previous packet
-        * and save the delay count for this packet to be used next time
-        * we get here.
-        */
-       control = qp->s_pkt_delay;
-       qp->s_pkt_delay = ipath_pkt_delay(plen, dd->delay_mult, qp->s_dmult);
-
-       tx->qp = qp;
-       atomic_inc(&qp->refcount);
-       tx->wqe = qp->s_wqe;
-       tx->txreq.callback = sdma_complete;
-       tx->txreq.callback_cookie = tx;
-       tx->txreq.flags = IPATH_SDMA_TXREQ_F_HEADTOHOST |
-               IPATH_SDMA_TXREQ_F_INTREQ | IPATH_SDMA_TXREQ_F_FREEDESC;
-       if (plen + 1 >= IPATH_SMALLBUF_DWORDS)
-               tx->txreq.flags |= IPATH_SDMA_TXREQ_F_USELARGEBUF;
-
-       /* VL15 packets bypass credit check */
-       if ((be16_to_cpu(hdr->lrh[0]) >> 12) == 15) {
-               control |= 1ULL << 31;
-               tx->txreq.flags |= IPATH_SDMA_TXREQ_F_VL15;
-       }
-
-       if (len) {
-               /*
-                * Don't try to DMA if it takes more descriptors than
-                * the queue holds.
-                */
-               ndesc = ipath_count_sge(ss, len);
-               if (ndesc >= dd->ipath_sdma_descq_cnt)
-                       ndesc = 0;
-       } else
-               ndesc = 1;
-       if (ndesc) {
-               tx->hdr.pbc[0] = cpu_to_le32(plen);
-               tx->hdr.pbc[1] = cpu_to_le32(control);
-               memcpy(&tx->hdr.hdr, hdr, hdrwords << 2);
-               tx->txreq.sg_count = ndesc;
-               tx->map_len = (hdrwords + 2) << 2;
-               tx->txreq.map_addr = &tx->hdr;
-               atomic_inc(&qp->s_dma_busy);
-               ret = ipath_sdma_verbs_send(dd, ss, dwords, tx);
-               if (ret) {
-                       /* save ss and length in dwords */
-                       tx->ss = ss;
-                       tx->len = dwords;
-                       qp->s_tx = tx;
-                       decrement_dma_busy(qp);
-               }
-               goto bail;
-       }
-
-       /* Allocate a buffer and copy the header and payload to it. */
-       tx->map_len = (plen + 1) << 2;
-       piobuf = kmalloc(tx->map_len, GFP_ATOMIC);
-       if (unlikely(piobuf == NULL)) {
-               ret = -EBUSY;
-               goto err_tx;
-       }
-       tx->txreq.map_addr = piobuf;
-       tx->txreq.flags |= IPATH_SDMA_TXREQ_F_FREEBUF;
-       tx->txreq.sg_count = 1;
-
-       *piobuf++ = (__force u32) cpu_to_le32(plen);
-       *piobuf++ = (__force u32) cpu_to_le32(control);
-       memcpy(piobuf, hdr, hdrwords << 2);
-       ipath_copy_from_sge(piobuf + hdrwords, ss, len);
-
-       atomic_inc(&qp->s_dma_busy);
-       ret = ipath_sdma_verbs_send(dd, NULL, 0, tx);
-       /*
-        * If we couldn't queue the DMA request, save the info
-        * and try again later rather than destroying the
-        * buffer and undoing the side effects of the copy.
-        */
-       if (ret) {
-               tx->ss = NULL;
-               tx->len = 0;
-               qp->s_tx = tx;
-               decrement_dma_busy(qp);
-       }
-       dev->n_unaligned++;
-       goto bail;
-
-err_tx:
-       if (atomic_dec_and_test(&qp->refcount))
-               wake_up(&qp->wait);
-       put_txreq(dev, tx);
-bail:
-       return ret;
-}
-
-static int ipath_verbs_send_pio(struct ipath_qp *qp,
-                               struct ipath_ib_header *ibhdr, u32 hdrwords,
-                               struct ipath_sge_state *ss, u32 len,
-                               u32 plen, u32 dwords)
-{
-       struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
-       u32 *hdr = (u32 *) ibhdr;
-       u32 __iomem *piobuf;
-       unsigned flush_wc;
-       u32 control;
-       int ret;
-       unsigned long flags;
-
-       piobuf = ipath_getpiobuf(dd, plen, NULL);
-       if (unlikely(piobuf == NULL)) {
-               ret = -EBUSY;
-               goto bail;
-       }
-
-       /*
-        * Get the saved delay count we computed for the previous packet
-        * and save the delay count for this packet to be used next time
-        * we get here.
-        */
-       control = qp->s_pkt_delay;
-       qp->s_pkt_delay = ipath_pkt_delay(plen, dd->delay_mult, qp->s_dmult);
-
-       /* VL15 packets bypass credit check */
-       if ((be16_to_cpu(ibhdr->lrh[0]) >> 12) == 15)
-               control |= 1ULL << 31;
-
-       /*
-        * Write the length to the control qword plus any needed flags.
-        * We have to flush after the PBC for correctness on some cpus
-        * or WC buffer can be written out of order.
-        */
-       writeq(((u64) control << 32) | plen, piobuf);
-       piobuf += 2;
-
-       flush_wc = dd->ipath_flags & IPATH_PIO_FLUSH_WC;
-       if (len == 0) {
-               /*
-                * If there is just the header portion, must flush before
-                * writing last word of header for correctness, and after
-                * the last header word (trigger word).
-                */
-               if (flush_wc) {
-                       ipath_flush_wc();
-                       __iowrite32_copy(piobuf, hdr, hdrwords - 1);
-                       ipath_flush_wc();
-                       __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
-                       ipath_flush_wc();
-               } else
-                       __iowrite32_copy(piobuf, hdr, hdrwords);
-               goto done;
-       }
-
-       if (flush_wc)
-               ipath_flush_wc();
-       __iowrite32_copy(piobuf, hdr, hdrwords);
-       piobuf += hdrwords;
-
-       /* The common case is aligned and contained in one segment. */
-       if (likely(ss->num_sge == 1 && len <= ss->sge.length &&
-                  !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) {
-               u32 *addr = (u32 *) ss->sge.vaddr;
-
-               /* Update address before sending packet. */
-               update_sge(ss, len);
-               if (flush_wc) {
-                       __iowrite32_copy(piobuf, addr, dwords - 1);
-                       /* must flush early everything before trigger word */
-                       ipath_flush_wc();
-                       __raw_writel(addr[dwords - 1], piobuf + dwords - 1);
-                       /* be sure trigger word is written */
-                       ipath_flush_wc();
-               } else
-                       __iowrite32_copy(piobuf, addr, dwords);
-               goto done;
-       }
-       copy_io(piobuf, ss, len, flush_wc);
-done:
-       if (qp->s_wqe) {
-               spin_lock_irqsave(&qp->s_lock, flags);
-               ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);
-               spin_unlock_irqrestore(&qp->s_lock, flags);
-       }
-       ret = 0;
-bail:
-       return ret;
-}
-
-/**
- * ipath_verbs_send - send a packet
- * @qp: the QP to send on
- * @hdr: the packet header
- * @hdrwords: the number of 32-bit words in the header
- * @ss: the SGE to send
- * @len: the length of the packet in bytes
- */
-int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr,
-                    u32 hdrwords, struct ipath_sge_state *ss, u32 len)
-{
-       struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
-       u32 plen;
-       int ret;
-       u32 dwords = (len + 3) >> 2;
-
-       /*
-        * Calculate the send buffer trigger address.
-        * The +1 counts for the pbc control dword following the pbc length.
-        */
-       plen = hdrwords + dwords + 1;
-
-       /*
-        * VL15 packets (IB_QPT_SMI) will always use PIO, so we
-        * can defer SDMA restart until link goes ACTIVE without
-        * worrying about just how we got there.
-        */
-       if (qp->ibqp.qp_type == IB_QPT_SMI ||
-           !(dd->ipath_flags & IPATH_HAS_SEND_DMA))
-               ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len,
-                                          plen, dwords);
-       else
-               ret = ipath_verbs_send_dma(qp, hdr, hdrwords, ss, len,
-                                          plen, dwords);
-
-       return ret;
-}
-
-int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
-                           u64 *rwords, u64 *spkts, u64 *rpkts,
-                           u64 *xmit_wait)
-{
-       int ret;
-
-       if (!(dd->ipath_flags & IPATH_INITTED)) {
-               /* no hardware, freeze, etc. */
-               ret = -EINVAL;
-               goto bail;
-       }
-       *swords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
-       *rwords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
-       *spkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
-       *rpkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
-       *xmit_wait = ipath_snap_cntr(dd, dd->ipath_cregs->cr_sendstallcnt);
-
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_get_counters - get various chip counters
- * @dd: the infinipath device
- * @cntrs: counters are placed here
- *
- * Return the counters needed by recv_pma_get_portcounters().
- */
-int ipath_get_counters(struct ipath_devdata *dd,
-                      struct ipath_verbs_counters *cntrs)
-{
-       struct ipath_cregs const *crp = dd->ipath_cregs;
-       int ret;
-
-       if (!(dd->ipath_flags & IPATH_INITTED)) {
-               /* no hardware, freeze, etc. */
-               ret = -EINVAL;
-               goto bail;
-       }
-       cntrs->symbol_error_counter =
-               ipath_snap_cntr(dd, crp->cr_ibsymbolerrcnt);
-       cntrs->link_error_recovery_counter =
-               ipath_snap_cntr(dd, crp->cr_iblinkerrrecovcnt);
-       /*
-        * The link downed counter counts when the other side downs the
-        * connection.  We add in the number of times we downed the link
-        * due to local link integrity errors to compensate.
-        */
-       cntrs->link_downed_counter =
-               ipath_snap_cntr(dd, crp->cr_iblinkdowncnt);
-       cntrs->port_rcv_errors =
-               ipath_snap_cntr(dd, crp->cr_rxdroppktcnt) +
-               ipath_snap_cntr(dd, crp->cr_rcvovflcnt) +
-               ipath_snap_cntr(dd, crp->cr_portovflcnt) +
-               ipath_snap_cntr(dd, crp->cr_err_rlencnt) +
-               ipath_snap_cntr(dd, crp->cr_invalidrlencnt) +
-               ipath_snap_cntr(dd, crp->cr_errlinkcnt) +
-               ipath_snap_cntr(dd, crp->cr_erricrccnt) +
-               ipath_snap_cntr(dd, crp->cr_errvcrccnt) +
-               ipath_snap_cntr(dd, crp->cr_errlpcrccnt) +
-               ipath_snap_cntr(dd, crp->cr_badformatcnt) +
-               dd->ipath_rxfc_unsupvl_errs;
-       if (crp->cr_rxotherlocalphyerrcnt)
-               cntrs->port_rcv_errors +=
-                       ipath_snap_cntr(dd, crp->cr_rxotherlocalphyerrcnt);
-       if (crp->cr_rxvlerrcnt)
-               cntrs->port_rcv_errors +=
-                       ipath_snap_cntr(dd, crp->cr_rxvlerrcnt);
-       cntrs->port_rcv_remphys_errors =
-               ipath_snap_cntr(dd, crp->cr_rcvebpcnt);
-       cntrs->port_xmit_discards = ipath_snap_cntr(dd, crp->cr_unsupvlcnt);
-       cntrs->port_xmit_data = ipath_snap_cntr(dd, crp->cr_wordsendcnt);
-       cntrs->port_rcv_data = ipath_snap_cntr(dd, crp->cr_wordrcvcnt);
-       cntrs->port_xmit_packets = ipath_snap_cntr(dd, crp->cr_pktsendcnt);
-       cntrs->port_rcv_packets = ipath_snap_cntr(dd, crp->cr_pktrcvcnt);
-       cntrs->local_link_integrity_errors =
-               crp->cr_locallinkintegrityerrcnt ?
-               ipath_snap_cntr(dd, crp->cr_locallinkintegrityerrcnt) :
-               ((dd->ipath_flags & IPATH_GPIO_ERRINTRS) ?
-                dd->ipath_lli_errs : dd->ipath_lli_errors);
-       cntrs->excessive_buffer_overrun_errors =
-               crp->cr_excessbufferovflcnt ?
-               ipath_snap_cntr(dd, crp->cr_excessbufferovflcnt) :
-               dd->ipath_overrun_thresh_errs;
-       cntrs->vl15_dropped = crp->cr_vl15droppedpktcnt ?
-               ipath_snap_cntr(dd, crp->cr_vl15droppedpktcnt) : 0;
-
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_ib_piobufavail - callback when a PIO buffer is available
- * @arg: the device pointer
- *
- * This is called from ipath_intr() at interrupt level when a PIO buffer is
- * available after ipath_verbs_send() returned an error that no buffers were
- * available.  Return 1 if we consumed all the PIO buffers and we still have
- * QPs waiting for buffers (for now, just restart the send tasklet and
- * return zero).
- */
-int ipath_ib_piobufavail(struct ipath_ibdev *dev)
-{
-       struct list_head *list;
-       struct ipath_qp *qplist;
-       struct ipath_qp *qp;
-       unsigned long flags;
-
-       if (dev == NULL)
-               goto bail;
-
-       list = &dev->piowait;
-       qplist = NULL;
-
-       spin_lock_irqsave(&dev->pending_lock, flags);
-       while (!list_empty(list)) {
-               qp = list_entry(list->next, struct ipath_qp, piowait);
-               list_del_init(&qp->piowait);
-               qp->pio_next = qplist;
-               qplist = qp;
-               atomic_inc(&qp->refcount);
-       }
-       spin_unlock_irqrestore(&dev->pending_lock, flags);
-
-       while (qplist != NULL) {
-               qp = qplist;
-               qplist = qp->pio_next;
-
-               spin_lock_irqsave(&qp->s_lock, flags);
-               if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)
-                       ipath_schedule_send(qp);
-               spin_unlock_irqrestore(&qp->s_lock, flags);
-
-               /* Notify ipath_destroy_qp() if it is waiting. */
-               if (atomic_dec_and_test(&qp->refcount))
-                       wake_up(&qp->wait);
-       }
-
-bail:
-       return 0;
-}
-
-static int ipath_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
-                             struct ib_udata *uhw)
-{
-       struct ipath_ibdev *dev = to_idev(ibdev);
-
-       if (uhw->inlen || uhw->outlen)
-               return -EINVAL;
-
-       memset(props, 0, sizeof(*props));
-
-       props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
-               IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
-               IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
-               IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE;
-       props->page_size_cap = PAGE_SIZE;
-       props->vendor_id =
-               IPATH_SRC_OUI_1 << 16 | IPATH_SRC_OUI_2 << 8 | IPATH_SRC_OUI_3;
-       props->vendor_part_id = dev->dd->ipath_deviceid;
-       props->hw_ver = dev->dd->ipath_pcirev;
-
-       props->sys_image_guid = dev->sys_image_guid;
-
-       props->max_mr_size = ~0ull;
-       props->max_qp = ib_ipath_max_qps;
-       props->max_qp_wr = ib_ipath_max_qp_wrs;
-       props->max_sge = ib_ipath_max_sges;
-       props->max_sge_rd = ib_ipath_max_sges;
-       props->max_cq = ib_ipath_max_cqs;
-       props->max_ah = ib_ipath_max_ahs;
-       props->max_cqe = ib_ipath_max_cqes;
-       props->max_mr = dev->lk_table.max;
-       props->max_fmr = dev->lk_table.max;
-       props->max_map_per_fmr = 32767;
-       props->max_pd = ib_ipath_max_pds;
-       props->max_qp_rd_atom = IPATH_MAX_RDMA_ATOMIC;
-       props->max_qp_init_rd_atom = 255;
-       /* props->max_res_rd_atom */
-       props->max_srq = ib_ipath_max_srqs;
-       props->max_srq_wr = ib_ipath_max_srq_wrs;
-       props->max_srq_sge = ib_ipath_max_srq_sges;
-       /* props->local_ca_ack_delay */
-       props->atomic_cap = IB_ATOMIC_GLOB;
-       props->max_pkeys = ipath_get_npkeys(dev->dd);
-       props->max_mcast_grp = ib_ipath_max_mcast_grps;
-       props->max_mcast_qp_attach = ib_ipath_max_mcast_qp_attached;
-       props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
-               props->max_mcast_grp;
-
-       return 0;
-}
-
-const u8 ipath_cvt_physportstate[32] = {
-       [INFINIPATH_IBCS_LT_STATE_DISABLED] = IB_PHYSPORTSTATE_DISABLED,
-       [INFINIPATH_IBCS_LT_STATE_LINKUP] = IB_PHYSPORTSTATE_LINKUP,
-       [INFINIPATH_IBCS_LT_STATE_POLLACTIVE] = IB_PHYSPORTSTATE_POLL,
-       [INFINIPATH_IBCS_LT_STATE_POLLQUIET] = IB_PHYSPORTSTATE_POLL,
-       [INFINIPATH_IBCS_LT_STATE_SLEEPDELAY] = IB_PHYSPORTSTATE_SLEEP,
-       [INFINIPATH_IBCS_LT_STATE_SLEEPQUIET] = IB_PHYSPORTSTATE_SLEEP,
-       [INFINIPATH_IBCS_LT_STATE_CFGDEBOUNCE] =
-               IB_PHYSPORTSTATE_CFG_TRAIN,
-       [INFINIPATH_IBCS_LT_STATE_CFGRCVFCFG] =
-               IB_PHYSPORTSTATE_CFG_TRAIN,
-       [INFINIPATH_IBCS_LT_STATE_CFGWAITRMT] =
-               IB_PHYSPORTSTATE_CFG_TRAIN,
-       [INFINIPATH_IBCS_LT_STATE_CFGIDLE] = IB_PHYSPORTSTATE_CFG_TRAIN,
-       [INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN] =
-               IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
-       [INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT] =
-               IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
-       [INFINIPATH_IBCS_LT_STATE_RECOVERIDLE] =
-               IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
-       [0x10] = IB_PHYSPORTSTATE_CFG_TRAIN,
-       [0x11] = IB_PHYSPORTSTATE_CFG_TRAIN,
-       [0x12] = IB_PHYSPORTSTATE_CFG_TRAIN,
-       [0x13] = IB_PHYSPORTSTATE_CFG_TRAIN,
-       [0x14] = IB_PHYSPORTSTATE_CFG_TRAIN,
-       [0x15] = IB_PHYSPORTSTATE_CFG_TRAIN,
-       [0x16] = IB_PHYSPORTSTATE_CFG_TRAIN,
-       [0x17] = IB_PHYSPORTSTATE_CFG_TRAIN
-};
-
-u32 ipath_get_cr_errpkey(struct ipath_devdata *dd)
-{
-       return ipath_read_creg32(dd, dd->ipath_cregs->cr_errpkey);
-}
-
-static int ipath_query_port(struct ib_device *ibdev,
-                           u8 port, struct ib_port_attr *props)
-{
-       struct ipath_ibdev *dev = to_idev(ibdev);
-       struct ipath_devdata *dd = dev->dd;
-       enum ib_mtu mtu;
-       u16 lid = dd->ipath_lid;
-       u64 ibcstat;
-
-       memset(props, 0, sizeof(*props));
-       props->lid = lid ? lid : be16_to_cpu(IB_LID_PERMISSIVE);
-       props->lmc = dd->ipath_lmc;
-       props->sm_lid = dev->sm_lid;
-       props->sm_sl = dev->sm_sl;
-       ibcstat = dd->ipath_lastibcstat;
-       /* map LinkState to IB portinfo values.  */
-       props->state = ipath_ib_linkstate(dd, ibcstat) + 1;
-
-       /* See phys_state_show() */
-       props->phys_state = /* MEA: assumes shift == 0 */
-               ipath_cvt_physportstate[dd->ipath_lastibcstat &
-               dd->ibcs_lts_mask];
-       props->port_cap_flags = dev->port_cap_flags;
-       props->gid_tbl_len = 1;
-       props->max_msg_sz = 0x80000000;
-       props->pkey_tbl_len = ipath_get_npkeys(dd);
-       props->bad_pkey_cntr = ipath_get_cr_errpkey(dd) -
-               dev->z_pkey_violations;
-       props->qkey_viol_cntr = dev->qkey_violations;
-       props->active_width = dd->ipath_link_width_active;
-       /* See rate_show() */
-       props->active_speed = dd->ipath_link_speed_active;
-       props->max_vl_num = 1;          /* VLCap = VL0 */
-       props->init_type_reply = 0;
-
-       props->max_mtu = ipath_mtu4096 ? IB_MTU_4096 : IB_MTU_2048;
-       switch (dd->ipath_ibmtu) {
-       case 4096:
-               mtu = IB_MTU_4096;
-               break;
-       case 2048:
-               mtu = IB_MTU_2048;
-               break;
-       case 1024:
-               mtu = IB_MTU_1024;
-               break;
-       case 512:
-               mtu = IB_MTU_512;
-               break;
-       case 256:
-               mtu = IB_MTU_256;
-               break;
-       default:
-               mtu = IB_MTU_2048;
-       }
-       props->active_mtu = mtu;
-       props->subnet_timeout = dev->subnet_timeout;
-
-       return 0;
-}
-
-static int ipath_modify_device(struct ib_device *device,
-                              int device_modify_mask,
-                              struct ib_device_modify *device_modify)
-{
-       int ret;
-
-       if (device_modify_mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
-                                  IB_DEVICE_MODIFY_NODE_DESC)) {
-               ret = -EOPNOTSUPP;
-               goto bail;
-       }
-
-       if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC)
-               memcpy(device->node_desc, device_modify->node_desc, 64);
-
-       if (device_modify_mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID)
-               to_idev(device)->sys_image_guid =
-                       cpu_to_be64(device_modify->sys_image_guid);
-
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-static int ipath_modify_port(struct ib_device *ibdev,
-                            u8 port, int port_modify_mask,
-                            struct ib_port_modify *props)
-{
-       struct ipath_ibdev *dev = to_idev(ibdev);
-
-       dev->port_cap_flags |= props->set_port_cap_mask;
-       dev->port_cap_flags &= ~props->clr_port_cap_mask;
-       if (port_modify_mask & IB_PORT_SHUTDOWN)
-               ipath_set_linkstate(dev->dd, IPATH_IB_LINKDOWN);
-       if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR)
-               dev->qkey_violations = 0;
-       return 0;
-}
-
-static int ipath_query_gid(struct ib_device *ibdev, u8 port,
-                          int index, union ib_gid *gid)
-{
-       struct ipath_ibdev *dev = to_idev(ibdev);
-       int ret;
-
-       if (index >= 1) {
-               ret = -EINVAL;
-               goto bail;
-       }
-       gid->global.subnet_prefix = dev->gid_prefix;
-       gid->global.interface_id = dev->dd->ipath_guid;
-
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-static struct ib_pd *ipath_alloc_pd(struct ib_device *ibdev,
-                                   struct ib_ucontext *context,
-                                   struct ib_udata *udata)
-{
-       struct ipath_ibdev *dev = to_idev(ibdev);
-       struct ipath_pd *pd;
-       struct ib_pd *ret;
-
-       /*
-        * This is actually totally arbitrary.  Some correctness tests
-        * assume there's a maximum number of PDs that can be allocated.
-        * We don't actually have this limit, but we fail the test if
-        * we allow allocations of more than we report for this value.
-        */
-
-       pd = kmalloc(sizeof *pd, GFP_KERNEL);
-       if (!pd) {
-               ret = ERR_PTR(-ENOMEM);
-               goto bail;
-       }
-
-       spin_lock(&dev->n_pds_lock);
-       if (dev->n_pds_allocated == ib_ipath_max_pds) {
-               spin_unlock(&dev->n_pds_lock);
-               kfree(pd);
-               ret = ERR_PTR(-ENOMEM);
-               goto bail;
-       }
-
-       dev->n_pds_allocated++;
-       spin_unlock(&dev->n_pds_lock);
-
-       /* ib_alloc_pd() will initialize pd->ibpd. */
-       pd->user = udata != NULL;
-
-       ret = &pd->ibpd;
-
-bail:
-       return ret;
-}
-
-static int ipath_dealloc_pd(struct ib_pd *ibpd)
-{
-       struct ipath_pd *pd = to_ipd(ibpd);
-       struct ipath_ibdev *dev = to_idev(ibpd->device);
-
-       spin_lock(&dev->n_pds_lock);
-       dev->n_pds_allocated--;
-       spin_unlock(&dev->n_pds_lock);
-
-       kfree(pd);
-
-       return 0;
-}
-
-/**
- * ipath_create_ah - create an address handle
- * @pd: the protection domain
- * @ah_attr: the attributes of the AH
- *
- * This may be called from interrupt context.
- */
-static struct ib_ah *ipath_create_ah(struct ib_pd *pd,
-                                    struct ib_ah_attr *ah_attr)
-{
-       struct ipath_ah *ah;
-       struct ib_ah *ret;
-       struct ipath_ibdev *dev = to_idev(pd->device);
-       unsigned long flags;
-
-       /* A multicast address requires a GRH (see ch. 8.4.1). */
-       if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE &&
-           ah_attr->dlid != IPATH_PERMISSIVE_LID &&
-           !(ah_attr->ah_flags & IB_AH_GRH)) {
-               ret = ERR_PTR(-EINVAL);
-               goto bail;
-       }
-
-       if (ah_attr->dlid == 0) {
-               ret = ERR_PTR(-EINVAL);
-               goto bail;
-       }
-
-       if (ah_attr->port_num < 1 ||
-           ah_attr->port_num > pd->device->phys_port_cnt) {
-               ret = ERR_PTR(-EINVAL);
-               goto bail;
-       }
-
-       ah = kmalloc(sizeof *ah, GFP_ATOMIC);
-       if (!ah) {
-               ret = ERR_PTR(-ENOMEM);
-               goto bail;
-       }
-
-       spin_lock_irqsave(&dev->n_ahs_lock, flags);
-       if (dev->n_ahs_allocated == ib_ipath_max_ahs) {
-               spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
-               kfree(ah);
-               ret = ERR_PTR(-ENOMEM);
-               goto bail;
-       }
-
-       dev->n_ahs_allocated++;
-       spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
-
-       /* ib_create_ah() will initialize ah->ibah. */
-       ah->attr = *ah_attr;
-       ah->attr.static_rate = ipath_ib_rate_to_mult(ah_attr->static_rate);
-
-       ret = &ah->ibah;
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_destroy_ah - destroy an address handle
- * @ibah: the AH to destroy
- *
- * This may be called from interrupt context.
- */
-static int ipath_destroy_ah(struct ib_ah *ibah)
-{
-       struct ipath_ibdev *dev = to_idev(ibah->device);
-       struct ipath_ah *ah = to_iah(ibah);
-       unsigned long flags;
-
-       spin_lock_irqsave(&dev->n_ahs_lock, flags);
-       dev->n_ahs_allocated--;
-       spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
-
-       kfree(ah);
-
-       return 0;
-}
-
-static int ipath_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
-{
-       struct ipath_ah *ah = to_iah(ibah);
-
-       *ah_attr = ah->attr;
-       ah_attr->static_rate = ipath_mult_to_ib_rate(ah->attr.static_rate);
-
-       return 0;
-}
-
-/**
- * ipath_get_npkeys - return the size of the PKEY table for port 0
- * @dd: the infinipath device
- */
-unsigned ipath_get_npkeys(struct ipath_devdata *dd)
-{
-       return ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys);
-}
-
-/**
- * ipath_get_pkey - return the indexed PKEY from the port PKEY table
- * @dd: the infinipath device
- * @index: the PKEY index
- */
-unsigned ipath_get_pkey(struct ipath_devdata *dd, unsigned index)
-{
-       unsigned ret;
-
-       /* always a kernel port, no locking needed */
-       if (index >= ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys))
-               ret = 0;
-       else
-               ret = dd->ipath_pd[0]->port_pkeys[index];
-
-       return ret;
-}
-
-static int ipath_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
-                           u16 *pkey)
-{
-       struct ipath_ibdev *dev = to_idev(ibdev);
-       int ret;
-
-       if (index >= ipath_get_npkeys(dev->dd)) {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       *pkey = ipath_get_pkey(dev->dd, index);
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-/**
- * ipath_alloc_ucontext - allocate a ucontest
- * @ibdev: the infiniband device
- * @udata: not used by the InfiniPath driver
- */
-
-static struct ib_ucontext *ipath_alloc_ucontext(struct ib_device *ibdev,
-                                               struct ib_udata *udata)
-{
-       struct ipath_ucontext *context;
-       struct ib_ucontext *ret;
-
-       context = kmalloc(sizeof *context, GFP_KERNEL);
-       if (!context) {
-               ret = ERR_PTR(-ENOMEM);
-               goto bail;
-       }
-
-       ret = &context->ibucontext;
-
-bail:
-       return ret;
-}
-
-static int ipath_dealloc_ucontext(struct ib_ucontext *context)
-{
-       kfree(to_iucontext(context));
-       return 0;
-}
-
-static int ipath_verbs_register_sysfs(struct ib_device *dev);
-
-static void __verbs_timer(unsigned long arg)
-{
-       struct ipath_devdata *dd = (struct ipath_devdata *) arg;
-
-       /* Handle verbs layer timeouts. */
-       ipath_ib_timer(dd->verbs_dev);
-
-       mod_timer(&dd->verbs_timer, jiffies + 1);
-}
-
-static int enable_timer(struct ipath_devdata *dd)
-{
-       /*
-        * Early chips had a design flaw where the chip and kernel idea
-        * of the tail register don't always agree, and therefore we won't
-        * get an interrupt on the next packet received.
-        * If the board supports per packet receive interrupts, use it.
-        * Otherwise, the timer function periodically checks for packets
-        * to cover this case.
-        * Either way, the timer is needed for verbs layer related
-        * processing.
-        */
-       if (dd->ipath_flags & IPATH_GPIO_INTR) {
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect,
-                                0x2074076542310ULL);
-               /* Enable GPIO bit 2 interrupt */
-               dd->ipath_gpio_mask |= (u64) (1 << IPATH_GPIO_PORT0_BIT);
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
-                                dd->ipath_gpio_mask);
-       }
-
-       setup_timer(&dd->verbs_timer, __verbs_timer, (unsigned long)dd);
-
-       dd->verbs_timer.expires = jiffies + 1;
-       add_timer(&dd->verbs_timer);
-
-       return 0;
-}
-
-static int disable_timer(struct ipath_devdata *dd)
-{
-       /* Disable GPIO bit 2 interrupt */
-       if (dd->ipath_flags & IPATH_GPIO_INTR) {
-                /* Disable GPIO bit 2 interrupt */
-               dd->ipath_gpio_mask &= ~((u64) (1 << IPATH_GPIO_PORT0_BIT));
-               ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
-                                dd->ipath_gpio_mask);
-               /*
-                * We might want to undo changes to debugportselect,
-                * but how?
-                */
-       }
-
-       del_timer_sync(&dd->verbs_timer);
-
-       return 0;
-}
-
-static int ipath_port_immutable(struct ib_device *ibdev, u8 port_num,
-                               struct ib_port_immutable *immutable)
-{
-       struct ib_port_attr attr;
-       int err;
-
-       err = ipath_query_port(ibdev, port_num, &attr);
-       if (err)
-               return err;
-
-       immutable->pkey_tbl_len = attr.pkey_tbl_len;
-       immutable->gid_tbl_len = attr.gid_tbl_len;
-       immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
-       immutable->max_mad_size = IB_MGMT_MAD_SIZE;
-
-       return 0;
-}
-
-/**
- * ipath_register_ib_device - register our device with the infiniband core
- * @dd: the device data structure
- * Return the allocated ipath_ibdev pointer or NULL on error.
- */
-int ipath_register_ib_device(struct ipath_devdata *dd)
-{
-       struct ipath_verbs_counters cntrs;
-       struct ipath_ibdev *idev;
-       struct ib_device *dev;
-       struct ipath_verbs_txreq *tx;
-       unsigned i;
-       int ret;
-
-       idev = (struct ipath_ibdev *)ib_alloc_device(sizeof *idev);
-       if (idev == NULL) {
-               ret = -ENOMEM;
-               goto bail;
-       }
-
-       dev = &idev->ibdev;
-
-       if (dd->ipath_sdma_descq_cnt) {
-               tx = kmalloc_array(dd->ipath_sdma_descq_cnt, sizeof *tx,
-                                  GFP_KERNEL);
-               if (tx == NULL) {
-                       ret = -ENOMEM;
-                       goto err_tx;
-               }
-       } else
-               tx = NULL;
-       idev->txreq_bufs = tx;
-
-       /* Only need to initialize non-zero fields. */
-       spin_lock_init(&idev->n_pds_lock);
-       spin_lock_init(&idev->n_ahs_lock);
-       spin_lock_init(&idev->n_cqs_lock);
-       spin_lock_init(&idev->n_qps_lock);
-       spin_lock_init(&idev->n_srqs_lock);
-       spin_lock_init(&idev->n_mcast_grps_lock);
-
-       spin_lock_init(&idev->qp_table.lock);
-       spin_lock_init(&idev->lk_table.lock);
-       idev->sm_lid = be16_to_cpu(IB_LID_PERMISSIVE);
-       /* Set the prefix to the default value (see ch. 4.1.1) */
-       idev->gid_prefix = cpu_to_be64(0xfe80000000000000ULL);
-
-       ret = ipath_init_qp_table(idev, ib_ipath_qp_table_size);
-       if (ret)
-               goto err_qp;
-
-       /*
-        * The top ib_ipath_lkey_table_size bits are used to index the
-        * table.  The lower 8 bits can be owned by the user (copied from
-        * the LKEY).  The remaining bits act as a generation number or tag.
-        */
-       idev->lk_table.max = 1 << ib_ipath_lkey_table_size;
-       idev->lk_table.table = kcalloc(idev->lk_table.max,
-                                      sizeof(*idev->lk_table.table),
-                                      GFP_KERNEL);
-       if (idev->lk_table.table == NULL) {
-               ret = -ENOMEM;
-               goto err_lk;
-       }
-       INIT_LIST_HEAD(&idev->pending_mmaps);
-       spin_lock_init(&idev->pending_lock);
-       idev->mmap_offset = PAGE_SIZE;
-       spin_lock_init(&idev->mmap_offset_lock);
-       INIT_LIST_HEAD(&idev->pending[0]);
-       INIT_LIST_HEAD(&idev->pending[1]);
-       INIT_LIST_HEAD(&idev->pending[2]);
-       INIT_LIST_HEAD(&idev->piowait);
-       INIT_LIST_HEAD(&idev->rnrwait);
-       INIT_LIST_HEAD(&idev->txreq_free);
-       idev->pending_index = 0;
-       idev->port_cap_flags =
-               IB_PORT_SYS_IMAGE_GUID_SUP | IB_PORT_CLIENT_REG_SUP;
-       if (dd->ipath_flags & IPATH_HAS_LINK_LATENCY)
-               idev->port_cap_flags |= IB_PORT_LINK_LATENCY_SUP;
-       idev->pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
-       idev->pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
-       idev->pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
-       idev->pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;
-       idev->pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT;
-
-       /* Snapshot current HW counters to "clear" them. */
-       ipath_get_counters(dd, &cntrs);
-       idev->z_symbol_error_counter = cntrs.symbol_error_counter;
-       idev->z_link_error_recovery_counter =
-               cntrs.link_error_recovery_counter;
-       idev->z_link_downed_counter = cntrs.link_downed_counter;
-       idev->z_port_rcv_errors = cntrs.port_rcv_errors;
-       idev->z_port_rcv_remphys_errors =
-               cntrs.port_rcv_remphys_errors;
-       idev->z_port_xmit_discards = cntrs.port_xmit_discards;
-       idev->z_port_xmit_data = cntrs.port_xmit_data;
-       idev->z_port_rcv_data = cntrs.port_rcv_data;
-       idev->z_port_xmit_packets = cntrs.port_xmit_packets;
-       idev->z_port_rcv_packets = cntrs.port_rcv_packets;
-       idev->z_local_link_integrity_errors =
-               cntrs.local_link_integrity_errors;
-       idev->z_excessive_buffer_overrun_errors =
-               cntrs.excessive_buffer_overrun_errors;
-       idev->z_vl15_dropped = cntrs.vl15_dropped;
-
-       for (i = 0; i < dd->ipath_sdma_descq_cnt; i++, tx++)
-               list_add(&tx->txreq.list, &idev->txreq_free);
-
-       /*
-        * The system image GUID is supposed to be the same for all
-        * IB HCAs in a single system but since there can be other
-        * device types in the system, we can't be sure this is unique.
-        */
-       if (!sys_image_guid)
-               sys_image_guid = dd->ipath_guid;
-       idev->sys_image_guid = sys_image_guid;
-       idev->ib_unit = dd->ipath_unit;
-       idev->dd = dd;
-
-       strlcpy(dev->name, "ipath%d", IB_DEVICE_NAME_MAX);
-       dev->owner = THIS_MODULE;
-       dev->node_guid = dd->ipath_guid;
-       dev->uverbs_abi_ver = IPATH_UVERBS_ABI_VERSION;
-       dev->uverbs_cmd_mask =
-               (1ull << IB_USER_VERBS_CMD_GET_CONTEXT)         |
-               (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)        |
-               (1ull << IB_USER_VERBS_CMD_QUERY_PORT)          |
-               (1ull << IB_USER_VERBS_CMD_ALLOC_PD)            |
-               (1ull << IB_USER_VERBS_CMD_DEALLOC_PD)          |
-               (1ull << IB_USER_VERBS_CMD_CREATE_AH)           |
-               (1ull << IB_USER_VERBS_CMD_DESTROY_AH)          |
-               (1ull << IB_USER_VERBS_CMD_QUERY_AH)            |
-               (1ull << IB_USER_VERBS_CMD_REG_MR)              |
-               (1ull << IB_USER_VERBS_CMD_DEREG_MR)            |
-               (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
-               (1ull << IB_USER_VERBS_CMD_CREATE_CQ)           |
-               (1ull << IB_USER_VERBS_CMD_RESIZE_CQ)           |
-               (1ull << IB_USER_VERBS_CMD_DESTROY_CQ)          |
-               (1ull << IB_USER_VERBS_CMD_POLL_CQ)             |
-               (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ)       |
-               (1ull << IB_USER_VERBS_CMD_CREATE_QP)           |
-               (1ull << IB_USER_VERBS_CMD_QUERY_QP)            |
-               (1ull << IB_USER_VERBS_CMD_MODIFY_QP)           |
-               (1ull << IB_USER_VERBS_CMD_DESTROY_QP)          |
-               (1ull << IB_USER_VERBS_CMD_POST_SEND)           |
-               (1ull << IB_USER_VERBS_CMD_POST_RECV)           |
-               (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)        |
-               (1ull << IB_USER_VERBS_CMD_DETACH_MCAST)        |
-               (1ull << IB_USER_VERBS_CMD_CREATE_SRQ)          |
-               (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)          |
-               (1ull << IB_USER_VERBS_CMD_QUERY_SRQ)           |
-               (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)         |
-               (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
-       dev->node_type = RDMA_NODE_IB_CA;
-       dev->phys_port_cnt = 1;
-       dev->num_comp_vectors = 1;
-       dev->dma_device = &dd->pcidev->dev;
-       dev->query_device = ipath_query_device;
-       dev->modify_device = ipath_modify_device;
-       dev->query_port = ipath_query_port;
-       dev->modify_port = ipath_modify_port;
-       dev->query_pkey = ipath_query_pkey;
-       dev->query_gid = ipath_query_gid;
-       dev->alloc_ucontext = ipath_alloc_ucontext;
-       dev->dealloc_ucontext = ipath_dealloc_ucontext;
-       dev->alloc_pd = ipath_alloc_pd;
-       dev->dealloc_pd = ipath_dealloc_pd;
-       dev->create_ah = ipath_create_ah;
-       dev->destroy_ah = ipath_destroy_ah;
-       dev->query_ah = ipath_query_ah;
-       dev->create_srq = ipath_create_srq;
-       dev->modify_srq = ipath_modify_srq;
-       dev->query_srq = ipath_query_srq;
-       dev->destroy_srq = ipath_destroy_srq;
-       dev->create_qp = ipath_create_qp;
-       dev->modify_qp = ipath_modify_qp;
-       dev->query_qp = ipath_query_qp;
-       dev->destroy_qp = ipath_destroy_qp;
-       dev->post_send = ipath_post_send;
-       dev->post_recv = ipath_post_receive;
-       dev->post_srq_recv = ipath_post_srq_receive;
-       dev->create_cq = ipath_create_cq;
-       dev->destroy_cq = ipath_destroy_cq;
-       dev->resize_cq = ipath_resize_cq;
-       dev->poll_cq = ipath_poll_cq;
-       dev->req_notify_cq = ipath_req_notify_cq;
-       dev->get_dma_mr = ipath_get_dma_mr;
-       dev->reg_phys_mr = ipath_reg_phys_mr;
-       dev->reg_user_mr = ipath_reg_user_mr;
-       dev->dereg_mr = ipath_dereg_mr;
-       dev->alloc_fmr = ipath_alloc_fmr;
-       dev->map_phys_fmr = ipath_map_phys_fmr;
-       dev->unmap_fmr = ipath_unmap_fmr;
-       dev->dealloc_fmr = ipath_dealloc_fmr;
-       dev->attach_mcast = ipath_multicast_attach;
-       dev->detach_mcast = ipath_multicast_detach;
-       dev->process_mad = ipath_process_mad;
-       dev->mmap = ipath_mmap;
-       dev->dma_ops = &ipath_dma_mapping_ops;
-       dev->get_port_immutable = ipath_port_immutable;
-
-       snprintf(dev->node_desc, sizeof(dev->node_desc),
-                IPATH_IDSTR " %s", init_utsname()->nodename);
-
-       ret = ib_register_device(dev, NULL);
-       if (ret)
-               goto err_reg;
-
-       ret = ipath_verbs_register_sysfs(dev);
-       if (ret)
-               goto err_class;
-
-       enable_timer(dd);
-
-       goto bail;
-
-err_class:
-       ib_unregister_device(dev);
-err_reg:
-       kfree(idev->lk_table.table);
-err_lk:
-       kfree(idev->qp_table.table);
-err_qp:
-       kfree(idev->txreq_bufs);
-err_tx:
-       ib_dealloc_device(dev);
-       ipath_dev_err(dd, "cannot register verbs: %d!\n", -ret);
-       idev = NULL;
-
-bail:
-       dd->verbs_dev = idev;
-       return ret;
-}
-
-void ipath_unregister_ib_device(struct ipath_ibdev *dev)
-{
-       struct ib_device *ibdev = &dev->ibdev;
-       u32 qps_inuse;
-
-       ib_unregister_device(ibdev);
-
-       disable_timer(dev->dd);
-
-       if (!list_empty(&dev->pending[0]) ||
-           !list_empty(&dev->pending[1]) ||
-           !list_empty(&dev->pending[2]))
-               ipath_dev_err(dev->dd, "pending list not empty!\n");
-       if (!list_empty(&dev->piowait))
-               ipath_dev_err(dev->dd, "piowait list not empty!\n");
-       if (!list_empty(&dev->rnrwait))
-               ipath_dev_err(dev->dd, "rnrwait list not empty!\n");
-       if (!ipath_mcast_tree_empty())
-               ipath_dev_err(dev->dd, "multicast table memory leak!\n");
-       /*
-        * Note that ipath_unregister_ib_device() can be called before all
-        * the QPs are destroyed!
-        */
-       qps_inuse = ipath_free_all_qps(&dev->qp_table);
-       if (qps_inuse)
-               ipath_dev_err(dev->dd, "QP memory leak! %u still in use\n",
-                       qps_inuse);
-       kfree(dev->qp_table.table);
-       kfree(dev->lk_table.table);
-       kfree(dev->txreq_bufs);
-       ib_dealloc_device(ibdev);
-}
-
-static ssize_t show_rev(struct device *device, struct device_attribute *attr,
-                       char *buf)
-{
-       struct ipath_ibdev *dev =
-               container_of(device, struct ipath_ibdev, ibdev.dev);
-
-       return sprintf(buf, "%x\n", dev->dd->ipath_pcirev);
-}
-
-static ssize_t show_hca(struct device *device, struct device_attribute *attr,
-                       char *buf)
-{
-       struct ipath_ibdev *dev =
-               container_of(device, struct ipath_ibdev, ibdev.dev);
-       int ret;
-
-       ret = dev->dd->ipath_f_get_boardname(dev->dd, buf, 128);
-       if (ret < 0)
-               goto bail;
-       strcat(buf, "\n");
-       ret = strlen(buf);
-
-bail:
-       return ret;
-}
-
-static ssize_t show_stats(struct device *device, struct device_attribute *attr,
-                         char *buf)
-{
-       struct ipath_ibdev *dev =
-               container_of(device, struct ipath_ibdev, ibdev.dev);
-       int i;
-       int len;
-
-       len = sprintf(buf,
-                     "RC resends  %d\n"
-                     "RC no QACK  %d\n"
-                     "RC ACKs     %d\n"
-                     "RC SEQ NAKs %d\n"
-                     "RC RDMA seq %d\n"
-                     "RC RNR NAKs %d\n"
-                     "RC OTH NAKs %d\n"
-                     "RC timeouts %d\n"
-                     "RC RDMA dup %d\n"
-                     "piobuf wait %d\n"
-                     "unaligned   %d\n"
-                     "PKT drops   %d\n"
-                     "WQE errs    %d\n",
-                     dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks,
-                     dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks,
-                     dev->n_other_naks, dev->n_timeouts,
-                     dev->n_rdma_dup_busy, dev->n_piowait, dev->n_unaligned,
-                     dev->n_pkt_drops, dev->n_wqe_errs);
-       for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) {
-               const struct ipath_opcode_stats *si = &dev->opstats[i];
-
-               if (!si->n_packets && !si->n_bytes)
-                       continue;
-               len += sprintf(buf + len, "%02x %llu/%llu\n", i,
-                              (unsigned long long) si->n_packets,
-                              (unsigned long long) si->n_bytes);
-       }
-       return len;
-}
-
-static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
-static DEVICE_ATTR(board_id, S_IRUGO, show_hca, NULL);
-static DEVICE_ATTR(stats, S_IRUGO, show_stats, NULL);
-
-static struct device_attribute *ipath_class_attributes[] = {
-       &dev_attr_hw_rev,
-       &dev_attr_hca_type,
-       &dev_attr_board_id,
-       &dev_attr_stats
-};
-
-static int ipath_verbs_register_sysfs(struct ib_device *dev)
-{
-       int i;
-       int ret;
-
-       for (i = 0; i < ARRAY_SIZE(ipath_class_attributes); ++i) {
-               ret = device_create_file(&dev->dev,
-                                      ipath_class_attributes[i]);
-               if (ret)
-                       goto bail;
-       }
-       return 0;
-bail:
-       for (i = 0; i < ARRAY_SIZE(ipath_class_attributes); ++i)
-               device_remove_file(&dev->dev, ipath_class_attributes[i]);
-       return ret;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_verbs.h b/drivers/staging/rdma/ipath/ipath_verbs.h
deleted file mode 100644 (file)
index 0a90a56..0000000
+++ /dev/null
@@ -1,945 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef IPATH_VERBS_H
-#define IPATH_VERBS_H
-
-#include <linux/types.h>
-#include <linux/spinlock.h>
-#include <linux/kernel.h>
-#include <linux/interrupt.h>
-#include <linux/kref.h>
-#include <rdma/ib_pack.h>
-#include <rdma/ib_user_verbs.h>
-
-#include "ipath_kernel.h"
-
-#define IPATH_MAX_RDMA_ATOMIC  4
-
-#define QPN_MAX                 (1 << 24)
-#define QPNMAP_ENTRIES          (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE)
-
-/*
- * Increment this value if any changes that break userspace ABI
- * compatibility are made.
- */
-#define IPATH_UVERBS_ABI_VERSION       2
-
-/*
- * Define an ib_cq_notify value that is not valid so we know when CQ
- * notifications are armed.
- */
-#define IB_CQ_NONE     (IB_CQ_NEXT_COMP + 1)
-
-/* AETH NAK opcode values */
-#define IB_RNR_NAK                     0x20
-#define IB_NAK_PSN_ERROR               0x60
-#define IB_NAK_INVALID_REQUEST         0x61
-#define IB_NAK_REMOTE_ACCESS_ERROR     0x62
-#define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63
-#define IB_NAK_INVALID_RD_REQUEST      0x64
-
-/* Flags for checking QP state (see ib_ipath_state_ops[]) */
-#define IPATH_POST_SEND_OK             0x01
-#define IPATH_POST_RECV_OK             0x02
-#define IPATH_PROCESS_RECV_OK          0x04
-#define IPATH_PROCESS_SEND_OK          0x08
-#define IPATH_PROCESS_NEXT_SEND_OK     0x10
-#define IPATH_FLUSH_SEND               0x20
-#define IPATH_FLUSH_RECV               0x40
-#define IPATH_PROCESS_OR_FLUSH_SEND \
-       (IPATH_PROCESS_SEND_OK | IPATH_FLUSH_SEND)
-
-/* IB Performance Manager status values */
-#define IB_PMA_SAMPLE_STATUS_DONE      0x00
-#define IB_PMA_SAMPLE_STATUS_STARTED   0x01
-#define IB_PMA_SAMPLE_STATUS_RUNNING   0x02
-
-/* Mandatory IB performance counter select values. */
-#define IB_PMA_PORT_XMIT_DATA  cpu_to_be16(0x0001)
-#define IB_PMA_PORT_RCV_DATA   cpu_to_be16(0x0002)
-#define IB_PMA_PORT_XMIT_PKTS  cpu_to_be16(0x0003)
-#define IB_PMA_PORT_RCV_PKTS   cpu_to_be16(0x0004)
-#define IB_PMA_PORT_XMIT_WAIT  cpu_to_be16(0x0005)
-
-struct ib_reth {
-       __be64 vaddr;
-       __be32 rkey;
-       __be32 length;
-} __attribute__ ((packed));
-
-struct ib_atomic_eth {
-       __be32 vaddr[2];        /* unaligned so access as 2 32-bit words */
-       __be32 rkey;
-       __be64 swap_data;
-       __be64 compare_data;
-} __attribute__ ((packed));
-
-struct ipath_other_headers {
-       __be32 bth[3];
-       union {
-               struct {
-                       __be32 deth[2];
-                       __be32 imm_data;
-               } ud;
-               struct {
-                       struct ib_reth reth;
-                       __be32 imm_data;
-               } rc;
-               struct {
-                       __be32 aeth;
-                       __be32 atomic_ack_eth[2];
-               } at;
-               __be32 imm_data;
-               __be32 aeth;
-               struct ib_atomic_eth atomic_eth;
-       } u;
-} __attribute__ ((packed));
-
-/*
- * Note that UD packets with a GRH header are 8+40+12+8 = 68 bytes
- * long (72 w/ imm_data).  Only the first 56 bytes of the IB header
- * will be in the eager header buffer.  The remaining 12 or 16 bytes
- * are in the data buffer.
- */
-struct ipath_ib_header {
-       __be16 lrh[4];
-       union {
-               struct {
-                       struct ib_grh grh;
-                       struct ipath_other_headers oth;
-               } l;
-               struct ipath_other_headers oth;
-       } u;
-} __attribute__ ((packed));
-
-struct ipath_pio_header {
-       __le32 pbc[2];
-       struct ipath_ib_header hdr;
-} __attribute__ ((packed));
-
-/*
- * There is one struct ipath_mcast for each multicast GID.
- * All attached QPs are then stored as a list of
- * struct ipath_mcast_qp.
- */
-struct ipath_mcast_qp {
-       struct list_head list;
-       struct ipath_qp *qp;
-};
-
-struct ipath_mcast {
-       struct rb_node rb_node;
-       union ib_gid mgid;
-       struct list_head qp_list;
-       wait_queue_head_t wait;
-       atomic_t refcount;
-       int n_attached;
-};
-
-/* Protection domain */
-struct ipath_pd {
-       struct ib_pd ibpd;
-       int user;               /* non-zero if created from user space */
-};
-
-/* Address Handle */
-struct ipath_ah {
-       struct ib_ah ibah;
-       struct ib_ah_attr attr;
-};
-
-/*
- * This structure is used by ipath_mmap() to validate an offset
- * when an mmap() request is made.  The vm_area_struct then uses
- * this as its vm_private_data.
- */
-struct ipath_mmap_info {
-       struct list_head pending_mmaps;
-       struct ib_ucontext *context;
-       void *obj;
-       __u64 offset;
-       struct kref ref;
-       unsigned size;
-};
-
-/*
- * This structure is used to contain the head pointer, tail pointer,
- * and completion queue entries as a single memory allocation so
- * it can be mmap'ed into user space.
- */
-struct ipath_cq_wc {
-       u32 head;               /* index of next entry to fill */
-       u32 tail;               /* index of next ib_poll_cq() entry */
-       union {
-               /* these are actually size ibcq.cqe + 1 */
-               struct ib_uverbs_wc uqueue[0];
-               struct ib_wc kqueue[0];
-       };
-};
-
-/*
- * The completion queue structure.
- */
-struct ipath_cq {
-       struct ib_cq ibcq;
-       struct tasklet_struct comptask;
-       spinlock_t lock;
-       u8 notify;
-       u8 triggered;
-       struct ipath_cq_wc *queue;
-       struct ipath_mmap_info *ip;
-};
-
-/*
- * A segment is a linear region of low physical memory.
- * XXX Maybe we should use phys addr here and kmap()/kunmap().
- * Used by the verbs layer.
- */
-struct ipath_seg {
-       void *vaddr;
-       size_t length;
-};
-
-/* The number of ipath_segs that fit in a page. */
-#define IPATH_SEGSZ     (PAGE_SIZE / sizeof (struct ipath_seg))
-
-struct ipath_segarray {
-       struct ipath_seg segs[IPATH_SEGSZ];
-};
-
-struct ipath_mregion {
-       struct ib_pd *pd;       /* shares refcnt of ibmr.pd */
-       u64 user_base;          /* User's address for this region */
-       u64 iova;               /* IB start address of this region */
-       size_t length;
-       u32 lkey;
-       u32 offset;             /* offset (bytes) to start of region */
-       int access_flags;
-       u32 max_segs;           /* number of ipath_segs in all the arrays */
-       u32 mapsz;              /* size of the map array */
-       struct ipath_segarray *map[0];  /* the segments */
-};
-
-/*
- * These keep track of the copy progress within a memory region.
- * Used by the verbs layer.
- */
-struct ipath_sge {
-       struct ipath_mregion *mr;
-       void *vaddr;            /* kernel virtual address of segment */
-       u32 sge_length;         /* length of the SGE */
-       u32 length;             /* remaining length of the segment */
-       u16 m;                  /* current index: mr->map[m] */
-       u16 n;                  /* current index: mr->map[m]->segs[n] */
-};
-
-/* Memory region */
-struct ipath_mr {
-       struct ib_mr ibmr;
-       struct ib_umem *umem;
-       struct ipath_mregion mr;        /* must be last */
-};
-
-/*
- * Send work request queue entry.
- * The size of the sg_list is determined when the QP is created and stored
- * in qp->s_max_sge.
- */
-struct ipath_swqe {
-       union {
-               struct ib_send_wr wr;   /* don't use wr.sg_list */
-               struct ib_ud_wr ud_wr;
-               struct ib_rdma_wr rdma_wr;
-               struct ib_atomic_wr atomic_wr;
-       };
-
-       u32 psn;                /* first packet sequence number */
-       u32 lpsn;               /* last packet sequence number */
-       u32 ssn;                /* send sequence number */
-       u32 length;             /* total length of data in sg_list */
-       struct ipath_sge sg_list[0];
-};
-
-/*
- * Receive work request queue entry.
- * The size of the sg_list is determined when the QP (or SRQ) is created
- * and stored in qp->r_rq.max_sge (or srq->rq.max_sge).
- */
-struct ipath_rwqe {
-       u64 wr_id;
-       u8 num_sge;
-       struct ib_sge sg_list[0];
-};
-
-/*
- * This structure is used to contain the head pointer, tail pointer,
- * and receive work queue entries as a single memory allocation so
- * it can be mmap'ed into user space.
- * Note that the wq array elements are variable size so you can't
- * just index into the array to get the N'th element;
- * use get_rwqe_ptr() instead.
- */
-struct ipath_rwq {
-       u32 head;               /* new work requests posted to the head */
-       u32 tail;               /* receives pull requests from here. */
-       struct ipath_rwqe wq[0];
-};
-
-struct ipath_rq {
-       struct ipath_rwq *wq;
-       spinlock_t lock;
-       u32 size;               /* size of RWQE array */
-       u8 max_sge;
-};
-
-struct ipath_srq {
-       struct ib_srq ibsrq;
-       struct ipath_rq rq;
-       struct ipath_mmap_info *ip;
-       /* send signal when number of RWQEs < limit */
-       u32 limit;
-};
-
-struct ipath_sge_state {
-       struct ipath_sge *sg_list;      /* next SGE to be used if any */
-       struct ipath_sge sge;   /* progress state for the current SGE */
-       u8 num_sge;
-       u8 static_rate;
-};
-
-/*
- * This structure holds the information that the send tasklet needs
- * to send a RDMA read response or atomic operation.
- */
-struct ipath_ack_entry {
-       u8 opcode;
-       u8 sent;
-       u32 psn;
-       union {
-               struct ipath_sge_state rdma_sge;
-               u64 atomic_data;
-       };
-};
-
-/*
- * Variables prefixed with s_ are for the requester (sender).
- * Variables prefixed with r_ are for the responder (receiver).
- * Variables prefixed with ack_ are for responder replies.
- *
- * Common variables are protected by both r_rq.lock and s_lock in that order
- * which only happens in modify_qp() or changing the QP 'state'.
- */
-struct ipath_qp {
-       struct ib_qp ibqp;
-       struct ipath_qp *next;          /* link list for QPN hash table */
-       struct ipath_qp *timer_next;    /* link list for ipath_ib_timer() */
-       struct ipath_qp *pio_next;      /* link for ipath_ib_piobufavail() */
-       struct list_head piowait;       /* link for wait PIO buf */
-       struct list_head timerwait;     /* link for waiting for timeouts */
-       struct ib_ah_attr remote_ah_attr;
-       struct ipath_ib_header s_hdr;   /* next packet header to send */
-       atomic_t refcount;
-       wait_queue_head_t wait;
-       wait_queue_head_t wait_dma;
-       struct tasklet_struct s_task;
-       struct ipath_mmap_info *ip;
-       struct ipath_sge_state *s_cur_sge;
-       struct ipath_verbs_txreq *s_tx;
-       struct ipath_sge_state s_sge;   /* current send request data */
-       struct ipath_ack_entry s_ack_queue[IPATH_MAX_RDMA_ATOMIC + 1];
-       struct ipath_sge_state s_ack_rdma_sge;
-       struct ipath_sge_state s_rdma_read_sge;
-       struct ipath_sge_state r_sge;   /* current receive data */
-       spinlock_t s_lock;
-       atomic_t s_dma_busy;
-       u16 s_pkt_delay;
-       u16 s_hdrwords;         /* size of s_hdr in 32 bit words */
-       u32 s_cur_size;         /* size of send packet in bytes */
-       u32 s_len;              /* total length of s_sge */
-       u32 s_rdma_read_len;    /* total length of s_rdma_read_sge */
-       u32 s_next_psn;         /* PSN for next request */
-       u32 s_last_psn;         /* last response PSN processed */
-       u32 s_psn;              /* current packet sequence number */
-       u32 s_ack_rdma_psn;     /* PSN for sending RDMA read responses */
-       u32 s_ack_psn;          /* PSN for acking sends and RDMA writes */
-       u32 s_rnr_timeout;      /* number of milliseconds for RNR timeout */
-       u32 r_ack_psn;          /* PSN for next ACK or atomic ACK */
-       u64 r_wr_id;            /* ID for current receive WQE */
-       unsigned long r_aflags;
-       u32 r_len;              /* total length of r_sge */
-       u32 r_rcv_len;          /* receive data len processed */
-       u32 r_psn;              /* expected rcv packet sequence number */
-       u32 r_msn;              /* message sequence number */
-       u8 state;               /* QP state */
-       u8 s_state;             /* opcode of last packet sent */
-       u8 s_ack_state;         /* opcode of packet to ACK */
-       u8 s_nak_state;         /* non-zero if NAK is pending */
-       u8 r_state;             /* opcode of last packet received */
-       u8 r_nak_state;         /* non-zero if NAK is pending */
-       u8 r_min_rnr_timer;     /* retry timeout value for RNR NAKs */
-       u8 r_flags;
-       u8 r_max_rd_atomic;     /* max number of RDMA read/atomic to receive */
-       u8 r_head_ack_queue;    /* index into s_ack_queue[] */
-       u8 qp_access_flags;
-       u8 s_max_sge;           /* size of s_wq->sg_list */
-       u8 s_retry_cnt;         /* number of times to retry */
-       u8 s_rnr_retry_cnt;
-       u8 s_retry;             /* requester retry counter */
-       u8 s_rnr_retry;         /* requester RNR retry counter */
-       u8 s_pkey_index;        /* PKEY index to use */
-       u8 s_max_rd_atomic;     /* max number of RDMA read/atomic to send */
-       u8 s_num_rd_atomic;     /* number of RDMA read/atomic pending */
-       u8 s_tail_ack_queue;    /* index into s_ack_queue[] */
-       u8 s_flags;
-       u8 s_dmult;
-       u8 s_draining;
-       u8 timeout;             /* Timeout for this QP */
-       enum ib_mtu path_mtu;
-       u32 remote_qpn;
-       u32 qkey;               /* QKEY for this QP (for UD or RD) */
-       u32 s_size;             /* send work queue size */
-       u32 s_head;             /* new entries added here */
-       u32 s_tail;             /* next entry to process */
-       u32 s_cur;              /* current work queue entry */
-       u32 s_last;             /* last un-ACK'ed entry */
-       u32 s_ssn;              /* SSN of tail entry */
-       u32 s_lsn;              /* limit sequence number (credit) */
-       struct ipath_swqe *s_wq;        /* send work queue */
-       struct ipath_swqe *s_wqe;
-       struct ipath_sge *r_ud_sg_list;
-       struct ipath_rq r_rq;           /* receive work queue */
-       struct ipath_sge r_sg_list[0];  /* verified SGEs */
-};
-
-/*
- * Atomic bit definitions for r_aflags.
- */
-#define IPATH_R_WRID_VALID     0
-
-/*
- * Bit definitions for r_flags.
- */
-#define IPATH_R_REUSE_SGE      0x01
-#define IPATH_R_RDMAR_SEQ      0x02
-
-/*
- * Bit definitions for s_flags.
- *
- * IPATH_S_FENCE_PENDING - waiting for all prior RDMA read or atomic SWQEs
- *                        before processing the next SWQE
- * IPATH_S_RDMAR_PENDING - waiting for any RDMA read or atomic SWQEs
- *                        before processing the next SWQE
- * IPATH_S_WAITING - waiting for RNR timeout or send buffer available.
- * IPATH_S_WAIT_SSN_CREDIT - waiting for RC credits to process next SWQE
- * IPATH_S_WAIT_DMA - waiting for send DMA queue to drain before generating
- *                   next send completion entry not via send DMA.
- */
-#define IPATH_S_SIGNAL_REQ_WR  0x01
-#define IPATH_S_FENCE_PENDING  0x02
-#define IPATH_S_RDMAR_PENDING  0x04
-#define IPATH_S_ACK_PENDING    0x08
-#define IPATH_S_BUSY           0x10
-#define IPATH_S_WAITING                0x20
-#define IPATH_S_WAIT_SSN_CREDIT        0x40
-#define IPATH_S_WAIT_DMA       0x80
-
-#define IPATH_S_ANY_WAIT (IPATH_S_FENCE_PENDING | IPATH_S_RDMAR_PENDING | \
-       IPATH_S_WAITING | IPATH_S_WAIT_SSN_CREDIT | IPATH_S_WAIT_DMA)
-
-#define IPATH_PSN_CREDIT       512
-
-/*
- * Since struct ipath_swqe is not a fixed size, we can't simply index into
- * struct ipath_qp.s_wq.  This function does the array index computation.
- */
-static inline struct ipath_swqe *get_swqe_ptr(struct ipath_qp *qp,
-                                             unsigned n)
-{
-       return (struct ipath_swqe *)((char *)qp->s_wq +
-                                    (sizeof(struct ipath_swqe) +
-                                     qp->s_max_sge *
-                                     sizeof(struct ipath_sge)) * n);
-}
-
-/*
- * Since struct ipath_rwqe is not a fixed size, we can't simply index into
- * struct ipath_rwq.wq.  This function does the array index computation.
- */
-static inline struct ipath_rwqe *get_rwqe_ptr(struct ipath_rq *rq,
-                                             unsigned n)
-{
-       return (struct ipath_rwqe *)
-               ((char *) rq->wq->wq +
-                (sizeof(struct ipath_rwqe) +
-                 rq->max_sge * sizeof(struct ib_sge)) * n);
-}
-
-/*
- * QPN-map pages start out as NULL, they get allocated upon
- * first use and are never deallocated. This way,
- * large bitmaps are not allocated unless large numbers of QPs are used.
- */
-struct qpn_map {
-       atomic_t n_free;
-       void *page;
-};
-
-struct ipath_qp_table {
-       spinlock_t lock;
-       u32 last;               /* last QP number allocated */
-       u32 max;                /* size of the hash table */
-       u32 nmaps;              /* size of the map table */
-       struct ipath_qp **table;
-       /* bit map of free numbers */
-       struct qpn_map map[QPNMAP_ENTRIES];
-};
-
-struct ipath_lkey_table {
-       spinlock_t lock;
-       u32 next;               /* next unused index (speeds search) */
-       u32 gen;                /* generation count */
-       u32 max;                /* size of the table */
-       struct ipath_mregion **table;
-};
-
-struct ipath_opcode_stats {
-       u64 n_packets;          /* number of packets */
-       u64 n_bytes;            /* total number of bytes */
-};
-
-struct ipath_ibdev {
-       struct ib_device ibdev;
-       struct ipath_devdata *dd;
-       struct list_head pending_mmaps;
-       spinlock_t mmap_offset_lock;
-       u32 mmap_offset;
-       int ib_unit;            /* This is the device number */
-       u16 sm_lid;             /* in host order */
-       u8 sm_sl;
-       u8 mkeyprot;
-       /* non-zero when timer is set */
-       unsigned long mkey_lease_timeout;
-
-       /* The following fields are really per port. */
-       struct ipath_qp_table qp_table;
-       struct ipath_lkey_table lk_table;
-       struct list_head pending[3];    /* FIFO of QPs waiting for ACKs */
-       struct list_head piowait;       /* list for wait PIO buf */
-       struct list_head txreq_free;
-       void *txreq_bufs;
-       /* list of QPs waiting for RNR timer */
-       struct list_head rnrwait;
-       spinlock_t pending_lock;
-       __be64 sys_image_guid;  /* in network order */
-       __be64 gid_prefix;      /* in network order */
-       __be64 mkey;
-
-       u32 n_pds_allocated;    /* number of PDs allocated for device */
-       spinlock_t n_pds_lock;
-       u32 n_ahs_allocated;    /* number of AHs allocated for device */
-       spinlock_t n_ahs_lock;
-       u32 n_cqs_allocated;    /* number of CQs allocated for device */
-       spinlock_t n_cqs_lock;
-       u32 n_qps_allocated;    /* number of QPs allocated for device */
-       spinlock_t n_qps_lock;
-       u32 n_srqs_allocated;   /* number of SRQs allocated for device */
-       spinlock_t n_srqs_lock;
-       u32 n_mcast_grps_allocated; /* number of mcast groups allocated */
-       spinlock_t n_mcast_grps_lock;
-
-       u64 ipath_sword;        /* total dwords sent (sample result) */
-       u64 ipath_rword;        /* total dwords received (sample result) */
-       u64 ipath_spkts;        /* total packets sent (sample result) */
-       u64 ipath_rpkts;        /* total packets received (sample result) */
-       /* # of ticks no data sent (sample result) */
-       u64 ipath_xmit_wait;
-       u64 rcv_errors;         /* # of packets with SW detected rcv errs */
-       u64 n_unicast_xmit;     /* total unicast packets sent */
-       u64 n_unicast_rcv;      /* total unicast packets received */
-       u64 n_multicast_xmit;   /* total multicast packets sent */
-       u64 n_multicast_rcv;    /* total multicast packets received */
-       u64 z_symbol_error_counter;             /* starting count for PMA */
-       u64 z_link_error_recovery_counter;      /* starting count for PMA */
-       u64 z_link_downed_counter;              /* starting count for PMA */
-       u64 z_port_rcv_errors;                  /* starting count for PMA */
-       u64 z_port_rcv_remphys_errors;          /* starting count for PMA */
-       u64 z_port_xmit_discards;               /* starting count for PMA */
-       u64 z_port_xmit_data;                   /* starting count for PMA */
-       u64 z_port_rcv_data;                    /* starting count for PMA */
-       u64 z_port_xmit_packets;                /* starting count for PMA */
-       u64 z_port_rcv_packets;                 /* starting count for PMA */
-       u32 z_pkey_violations;                  /* starting count for PMA */
-       u32 z_local_link_integrity_errors;      /* starting count for PMA */
-       u32 z_excessive_buffer_overrun_errors;  /* starting count for PMA */
-       u32 z_vl15_dropped;                     /* starting count for PMA */
-       u32 n_rc_resends;
-       u32 n_rc_acks;
-       u32 n_rc_qacks;
-       u32 n_seq_naks;
-       u32 n_rdma_seq;
-       u32 n_rnr_naks;
-       u32 n_other_naks;
-       u32 n_timeouts;
-       u32 n_pkt_drops;
-       u32 n_vl15_dropped;
-       u32 n_wqe_errs;
-       u32 n_rdma_dup_busy;
-       u32 n_piowait;
-       u32 n_unaligned;
-       u32 port_cap_flags;
-       u32 pma_sample_start;
-       u32 pma_sample_interval;
-       __be16 pma_counter_select[5];
-       u16 pma_tag;
-       u16 qkey_violations;
-       u16 mkey_violations;
-       u16 mkey_lease_period;
-       u16 pending_index;      /* which pending queue is active */
-       u8 pma_sample_status;
-       u8 subnet_timeout;
-       u8 vl_high_limit;
-       struct ipath_opcode_stats opstats[128];
-};
-
-struct ipath_verbs_counters {
-       u64 symbol_error_counter;
-       u64 link_error_recovery_counter;
-       u64 link_downed_counter;
-       u64 port_rcv_errors;
-       u64 port_rcv_remphys_errors;
-       u64 port_xmit_discards;
-       u64 port_xmit_data;
-       u64 port_rcv_data;
-       u64 port_xmit_packets;
-       u64 port_rcv_packets;
-       u32 local_link_integrity_errors;
-       u32 excessive_buffer_overrun_errors;
-       u32 vl15_dropped;
-};
-
-struct ipath_verbs_txreq {
-       struct ipath_qp         *qp;
-       struct ipath_swqe       *wqe;
-       u32                      map_len;
-       u32                      len;
-       struct ipath_sge_state  *ss;
-       struct ipath_pio_header  hdr;
-       struct ipath_sdma_txreq  txreq;
-};
-
-static inline struct ipath_mr *to_imr(struct ib_mr *ibmr)
-{
-       return container_of(ibmr, struct ipath_mr, ibmr);
-}
-
-static inline struct ipath_pd *to_ipd(struct ib_pd *ibpd)
-{
-       return container_of(ibpd, struct ipath_pd, ibpd);
-}
-
-static inline struct ipath_ah *to_iah(struct ib_ah *ibah)
-{
-       return container_of(ibah, struct ipath_ah, ibah);
-}
-
-static inline struct ipath_cq *to_icq(struct ib_cq *ibcq)
-{
-       return container_of(ibcq, struct ipath_cq, ibcq);
-}
-
-static inline struct ipath_srq *to_isrq(struct ib_srq *ibsrq)
-{
-       return container_of(ibsrq, struct ipath_srq, ibsrq);
-}
-
-static inline struct ipath_qp *to_iqp(struct ib_qp *ibqp)
-{
-       return container_of(ibqp, struct ipath_qp, ibqp);
-}
-
-static inline struct ipath_ibdev *to_idev(struct ib_device *ibdev)
-{
-       return container_of(ibdev, struct ipath_ibdev, ibdev);
-}
-
-/*
- * This must be called with s_lock held.
- */
-static inline void ipath_schedule_send(struct ipath_qp *qp)
-{
-       if (qp->s_flags & IPATH_S_ANY_WAIT)
-               qp->s_flags &= ~IPATH_S_ANY_WAIT;
-       if (!(qp->s_flags & IPATH_S_BUSY))
-               tasklet_hi_schedule(&qp->s_task);
-}
-
-int ipath_process_mad(struct ib_device *ibdev,
-                     int mad_flags,
-                     u8 port_num,
-                     const struct ib_wc *in_wc,
-                     const struct ib_grh *in_grh,
-                     const struct ib_mad_hdr *in, size_t in_mad_size,
-                     struct ib_mad_hdr *out, size_t *out_mad_size,
-                     u16 *out_mad_pkey_index);
-
-/*
- * Compare the lower 24 bits of the two values.
- * Returns an integer <, ==, or > than zero.
- */
-static inline int ipath_cmp24(u32 a, u32 b)
-{
-       return (((int) a) - ((int) b)) << 8;
-}
-
-struct ipath_mcast *ipath_mcast_find(union ib_gid *mgid);
-
-int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
-                           u64 *rwords, u64 *spkts, u64 *rpkts,
-                           u64 *xmit_wait);
-
-int ipath_get_counters(struct ipath_devdata *dd,
-                      struct ipath_verbs_counters *cntrs);
-
-int ipath_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
-
-int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
-
-int ipath_mcast_tree_empty(void);
-
-__be32 ipath_compute_aeth(struct ipath_qp *qp);
-
-struct ipath_qp *ipath_lookup_qpn(struct ipath_qp_table *qpt, u32 qpn);
-
-struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
-                             struct ib_qp_init_attr *init_attr,
-                             struct ib_udata *udata);
-
-int ipath_destroy_qp(struct ib_qp *ibqp);
-
-int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err);
-
-int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-                   int attr_mask, struct ib_udata *udata);
-
-int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-                  int attr_mask, struct ib_qp_init_attr *init_attr);
-
-unsigned ipath_free_all_qps(struct ipath_qp_table *qpt);
-
-int ipath_init_qp_table(struct ipath_ibdev *idev, int size);
-
-void ipath_get_credit(struct ipath_qp *qp, u32 aeth);
-
-unsigned ipath_ib_rate_to_mult(enum ib_rate rate);
-
-int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr,
-                    u32 hdrwords, struct ipath_sge_state *ss, u32 len);
-
-void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length);
-
-void ipath_skip_sge(struct ipath_sge_state *ss, u32 length);
-
-void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
-                 int has_grh, void *data, u32 tlen, struct ipath_qp *qp);
-
-void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
-                 int has_grh, void *data, u32 tlen, struct ipath_qp *qp);
-
-void ipath_restart_rc(struct ipath_qp *qp, u32 psn);
-
-void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err);
-
-int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr);
-
-void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
-                 int has_grh, void *data, u32 tlen, struct ipath_qp *qp);
-
-int ipath_alloc_lkey(struct ipath_lkey_table *rkt,
-                    struct ipath_mregion *mr);
-
-void ipath_free_lkey(struct ipath_lkey_table *rkt, u32 lkey);
-
-int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge,
-                 struct ib_sge *sge, int acc);
-
-int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss,
-                 u32 len, u64 vaddr, u32 rkey, int acc);
-
-int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
-                          struct ib_recv_wr **bad_wr);
-
-struct ib_srq *ipath_create_srq(struct ib_pd *ibpd,
-                               struct ib_srq_init_attr *srq_init_attr,
-                               struct ib_udata *udata);
-
-int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
-                    enum ib_srq_attr_mask attr_mask,
-                    struct ib_udata *udata);
-
-int ipath_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
-
-int ipath_destroy_srq(struct ib_srq *ibsrq);
-
-void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig);
-
-int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
-
-struct ib_cq *ipath_create_cq(struct ib_device *ibdev,
-                             const struct ib_cq_init_attr *attr,
-                             struct ib_ucontext *context,
-                             struct ib_udata *udata);
-
-int ipath_destroy_cq(struct ib_cq *ibcq);
-
-int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags);
-
-int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
-
-struct ib_mr *ipath_get_dma_mr(struct ib_pd *pd, int acc);
-
-struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd,
-                               struct ib_phys_buf *buffer_list,
-                               int num_phys_buf, int acc, u64 *iova_start);
-
-struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
-                               u64 virt_addr, int mr_access_flags,
-                               struct ib_udata *udata);
-
-int ipath_dereg_mr(struct ib_mr *ibmr);
-
-struct ib_fmr *ipath_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
-                              struct ib_fmr_attr *fmr_attr);
-
-int ipath_map_phys_fmr(struct ib_fmr *ibfmr, u64 * page_list,
-                      int list_len, u64 iova);
-
-int ipath_unmap_fmr(struct list_head *fmr_list);
-
-int ipath_dealloc_fmr(struct ib_fmr *ibfmr);
-
-void ipath_release_mmap_info(struct kref *ref);
-
-struct ipath_mmap_info *ipath_create_mmap_info(struct ipath_ibdev *dev,
-                                              u32 size,
-                                              struct ib_ucontext *context,
-                                              void *obj);
-
-void ipath_update_mmap_info(struct ipath_ibdev *dev,
-                           struct ipath_mmap_info *ip,
-                           u32 size, void *obj);
-
-int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
-
-void ipath_insert_rnr_queue(struct ipath_qp *qp);
-
-int ipath_init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe,
-                  u32 *lengthp, struct ipath_sge_state *ss);
-
-int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only);
-
-u32 ipath_make_grh(struct ipath_ibdev *dev, struct ib_grh *hdr,
-                  struct ib_global_route *grh, u32 hwords, u32 nwords);
-
-void ipath_make_ruc_header(struct ipath_ibdev *dev, struct ipath_qp *qp,
-                          struct ipath_other_headers *ohdr,
-                          u32 bth0, u32 bth2);
-
-void ipath_do_send(unsigned long data);
-
-void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe,
-                        enum ib_wc_status status);
-
-int ipath_make_rc_req(struct ipath_qp *qp);
-
-int ipath_make_uc_req(struct ipath_qp *qp);
-
-int ipath_make_ud_req(struct ipath_qp *qp);
-
-int ipath_register_ib_device(struct ipath_devdata *);
-
-void ipath_unregister_ib_device(struct ipath_ibdev *);
-
-void ipath_ib_rcv(struct ipath_ibdev *, void *, void *, u32);
-
-int ipath_ib_piobufavail(struct ipath_ibdev *);
-
-unsigned ipath_get_npkeys(struct ipath_devdata *);
-
-u32 ipath_get_cr_errpkey(struct ipath_devdata *);
-
-unsigned ipath_get_pkey(struct ipath_devdata *, unsigned);
-
-extern const enum ib_wc_opcode ib_ipath_wc_opcode[];
-
-/*
- * Below converts HCA-specific LinkTrainingState to IB PhysPortState
- * values.
- */
-extern const u8 ipath_cvt_physportstate[];
-#define IB_PHYSPORTSTATE_SLEEP 1
-#define IB_PHYSPORTSTATE_POLL 2
-#define IB_PHYSPORTSTATE_DISABLED 3
-#define IB_PHYSPORTSTATE_CFG_TRAIN 4
-#define IB_PHYSPORTSTATE_LINKUP 5
-#define IB_PHYSPORTSTATE_LINK_ERR_RECOVER 6
-
-extern const int ib_ipath_state_ops[];
-
-extern unsigned int ib_ipath_lkey_table_size;
-
-extern unsigned int ib_ipath_max_cqes;
-
-extern unsigned int ib_ipath_max_cqs;
-
-extern unsigned int ib_ipath_max_qp_wrs;
-
-extern unsigned int ib_ipath_max_qps;
-
-extern unsigned int ib_ipath_max_sges;
-
-extern unsigned int ib_ipath_max_mcast_grps;
-
-extern unsigned int ib_ipath_max_mcast_qp_attached;
-
-extern unsigned int ib_ipath_max_srqs;
-
-extern unsigned int ib_ipath_max_srq_sges;
-
-extern unsigned int ib_ipath_max_srq_wrs;
-
-extern const u32 ib_ipath_rnr_table[];
-
-extern struct ib_dma_mapping_ops ipath_dma_mapping_ops;
-
-#endif                         /* IPATH_VERBS_H */
diff --git a/drivers/staging/rdma/ipath/ipath_verbs_mcast.c b/drivers/staging/rdma/ipath/ipath_verbs_mcast.c
deleted file mode 100644 (file)
index 72d476f..0000000
+++ /dev/null
@@ -1,363 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/rculist.h>
-#include <linux/slab.h>
-
-#include "ipath_verbs.h"
-
-/*
- * Global table of GID to attached QPs.
- * The table is global to all ipath devices since a send from one QP/device
- * needs to be locally routed to any locally attached QPs on the same
- * or different device.
- */
-static struct rb_root mcast_tree;
-static DEFINE_SPINLOCK(mcast_lock);
-
-/**
- * ipath_mcast_qp_alloc - alloc a struct to link a QP to mcast GID struct
- * @qp: the QP to link
- */
-static struct ipath_mcast_qp *ipath_mcast_qp_alloc(struct ipath_qp *qp)
-{
-       struct ipath_mcast_qp *mqp;
-
-       mqp = kmalloc(sizeof *mqp, GFP_KERNEL);
-       if (!mqp)
-               goto bail;
-
-       mqp->qp = qp;
-       atomic_inc(&qp->refcount);
-
-bail:
-       return mqp;
-}
-
-static void ipath_mcast_qp_free(struct ipath_mcast_qp *mqp)
-{
-       struct ipath_qp *qp = mqp->qp;
-
-       /* Notify ipath_destroy_qp() if it is waiting. */
-       if (atomic_dec_and_test(&qp->refcount))
-               wake_up(&qp->wait);
-
-       kfree(mqp);
-}
-
-/**
- * ipath_mcast_alloc - allocate the multicast GID structure
- * @mgid: the multicast GID
- *
- * A list of QPs will be attached to this structure.
- */
-static struct ipath_mcast *ipath_mcast_alloc(union ib_gid *mgid)
-{
-       struct ipath_mcast *mcast;
-
-       mcast = kmalloc(sizeof *mcast, GFP_KERNEL);
-       if (!mcast)
-               goto bail;
-
-       mcast->mgid = *mgid;
-       INIT_LIST_HEAD(&mcast->qp_list);
-       init_waitqueue_head(&mcast->wait);
-       atomic_set(&mcast->refcount, 0);
-       mcast->n_attached = 0;
-
-bail:
-       return mcast;
-}
-
-static void ipath_mcast_free(struct ipath_mcast *mcast)
-{
-       struct ipath_mcast_qp *p, *tmp;
-
-       list_for_each_entry_safe(p, tmp, &mcast->qp_list, list)
-               ipath_mcast_qp_free(p);
-
-       kfree(mcast);
-}
-
-/**
- * ipath_mcast_find - search the global table for the given multicast GID
- * @mgid: the multicast GID to search for
- *
- * Returns NULL if not found.
- *
- * The caller is responsible for decrementing the reference count if found.
- */
-struct ipath_mcast *ipath_mcast_find(union ib_gid *mgid)
-{
-       struct rb_node *n;
-       unsigned long flags;
-       struct ipath_mcast *mcast;
-
-       spin_lock_irqsave(&mcast_lock, flags);
-       n = mcast_tree.rb_node;
-       while (n) {
-               int ret;
-
-               mcast = rb_entry(n, struct ipath_mcast, rb_node);
-
-               ret = memcmp(mgid->raw, mcast->mgid.raw,
-                            sizeof(union ib_gid));
-               if (ret < 0)
-                       n = n->rb_left;
-               else if (ret > 0)
-                       n = n->rb_right;
-               else {
-                       atomic_inc(&mcast->refcount);
-                       spin_unlock_irqrestore(&mcast_lock, flags);
-                       goto bail;
-               }
-       }
-       spin_unlock_irqrestore(&mcast_lock, flags);
-
-       mcast = NULL;
-
-bail:
-       return mcast;
-}
-
-/**
- * ipath_mcast_add - insert mcast GID into table and attach QP struct
- * @mcast: the mcast GID table
- * @mqp: the QP to attach
- *
- * Return zero if both were added.  Return EEXIST if the GID was already in
- * the table but the QP was added.  Return ESRCH if the QP was already
- * attached and neither structure was added.
- */
-static int ipath_mcast_add(struct ipath_ibdev *dev,
-                          struct ipath_mcast *mcast,
-                          struct ipath_mcast_qp *mqp)
-{
-       struct rb_node **n = &mcast_tree.rb_node;
-       struct rb_node *pn = NULL;
-       int ret;
-
-       spin_lock_irq(&mcast_lock);
-
-       while (*n) {
-               struct ipath_mcast *tmcast;
-               struct ipath_mcast_qp *p;
-
-               pn = *n;
-               tmcast = rb_entry(pn, struct ipath_mcast, rb_node);
-
-               ret = memcmp(mcast->mgid.raw, tmcast->mgid.raw,
-                            sizeof(union ib_gid));
-               if (ret < 0) {
-                       n = &pn->rb_left;
-                       continue;
-               }
-               if (ret > 0) {
-                       n = &pn->rb_right;
-                       continue;
-               }
-
-               /* Search the QP list to see if this is already there. */
-               list_for_each_entry_rcu(p, &tmcast->qp_list, list) {
-                       if (p->qp == mqp->qp) {
-                               ret = ESRCH;
-                               goto bail;
-                       }
-               }
-               if (tmcast->n_attached == ib_ipath_max_mcast_qp_attached) {
-                       ret = ENOMEM;
-                       goto bail;
-               }
-
-               tmcast->n_attached++;
-
-               list_add_tail_rcu(&mqp->list, &tmcast->qp_list);
-               ret = EEXIST;
-               goto bail;
-       }
-
-       spin_lock(&dev->n_mcast_grps_lock);
-       if (dev->n_mcast_grps_allocated == ib_ipath_max_mcast_grps) {
-               spin_unlock(&dev->n_mcast_grps_lock);
-               ret = ENOMEM;
-               goto bail;
-       }
-
-       dev->n_mcast_grps_allocated++;
-       spin_unlock(&dev->n_mcast_grps_lock);
-
-       mcast->n_attached++;
-
-       list_add_tail_rcu(&mqp->list, &mcast->qp_list);
-
-       atomic_inc(&mcast->refcount);
-       rb_link_node(&mcast->rb_node, pn, n);
-       rb_insert_color(&mcast->rb_node, &mcast_tree);
-
-       ret = 0;
-
-bail:
-       spin_unlock_irq(&mcast_lock);
-
-       return ret;
-}
-
-int ipath_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
-{
-       struct ipath_qp *qp = to_iqp(ibqp);
-       struct ipath_ibdev *dev = to_idev(ibqp->device);
-       struct ipath_mcast *mcast;
-       struct ipath_mcast_qp *mqp;
-       int ret;
-
-       /*
-        * Allocate data structures since its better to do this outside of
-        * spin locks and it will most likely be needed.
-        */
-       mcast = ipath_mcast_alloc(gid);
-       if (mcast == NULL) {
-               ret = -ENOMEM;
-               goto bail;
-       }
-       mqp = ipath_mcast_qp_alloc(qp);
-       if (mqp == NULL) {
-               ipath_mcast_free(mcast);
-               ret = -ENOMEM;
-               goto bail;
-       }
-       switch (ipath_mcast_add(dev, mcast, mqp)) {
-       case ESRCH:
-               /* Neither was used: can't attach the same QP twice. */
-               ipath_mcast_qp_free(mqp);
-               ipath_mcast_free(mcast);
-               ret = -EINVAL;
-               goto bail;
-       case EEXIST:            /* The mcast wasn't used */
-               ipath_mcast_free(mcast);
-               break;
-       case ENOMEM:
-               /* Exceeded the maximum number of mcast groups. */
-               ipath_mcast_qp_free(mqp);
-               ipath_mcast_free(mcast);
-               ret = -ENOMEM;
-               goto bail;
-       default:
-               break;
-       }
-
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
-{
-       struct ipath_qp *qp = to_iqp(ibqp);
-       struct ipath_ibdev *dev = to_idev(ibqp->device);
-       struct ipath_mcast *mcast = NULL;
-       struct ipath_mcast_qp *p, *tmp;
-       struct rb_node *n;
-       int last = 0;
-       int ret;
-
-       spin_lock_irq(&mcast_lock);
-
-       /* Find the GID in the mcast table. */
-       n = mcast_tree.rb_node;
-       while (1) {
-               if (n == NULL) {
-                       spin_unlock_irq(&mcast_lock);
-                       ret = -EINVAL;
-                       goto bail;
-               }
-
-               mcast = rb_entry(n, struct ipath_mcast, rb_node);
-               ret = memcmp(gid->raw, mcast->mgid.raw,
-                            sizeof(union ib_gid));
-               if (ret < 0)
-                       n = n->rb_left;
-               else if (ret > 0)
-                       n = n->rb_right;
-               else
-                       break;
-       }
-
-       /* Search the QP list. */
-       list_for_each_entry_safe(p, tmp, &mcast->qp_list, list) {
-               if (p->qp != qp)
-                       continue;
-               /*
-                * We found it, so remove it, but don't poison the forward
-                * link until we are sure there are no list walkers.
-                */
-               list_del_rcu(&p->list);
-               mcast->n_attached--;
-
-               /* If this was the last attached QP, remove the GID too. */
-               if (list_empty(&mcast->qp_list)) {
-                       rb_erase(&mcast->rb_node, &mcast_tree);
-                       last = 1;
-               }
-               break;
-       }
-
-       spin_unlock_irq(&mcast_lock);
-
-       if (p) {
-               /*
-                * Wait for any list walkers to finish before freeing the
-                * list element.
-                */
-               wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1);
-               ipath_mcast_qp_free(p);
-       }
-       if (last) {
-               atomic_dec(&mcast->refcount);
-               wait_event(mcast->wait, !atomic_read(&mcast->refcount));
-               ipath_mcast_free(mcast);
-               spin_lock_irq(&dev->n_mcast_grps_lock);
-               dev->n_mcast_grps_allocated--;
-               spin_unlock_irq(&dev->n_mcast_grps_lock);
-       }
-
-       ret = 0;
-
-bail:
-       return ret;
-}
-
-int ipath_mcast_tree_empty(void)
-{
-       return mcast_tree.rb_node == NULL;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_wc_ppc64.c b/drivers/staging/rdma/ipath/ipath_wc_ppc64.c
deleted file mode 100644 (file)
index 1a7e20a..0000000
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/*
- * This file is conditionally built on PowerPC only.  Otherwise weak symbol
- * versions of the functions exported from here are used.
- */
-
-#include "ipath_kernel.h"
-
-/**
- * ipath_enable_wc - enable write combining for MMIO writes to the device
- * @dd: infinipath device
- *
- * Nothing to do on PowerPC, so just return without error.
- */
-int ipath_enable_wc(struct ipath_devdata *dd)
-{
-       return 0;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_wc_x86_64.c b/drivers/staging/rdma/ipath/ipath_wc_x86_64.c
deleted file mode 100644 (file)
index 7b6e4c8..0000000
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/*
- * This file is conditionally built on x86_64 only.  Otherwise weak symbol
- * versions of the functions exported from here are used.
- */
-
-#include <linux/pci.h>
-#include <asm/processor.h>
-
-#include "ipath_kernel.h"
-
-/**
- * ipath_enable_wc - enable write combining for MMIO writes to the device
- * @dd: infinipath device
- *
- * This routine is x86_64-specific; it twiddles the CPU's MTRRs to enable
- * write combining.
- */
-int ipath_enable_wc(struct ipath_devdata *dd)
-{
-       int ret = 0;
-       u64 pioaddr, piolen;
-       unsigned bits;
-       const unsigned long addr = pci_resource_start(dd->pcidev, 0);
-       const size_t len = pci_resource_len(dd->pcidev, 0);
-
-       /*
-        * Set the PIO buffers to be WCCOMB, so we get HT bursts to the
-        * chip.  Linux (possibly the hardware) requires it to be on a power
-        * of 2 address matching the length (which has to be a power of 2).
-        * For rev1, that means the base address, for rev2, it will be just
-        * the PIO buffers themselves.
-        * For chips with two sets of buffers, the calculations are
-        * somewhat more complicated; we need to sum, and the piobufbase
-        * register has both offsets, 2K in low 32 bits, 4K in high 32 bits.
-        * The buffers are still packed, so a single range covers both.
-        */
-       if (dd->ipath_piobcnt2k && dd->ipath_piobcnt4k) { /* 2 sizes */
-               unsigned long pio2kbase, pio4kbase;
-               pio2kbase = dd->ipath_piobufbase & 0xffffffffUL;
-               pio4kbase = (dd->ipath_piobufbase >> 32) & 0xffffffffUL;
-               if (pio2kbase < pio4kbase) { /* all, for now */
-                       pioaddr = addr + pio2kbase;
-                       piolen = pio4kbase - pio2kbase +
-                               dd->ipath_piobcnt4k * dd->ipath_4kalign;
-               } else {
-                       pioaddr = addr + pio4kbase;
-                       piolen = pio2kbase - pio4kbase +
-                               dd->ipath_piobcnt2k * dd->ipath_palign;
-               }
-       } else {  /* single buffer size (2K, currently) */
-               pioaddr = addr + dd->ipath_piobufbase;
-               piolen = dd->ipath_piobcnt2k * dd->ipath_palign +
-                       dd->ipath_piobcnt4k * dd->ipath_4kalign;
-       }
-
-       for (bits = 0; !(piolen & (1ULL << bits)); bits++)
-               /* do nothing */ ;
-
-       if (piolen != (1ULL << bits)) {
-               piolen >>= bits;
-               while (piolen >>= 1)
-                       bits++;
-               piolen = 1ULL << (bits + 1);
-       }
-       if (pioaddr & (piolen - 1)) {
-               u64 atmp;
-               ipath_dbg("pioaddr %llx not on right boundary for size "
-                         "%llx, fixing\n",
-                         (unsigned long long) pioaddr,
-                         (unsigned long long) piolen);
-               atmp = pioaddr & ~(piolen - 1);
-               if (atmp < addr || (atmp + piolen) > (addr + len)) {
-                       ipath_dev_err(dd, "No way to align address/size "
-                                     "(%llx/%llx), no WC mtrr\n",
-                                     (unsigned long long) atmp,
-                                     (unsigned long long) piolen << 1);
-                       ret = -ENODEV;
-               } else {
-                       ipath_dbg("changing WC base from %llx to %llx, "
-                                 "len from %llx to %llx\n",
-                                 (unsigned long long) pioaddr,
-                                 (unsigned long long) atmp,
-                                 (unsigned long long) piolen,
-                                 (unsigned long long) piolen << 1);
-                       pioaddr = atmp;
-                       piolen <<= 1;
-               }
-       }
-
-       if (!ret) {
-               dd->wc_cookie = arch_phys_wc_add(pioaddr, piolen);
-               if (dd->wc_cookie < 0) {
-                       ipath_dev_err(dd, "Seting mtrr failed on PIO buffers\n");
-                       ret = -ENODEV;
-               } else if (dd->wc_cookie == 0)
-                       ipath_cdbg(VERBOSE, "Set mtrr for chip to WC not needed\n");
-               else
-                       ipath_cdbg(VERBOSE, "Set mtrr for chip to WC\n");
-       }
-
-       return ret;
-}
-
-/**
- * ipath_disable_wc - disable write combining for MMIO writes to the device
- * @dd: infinipath device
- */
-void ipath_disable_wc(struct ipath_devdata *dd)
-{
-       arch_phys_wc_del(dd->wc_cookie);
-}
index efd6f4560d3e55ceeab571fdc1d0ebc242a38df2..7e8037e230b8e024ae2bb9d8797bba1d9eab7141 100644 (file)
@@ -1,7 +1,7 @@
 menu "Speakup console speech"
 
 config SPEAKUP
-       depends on VT
+       depends on VT && !MN10300
        tristate "Speakup core"
        ---help---
                This is the Speakup screen reader.  Think of it as a
index 63c59bc89b040fe68fcf01d364d5b636e8ac6707..30cf973f326dc467fba09f1a7c85a05acf0f9101 100644 (file)
@@ -264,8 +264,9 @@ static struct notifier_block vt_notifier_block = {
        .notifier_call = vt_notifier_call,
 };
 
-static unsigned char get_attributes(u16 *pos)
+static unsigned char get_attributes(struct vc_data *vc, u16 *pos)
 {
+       pos = screen_pos(vc, pos - (u16 *)vc->vc_origin, 1);
        return (u_char) (scr_readw(pos) >> 8);
 }
 
@@ -275,7 +276,7 @@ static void speakup_date(struct vc_data *vc)
        spk_y = spk_cy = vc->vc_y;
        spk_pos = spk_cp = vc->vc_pos;
        spk_old_attr = spk_attr;
-       spk_attr = get_attributes((u_short *) spk_pos);
+       spk_attr = get_attributes(vc, (u_short *)spk_pos);
 }
 
 static void bleep(u_short val)
@@ -469,8 +470,12 @@ static u16 get_char(struct vc_data *vc, u16 *pos, u_char *attribs)
        u16 ch = ' ';
 
        if (vc && pos) {
-               u16 w = scr_readw(pos);
-               u16 c = w & 0xff;
+               u16 w;
+               u16 c;
+
+               pos = screen_pos(vc, pos - (u16 *)vc->vc_origin, 1);
+               w = scr_readw(pos);
+               c = w & 0xff;
 
                if (w & vc->vc_hi_font_mask)
                        c |= 0x100;
@@ -746,7 +751,7 @@ static int get_line(struct vc_data *vc)
        u_char tmp2;
 
        spk_old_attr = spk_attr;
-       spk_attr = get_attributes((u_short *) spk_pos);
+       spk_attr = get_attributes(vc, (u_short *)spk_pos);
        for (i = 0; i < vc->vc_cols; i++) {
                buf[i] = (u_char) get_char(vc, (u_short *) tmp, &tmp2);
                tmp += 2;
@@ -811,7 +816,7 @@ static int say_from_to(struct vc_data *vc, u_long from, u_long to,
        u_short saved_punc_mask = spk_punc_mask;
 
        spk_old_attr = spk_attr;
-       spk_attr = get_attributes((u_short *) from);
+       spk_attr = get_attributes(vc, (u_short *)from);
        while (from < to) {
                buf[i++] = (char)get_char(vc, (u_short *) from, &tmp);
                from += 2;
@@ -886,7 +891,7 @@ static int get_sentence_buf(struct vc_data *vc, int read_punc)
        sentmarks[bn][0] = &sentbuf[bn][0];
        i = 0;
        spk_old_attr = spk_attr;
-       spk_attr = get_attributes((u_short *) start);
+       spk_attr = get_attributes(vc, (u_short *)start);
 
        while (start < end) {
                sentbuf[bn][i] = (char)get_char(vc, (u_short *) start, &tmp);
@@ -1585,7 +1590,7 @@ static int count_highlight_color(struct vc_data *vc)
                u16 *ptr;
 
                for (ptr = start; ptr < end; ptr++) {
-                       ch = get_attributes(ptr);
+                       ch = get_attributes(vc, ptr);
                        bg = (ch & 0x70) >> 4;
                        speakup_console[vc_num]->ht.bgcount[bg]++;
                }
index aa5ab6c80ed4ffefc12aaca9c1337e403371d478..41ef099b7aa69f345d33998c46e2bce11ad691d9 100644 (file)
@@ -142,7 +142,9 @@ static void __speakup_paste_selection(struct work_struct *work)
        struct tty_ldisc *ld;
        DECLARE_WAITQUEUE(wait, current);
 
-       ld = tty_ldisc_ref_wait(tty);
+       ld = tty_ldisc_ref(tty);
+       if (!ld)
+               goto tty_unref;
        tty_buffer_lock_exclusive(&vc->port);
 
        add_wait_queue(&vc->paste_wait, &wait);
@@ -162,6 +164,7 @@ static void __speakup_paste_selection(struct work_struct *work)
 
        tty_buffer_unlock_exclusive(&vc->port);
        tty_ldisc_deref(ld);
+tty_unref:
        tty_kref_put(tty);
 }
 
index 3b5835b2812849f0c32055f065ef0be6a0c4d0d0..a5bbb338f275495c18bf6051adf60ee8a3bbcc1d 100644 (file)
@@ -6,6 +6,11 @@
 #include "spk_priv.h"
 #include "serialio.h"
 
+#include <linux/serial_core.h>
+/* WARNING:  Do not change this to <linux/serial.h> without testing that
+ * SERIAL_PORT_DFNS does get defined to the appropriate value. */
+#include <asm/serial.h>
+
 #ifndef SERIAL_PORT_DFNS
 #define SERIAL_PORT_DFNS
 #endif
@@ -23,9 +28,15 @@ const struct old_serial_port *spk_serial_init(int index)
        int baud = 9600, quot = 0;
        unsigned int cval = 0;
        int cflag = CREAD | HUPCL | CLOCAL | B9600 | CS8;
-       const struct old_serial_port *ser = rs_table + index;
+       const struct old_serial_port *ser;
        int err;
 
+       if (index >= ARRAY_SIZE(rs_table)) {
+               pr_info("no port info for ttyS%d\n", index);
+               return NULL;
+       }
+       ser = rs_table + index;
+
        /*      Divisor, bytesize and parity */
        quot = ser->baud_base / baud;
        cval = cflag & (CSIZE | CSTOPB);
index ccc0ad02d06698108ec486d11f7b0dc69c7c734a..36fa724a36c851d463afa53a67d1a8526efb75dc 100644 (file)
 /* Braswell thermal reporting device */
 #define PCI_DEVICE_ID_PROC_BSW_THERMAL 0x22DC
 
+/* Broxton thermal reporting device */
+#define PCI_DEVICE_ID_PROC_BXT0_THERMAL  0x0A8C
+#define PCI_DEVICE_ID_PROC_BXT1_THERMAL  0x1A8C
+#define PCI_DEVICE_ID_PROC_BXTX_THERMAL  0x4A8C
+#define PCI_DEVICE_ID_PROC_BXTP_THERMAL  0x5A8C
+
 struct power_config {
        u32     index;
        u32     min_uw;
@@ -404,6 +410,10 @@ static const struct pci_device_id proc_thermal_pci_ids[] = {
        { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_HSB_THERMAL)},
        { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_SKL_THERMAL)},
        { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_BSW_THERMAL)},
+       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_BXT0_THERMAL)},
+       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_BXT1_THERMAL)},
+       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_BXTX_THERMAL)},
+       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_BXTP_THERMAL)},
        { 0, },
 };
 
index 50c7da79be830a43b342999ca80ef264431e0078..00d81af648b8ed93a9d7c2e09aef8979f7d0606d 100644 (file)
@@ -136,7 +136,7 @@ struct pch_dev_ops {
 
 
 /* dev ops for Wildcat Point */
-static struct pch_dev_ops pch_dev_ops_wpt = {
+static const struct pch_dev_ops pch_dev_ops_wpt = {
        .hw_init = pch_wpt_init,
        .get_temp = pch_wpt_get_temp,
 };
index 13d01edc7a043812611719bf7a342260f09ababa..44b9c485157d8c6e624548ee7c7cfccacea241d9 100644 (file)
@@ -75,11 +75,11 @@ struct rcar_thermal_priv {
 #define rcar_has_irq_support(priv)     ((priv)->common->base)
 #define rcar_id_to_shift(priv)         ((priv)->id * 8)
 
-#ifdef DEBUG
-# define rcar_force_update_temp(priv)  1
-#else
-# define rcar_force_update_temp(priv)  0
-#endif
+static const struct of_device_id rcar_thermal_dt_ids[] = {
+       { .compatible = "renesas,rcar-thermal", },
+       {},
+};
+MODULE_DEVICE_TABLE(of, rcar_thermal_dt_ids);
 
 /*
  *             basic functions
@@ -203,14 +203,26 @@ err_out_unlock:
 static int rcar_thermal_get_temp(struct thermal_zone_device *zone, int *temp)
 {
        struct rcar_thermal_priv *priv = rcar_zone_to_priv(zone);
+       int tmp;
+       int ret;
 
-       if (!rcar_has_irq_support(priv) || rcar_force_update_temp(priv))
-               rcar_thermal_update_temp(priv);
+       ret = rcar_thermal_update_temp(priv);
+       if (ret < 0)
+               return ret;
 
        mutex_lock(&priv->lock);
-       *temp =  MCELSIUS((priv->ctemp * 5) - 65);
+       tmp =  MCELSIUS((priv->ctemp * 5) - 65);
        mutex_unlock(&priv->lock);
 
+       if ((tmp < MCELSIUS(-45)) || (tmp > MCELSIUS(125))) {
+               struct device *dev = rcar_priv_to_dev(priv);
+
+               dev_err(dev, "it couldn't measure temperature correctly\n");
+               return -EIO;
+       }
+
+       *temp = tmp;
+
        return 0;
 }
 
@@ -288,6 +300,9 @@ static void _rcar_thermal_irq_ctrl(struct rcar_thermal_priv *priv, int enable)
        unsigned long flags;
        u32 mask = 0x3 << rcar_id_to_shift(priv); /* enable Rising/Falling */
 
+       if (!rcar_has_irq_support(priv))
+               return;
+
        spin_lock_irqsave(&common->lock, flags);
 
        rcar_thermal_common_bset(common, INTMSK, mask, enable ? 0 : mask);
@@ -299,11 +314,15 @@ static void rcar_thermal_work(struct work_struct *work)
 {
        struct rcar_thermal_priv *priv;
        int cctemp, nctemp;
+       int ret;
 
        priv = container_of(work, struct rcar_thermal_priv, work.work);
 
        rcar_thermal_get_temp(priv->zone, &cctemp);
-       rcar_thermal_update_temp(priv);
+       ret = rcar_thermal_update_temp(priv);
+       if (ret < 0)
+               return;
+
        rcar_thermal_irq_enable(priv);
 
        rcar_thermal_get_temp(priv->zone, &nctemp);
@@ -368,8 +387,7 @@ static int rcar_thermal_remove(struct platform_device *pdev)
        struct rcar_thermal_priv *priv;
 
        rcar_thermal_for_each_priv(priv, common) {
-               if (rcar_has_irq_support(priv))
-                       rcar_thermal_irq_disable(priv);
+               rcar_thermal_irq_disable(priv);
                thermal_zone_device_unregister(priv->zone);
        }
 
@@ -441,7 +459,9 @@ static int rcar_thermal_probe(struct platform_device *pdev)
                mutex_init(&priv->lock);
                INIT_LIST_HEAD(&priv->list);
                INIT_DELAYED_WORK(&priv->work, rcar_thermal_work);
-               rcar_thermal_update_temp(priv);
+               ret = rcar_thermal_update_temp(priv);
+               if (ret < 0)
+                       goto error_unregister;
 
                priv->zone = thermal_zone_device_register("rcar_thermal",
                                                1, 0, priv,
@@ -453,8 +473,7 @@ static int rcar_thermal_probe(struct platform_device *pdev)
                        goto error_unregister;
                }
 
-               if (rcar_has_irq_support(priv))
-                       rcar_thermal_irq_enable(priv);
+               rcar_thermal_irq_enable(priv);
 
                list_move_tail(&priv->list, &common->head);
 
@@ -484,12 +503,6 @@ error_unregister:
        return ret;
 }
 
-static const struct of_device_id rcar_thermal_dt_ids[] = {
-       { .compatible = "renesas,rcar-thermal", },
-       {},
-};
-MODULE_DEVICE_TABLE(of, rcar_thermal_dt_ids);
-
 static struct platform_driver rcar_thermal_driver = {
        .driver = {
                .name   = "rcar_thermal",
index e845841ab036cc82033d0a6829058a0f15f2543e..b58e3fb9b31186921e51edd0fa70bfe0e8c88059 100644 (file)
@@ -38,7 +38,7 @@ enum tshut_mode {
 };
 
 /**
- * the system Temperature Sensors tshut(tshut) polarity
+ * The system Temperature Sensors tshut(tshut) polarity
  * the bit 8 is tshut polarity.
  * 0: low active, 1: high active
  */
@@ -57,10 +57,10 @@ enum sensor_id {
 };
 
 /**
-* The conversion table has the adc value and temperature.
-* ADC_DECREMENT is the adc value decremnet.(e.g. v2_code_table)
-* ADC_INCREMNET is the adc value incremnet.(e.g. v3_code_table)
-*/
+ * The conversion table has the adc value and temperature.
+ * ADC_DECREMENT: the adc value is of diminishing.(e.g. v2_code_table)
+ * ADC_INCREMENT: the adc value is incremental.(e.g. v3_code_table)
+ */
 enum adc_sort_mode {
        ADC_DECREMENT = 0,
        ADC_INCREMENT,
@@ -72,16 +72,17 @@ enum adc_sort_mode {
  */
 #define SOC_MAX_SENSORS        2
 
+/**
+ * struct chip_tsadc_table: hold information about chip-specific differences
+ * @id: conversion table
+ * @length: size of conversion table
+ * @data_mask: mask to apply on data inputs
+ * @mode: sort mode of this adc variant (incrementing or decrementing)
+ */
 struct chip_tsadc_table {
        const struct tsadc_table *id;
-
-       /* the array table size*/
        unsigned int length;
-
-       /* that analogic mask data */
        u32 data_mask;
-
-       /* the sort mode is adc value that increment or decrement in table */
        enum adc_sort_mode mode;
 };
 
@@ -153,6 +154,7 @@ struct rockchip_thermal_data {
 #define TSADCV2_SHUT_2GPIO_SRC_EN(chn)         BIT(4 + (chn))
 #define TSADCV2_SHUT_2CRU_SRC_EN(chn)          BIT(8 + (chn))
 
+#define TSADCV1_INT_PD_CLEAR_MASK              ~BIT(16)
 #define TSADCV2_INT_PD_CLEAR_MASK              ~BIT(8)
 
 #define TSADCV2_DATA_MASK                      0xfff
@@ -168,6 +170,51 @@ struct tsadc_table {
        int temp;
 };
 
+/**
+ * Note:
+ * Code to Temperature mapping of the Temperature sensor is a piece wise linear
+ * curve.Any temperature, code faling between to 2 give temperatures can be
+ * linearly interpolated.
+ * Code to Temperature mapping should be updated based on sillcon results.
+ */
+static const struct tsadc_table v1_code_table[] = {
+       {TSADCV3_DATA_MASK, -40000},
+       {436, -40000},
+       {431, -35000},
+       {426, -30000},
+       {421, -25000},
+       {416, -20000},
+       {411, -15000},
+       {406, -10000},
+       {401, -5000},
+       {395, 0},
+       {390, 5000},
+       {385, 10000},
+       {380, 15000},
+       {375, 20000},
+       {370, 25000},
+       {364, 30000},
+       {359, 35000},
+       {354, 40000},
+       {349, 45000},
+       {343, 50000},
+       {338, 55000},
+       {333, 60000},
+       {328, 65000},
+       {322, 70000},
+       {317, 75000},
+       {312, 80000},
+       {307, 85000},
+       {301, 90000},
+       {296, 95000},
+       {291, 100000},
+       {286, 105000},
+       {280, 110000},
+       {275, 115000},
+       {270, 120000},
+       {264, 125000},
+};
+
 static const struct tsadc_table v2_code_table[] = {
        {TSADCV2_DATA_MASK, -40000},
        {3800, -40000},
@@ -245,6 +292,44 @@ static const struct tsadc_table v3_code_table[] = {
        {TSADCV3_DATA_MASK, 125000},
 };
 
+static const struct tsadc_table v4_code_table[] = {
+       {TSADCV3_DATA_MASK, -40000},
+       {431, -40000},
+       {426, -35000},
+       {421, -30000},
+       {415, -25000},
+       {410, -20000},
+       {405, -15000},
+       {399, -10000},
+       {394, -5000},
+       {389, 0},
+       {383, 5000},
+       {378, 10000},
+       {373, 15000},
+       {367, 20000},
+       {362, 25000},
+       {357, 30000},
+       {351, 35000},
+       {346, 40000},
+       {340, 45000},
+       {335, 50000},
+       {330, 55000},
+       {324, 60000},
+       {319, 65000},
+       {313, 70000},
+       {308, 75000},
+       {302, 80000},
+       {297, 85000},
+       {291, 90000},
+       {286, 95000},
+       {281, 100000},
+       {275, 105000},
+       {270, 110000},
+       {264, 115000},
+       {259, 120000},
+       {253, 125000},
+};
+
 static u32 rk_tsadcv2_temp_to_code(struct chip_tsadc_table table,
                                   int temp)
 {
@@ -368,6 +453,14 @@ static void rk_tsadcv2_initialize(void __iomem *regs,
                       regs + TSADCV2_HIGHT_TSHUT_DEBOUNCE);
 }
 
+static void rk_tsadcv1_irq_ack(void __iomem *regs)
+{
+       u32 val;
+
+       val = readl_relaxed(regs + TSADCV2_INT_PD);
+       writel_relaxed(val & TSADCV1_INT_PD_CLEAR_MASK, regs + TSADCV2_INT_PD);
+}
+
 static void rk_tsadcv2_irq_ack(void __iomem *regs)
 {
        u32 val;
@@ -429,6 +522,29 @@ static void rk_tsadcv2_tshut_mode(int chn, void __iomem *regs,
        writel_relaxed(val, regs + TSADCV2_INT_EN);
 }
 
+static const struct rockchip_tsadc_chip rk3228_tsadc_data = {
+       .chn_id[SENSOR_CPU] = 0, /* cpu sensor is channel 0 */
+       .chn_num = 1, /* one channel for tsadc */
+
+       .tshut_mode = TSHUT_MODE_GPIO, /* default TSHUT via GPIO give PMIC */
+       .tshut_polarity = TSHUT_LOW_ACTIVE, /* default TSHUT LOW ACTIVE */
+       .tshut_temp = 95000,
+
+       .initialize = rk_tsadcv2_initialize,
+       .irq_ack = rk_tsadcv1_irq_ack,
+       .control = rk_tsadcv2_control,
+       .get_temp = rk_tsadcv2_get_temp,
+       .set_tshut_temp = rk_tsadcv2_tshut_temp,
+       .set_tshut_mode = rk_tsadcv2_tshut_mode,
+
+       .table = {
+               .id = v1_code_table,
+               .length = ARRAY_SIZE(v1_code_table),
+               .data_mask = TSADCV3_DATA_MASK,
+               .mode = ADC_DECREMENT,
+       },
+};
+
 static const struct rockchip_tsadc_chip rk3288_tsadc_data = {
        .chn_id[SENSOR_CPU] = 1, /* cpu sensor is channel 1 */
        .chn_id[SENSOR_GPU] = 2, /* gpu sensor is channel 2 */
@@ -477,7 +593,35 @@ static const struct rockchip_tsadc_chip rk3368_tsadc_data = {
        },
 };
 
+static const struct rockchip_tsadc_chip rk3399_tsadc_data = {
+       .chn_id[SENSOR_CPU] = 0, /* cpu sensor is channel 0 */
+       .chn_id[SENSOR_GPU] = 1, /* gpu sensor is channel 1 */
+       .chn_num = 2, /* two channels for tsadc */
+
+       .tshut_mode = TSHUT_MODE_GPIO, /* default TSHUT via GPIO give PMIC */
+       .tshut_polarity = TSHUT_LOW_ACTIVE, /* default TSHUT LOW ACTIVE */
+       .tshut_temp = 95000,
+
+       .initialize = rk_tsadcv2_initialize,
+       .irq_ack = rk_tsadcv1_irq_ack,
+       .control = rk_tsadcv2_control,
+       .get_temp = rk_tsadcv2_get_temp,
+       .set_tshut_temp = rk_tsadcv2_tshut_temp,
+       .set_tshut_mode = rk_tsadcv2_tshut_mode,
+
+       .table = {
+               .id = v4_code_table,
+               .length = ARRAY_SIZE(v4_code_table),
+               .data_mask = TSADCV3_DATA_MASK,
+               .mode = ADC_DECREMENT,
+       },
+};
+
 static const struct of_device_id of_rockchip_thermal_match[] = {
+       {
+               .compatible = "rockchip,rk3228-tsadc",
+               .data = (void *)&rk3228_tsadc_data,
+       },
        {
                .compatible = "rockchip,rk3288-tsadc",
                .data = (void *)&rk3288_tsadc_data,
@@ -486,6 +630,10 @@ static const struct of_device_id of_rockchip_thermal_match[] = {
                .compatible = "rockchip,rk3368-tsadc",
                .data = (void *)&rk3368_tsadc_data,
        },
+       {
+               .compatible = "rockchip,rk3399-tsadc",
+               .data = (void *)&rk3399_tsadc_data,
+       },
        { /* end */ },
 };
 MODULE_DEVICE_TABLE(of, of_rockchip_thermal_match);
@@ -617,7 +765,7 @@ rockchip_thermal_register_sensor(struct platform_device *pdev,
        return 0;
 }
 
-/*
+/**
  * Reset TSADC Controller, reset all tsadc registers.
  */
 static void rockchip_thermal_reset_controller(struct reset_control *reset)
index 2f9f7086ac3dd0d7a3a71015593fffacf8ee586e..ea9366ad3e6bb285e52e368691a0d495cbb3429f 100644 (file)
@@ -63,6 +63,19 @@ static unsigned long get_target_state(struct thermal_instance *instance,
        next_target = instance->target;
        dev_dbg(&cdev->device, "cur_state=%ld\n", cur_state);
 
+       if (!instance->initialized) {
+               if (throttle) {
+                       next_target = (cur_state + 1) >= instance->upper ?
+                                       instance->upper :
+                                       ((cur_state + 1) < instance->lower ?
+                                       instance->lower : (cur_state + 1));
+               } else {
+                       next_target = THERMAL_NO_TARGET;
+               }
+
+               return next_target;
+       }
+
        switch (trend) {
        case THERMAL_TREND_RAISING:
                if (throttle) {
@@ -149,7 +162,7 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip)
                dev_dbg(&instance->cdev->device, "old_target=%d, target=%d\n",
                                        old_target, (int)instance->target);
 
-               if (old_target == instance->target)
+               if (instance->initialized && old_target == instance->target)
                        continue;
 
                /* Activate a passive thermal instance */
@@ -161,7 +174,7 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip)
                        instance->target == THERMAL_NO_TARGET)
                        update_passive_instance(tz, trip_type, -1);
 
-
+               instance->initialized = true;
                instance->cdev->updated = false; /* cdev needs update */
        }
 
index d9e525cc9c1ce24bcd9d55dd59730f375b15d552..a0a8fd1235e2a21cbe5553c440b26eafecf8042e 100644 (file)
@@ -37,6 +37,7 @@
 #include <linux/of.h>
 #include <net/netlink.h>
 #include <net/genetlink.h>
+#include <linux/suspend.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/thermal.h>
@@ -59,6 +60,8 @@ static LIST_HEAD(thermal_governor_list);
 static DEFINE_MUTEX(thermal_list_lock);
 static DEFINE_MUTEX(thermal_governor_lock);
 
+static atomic_t in_suspend;
+
 static struct thermal_governor *def_governor;
 
 static struct thermal_governor *__find_governor(const char *name)
@@ -532,14 +535,31 @@ static void update_temperature(struct thermal_zone_device *tz)
        mutex_unlock(&tz->lock);
 
        trace_thermal_temperature(tz);
-       dev_dbg(&tz->device, "last_temperature=%d, current_temperature=%d\n",
-                               tz->last_temperature, tz->temperature);
+       if (tz->last_temperature == THERMAL_TEMP_INVALID)
+               dev_dbg(&tz->device, "last_temperature N/A, current_temperature=%d\n",
+                       tz->temperature);
+       else
+               dev_dbg(&tz->device, "last_temperature=%d, current_temperature=%d\n",
+                       tz->last_temperature, tz->temperature);
+}
+
+static void thermal_zone_device_reset(struct thermal_zone_device *tz)
+{
+       struct thermal_instance *pos;
+
+       tz->temperature = THERMAL_TEMP_INVALID;
+       tz->passive = 0;
+       list_for_each_entry(pos, &tz->thermal_instances, tz_node)
+               pos->initialized = false;
 }
 
 void thermal_zone_device_update(struct thermal_zone_device *tz)
 {
        int count;
 
+       if (atomic_read(&in_suspend))
+               return;
+
        if (!tz->ops->get_temp)
                return;
 
@@ -676,8 +696,12 @@ trip_point_temp_store(struct device *dev, struct device_attribute *attr,
                return -EINVAL;
 
        ret = tz->ops->set_trip_temp(tz, trip, temperature);
+       if (ret)
+               return ret;
 
-       return ret ? ret : count;
+       thermal_zone_device_update(tz);
+
+       return count;
 }
 
 static ssize_t
@@ -1321,6 +1345,7 @@ int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz,
        if (!result) {
                list_add_tail(&dev->tz_node, &tz->thermal_instances);
                list_add_tail(&dev->cdev_node, &cdev->thermal_instances);
+               atomic_set(&tz->need_update, 1);
        }
        mutex_unlock(&cdev->lock);
        mutex_unlock(&tz->lock);
@@ -1430,6 +1455,7 @@ __thermal_cooling_device_register(struct device_node *np,
                                  const struct thermal_cooling_device_ops *ops)
 {
        struct thermal_cooling_device *cdev;
+       struct thermal_zone_device *pos = NULL;
        int result;
 
        if (type && strlen(type) >= THERMAL_NAME_LENGTH)
@@ -1474,6 +1500,12 @@ __thermal_cooling_device_register(struct device_node *np,
        /* Update binding information for 'this' new cdev */
        bind_cdev(cdev);
 
+       mutex_lock(&thermal_list_lock);
+       list_for_each_entry(pos, &thermal_tz_list, node)
+               if (atomic_cmpxchg(&pos->need_update, 1, 0))
+                       thermal_zone_device_update(pos);
+       mutex_unlock(&thermal_list_lock);
+
        return cdev;
 }
 
@@ -1806,6 +1838,8 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,
        tz->trips = trips;
        tz->passive_delay = passive_delay;
        tz->polling_delay = polling_delay;
+       /* A new thermal zone needs to be updated anyway. */
+       atomic_set(&tz->need_update, 1);
 
        dev_set_name(&tz->device, "thermal_zone%d", tz->id);
        result = device_register(&tz->device);
@@ -1900,7 +1934,10 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,
 
        INIT_DELAYED_WORK(&(tz->poll_queue), thermal_zone_device_check);
 
-       thermal_zone_device_update(tz);
+       thermal_zone_device_reset(tz);
+       /* Update the new thermal zone and mark it as already updated. */
+       if (atomic_cmpxchg(&tz->need_update, 1, 0))
+               thermal_zone_device_update(tz);
 
        return tz;
 
@@ -2140,6 +2177,36 @@ static void thermal_unregister_governors(void)
        thermal_gov_power_allocator_unregister();
 }
 
+static int thermal_pm_notify(struct notifier_block *nb,
+                               unsigned long mode, void *_unused)
+{
+       struct thermal_zone_device *tz;
+
+       switch (mode) {
+       case PM_HIBERNATION_PREPARE:
+       case PM_RESTORE_PREPARE:
+       case PM_SUSPEND_PREPARE:
+               atomic_set(&in_suspend, 1);
+               break;
+       case PM_POST_HIBERNATION:
+       case PM_POST_RESTORE:
+       case PM_POST_SUSPEND:
+               atomic_set(&in_suspend, 0);
+               list_for_each_entry(tz, &thermal_tz_list, node) {
+                       thermal_zone_device_reset(tz);
+                       thermal_zone_device_update(tz);
+               }
+               break;
+       default:
+               break;
+       }
+       return 0;
+}
+
+static struct notifier_block thermal_pm_nb = {
+       .notifier_call = thermal_pm_notify,
+};
+
 static int __init thermal_init(void)
 {
        int result;
@@ -2160,6 +2227,11 @@ static int __init thermal_init(void)
        if (result)
                goto exit_netlink;
 
+       result = register_pm_notifier(&thermal_pm_nb);
+       if (result)
+               pr_warn("Thermal: Can not register suspend notifier, return %d\n",
+                       result);
+
        return 0;
 
 exit_netlink:
@@ -2179,6 +2251,7 @@ error:
 
 static void __exit thermal_exit(void)
 {
+       unregister_pm_notifier(&thermal_pm_nb);
        of_thermal_destroy_zones();
        genetlink_exit();
        class_unregister(&thermal_class);
index d7ac1fccd6598a80453dc51da655e37b30b85ac7..749d41abfbabecfc8fc33f54f4bc2e5d680450f8 100644 (file)
@@ -41,6 +41,7 @@ struct thermal_instance {
        struct thermal_zone_device *tz;
        struct thermal_cooling_device *cdev;
        int trip;
+       bool initialized;
        unsigned long upper;    /* Highest cooling state for this trip point */
        unsigned long lower;    /* Lowest cooling state for this trip point */
        unsigned long target;   /* expected cooling state */
index d9a5fc28fef4bbfe56850e258b726ca3464fd82b..b280abaad91b717639011f1407157eab5f3aaad1 100644 (file)
@@ -269,16 +269,13 @@ static void n_tty_check_throttle(struct tty_struct *tty)
 
 static void n_tty_check_unthrottle(struct tty_struct *tty)
 {
-       if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
-           tty->link->ldisc->ops->write_wakeup == n_tty_write_wakeup) {
+       if (tty->driver->type == TTY_DRIVER_TYPE_PTY) {
                if (chars_in_buffer(tty) > TTY_THRESHOLD_UNTHROTTLE)
                        return;
                if (!tty->count)
                        return;
                n_tty_kick_worker(tty);
-               n_tty_write_wakeup(tty->link);
-               if (waitqueue_active(&tty->link->write_wait))
-                       wake_up_interruptible_poll(&tty->link->write_wait, POLLOUT);
+               tty_wakeup(tty->link);
                return;
        }
 
index 4097f3f65b3bb1bfc3e3d4d6a40f05fb46c70618..e71ec78fc11ea1ab074486c8876fa855337564fb 100644 (file)
@@ -1379,6 +1379,9 @@ ce4100_serial_setup(struct serial_private *priv,
 #define PCI_DEVICE_ID_INTEL_BSW_UART1  0x228a
 #define PCI_DEVICE_ID_INTEL_BSW_UART2  0x228c
 
+#define PCI_DEVICE_ID_INTEL_BDW_UART1  0x9ce3
+#define PCI_DEVICE_ID_INTEL_BDW_UART2  0x9ce4
+
 #define BYT_PRV_CLK                    0x800
 #define BYT_PRV_CLK_EN                 (1 << 0)
 #define BYT_PRV_CLK_M_VAL_SHIFT                1
@@ -1461,11 +1464,13 @@ byt_serial_setup(struct serial_private *priv,
        switch (pdev->device) {
        case PCI_DEVICE_ID_INTEL_BYT_UART1:
        case PCI_DEVICE_ID_INTEL_BSW_UART1:
+       case PCI_DEVICE_ID_INTEL_BDW_UART1:
                rx_param->src_id = 3;
                tx_param->dst_id = 2;
                break;
        case PCI_DEVICE_ID_INTEL_BYT_UART2:
        case PCI_DEVICE_ID_INTEL_BSW_UART2:
+       case PCI_DEVICE_ID_INTEL_BDW_UART2:
                rx_param->src_id = 5;
                tx_param->dst_id = 4;
                break;
@@ -2062,6 +2067,20 @@ static struct pci_serial_quirk pci_serial_quirks[] __refdata = {
                .subdevice      = PCI_ANY_ID,
                .setup          = byt_serial_setup,
        },
+       {
+               .vendor         = PCI_VENDOR_ID_INTEL,
+               .device         = PCI_DEVICE_ID_INTEL_BDW_UART1,
+               .subvendor      = PCI_ANY_ID,
+               .subdevice      = PCI_ANY_ID,
+               .setup          = byt_serial_setup,
+       },
+       {
+               .vendor         = PCI_VENDOR_ID_INTEL,
+               .device         = PCI_DEVICE_ID_INTEL_BDW_UART2,
+               .subvendor      = PCI_ANY_ID,
+               .subdevice      = PCI_ANY_ID,
+               .setup          = byt_serial_setup,
+       },
        /*
         * ITE
         */
@@ -5506,6 +5525,16 @@ static struct pci_device_id serial_pci_tbl[] = {
                PCI_CLASS_COMMUNICATION_SERIAL << 8, 0xff0000,
                pbn_byt },
 
+       /* Intel Broadwell */
+       {       PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BDW_UART1,
+               PCI_ANY_ID,  PCI_ANY_ID,
+               PCI_CLASS_COMMUNICATION_SERIAL << 8, 0xff0000,
+               pbn_byt },
+       {       PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BDW_UART2,
+               PCI_ANY_ID,  PCI_ANY_ID,
+               PCI_CLASS_COMMUNICATION_SERIAL << 8, 0xff0000,
+               pbn_byt },
+
        /*
         * Intel Quark x1000
         */
index 892c923547450518cb5d7a4714f570a6803253bd..5cec01c75691a6b7ee58ea65346c05572db1bab1 100644 (file)
@@ -1463,13 +1463,13 @@ static int tty_reopen(struct tty_struct *tty)
 {
        struct tty_driver *driver = tty->driver;
 
-       if (!tty->count)
-               return -EIO;
-
        if (driver->type == TTY_DRIVER_TYPE_PTY &&
            driver->subtype == PTY_TYPE_MASTER)
                return -EIO;
 
+       if (!tty->count)
+               return -EAGAIN;
+
        if (test_bit(TTY_EXCLUSIVE, &tty->flags) && !capable(CAP_SYS_ADMIN))
                return -EBUSY;
 
@@ -2065,7 +2065,12 @@ retry_open:
 
                if (tty) {
                        mutex_unlock(&tty_mutex);
-                       tty_lock(tty);
+                       retval = tty_lock_interruptible(tty);
+                       if (retval) {
+                               if (retval == -EINTR)
+                                       retval = -ERESTARTSYS;
+                               goto err_unref;
+                       }
                        /* safe to drop the kref from tty_driver_lookup_tty() */
                        tty_kref_put(tty);
                        retval = tty_reopen(tty);
@@ -2083,7 +2088,11 @@ retry_open:
 
        if (IS_ERR(tty)) {
                retval = PTR_ERR(tty);
-               goto err_file;
+               if (retval != -EAGAIN || signal_pending(current))
+                       goto err_file;
+               tty_free_file(filp);
+               schedule();
+               goto retry_open;
        }
 
        tty_add_file(tty, filp);
@@ -2152,6 +2161,7 @@ retry_open:
        return 0;
 err_unlock:
        mutex_unlock(&tty_mutex);
+err_unref:
        /* after locks to avoid deadlock */
        if (!IS_ERR_OR_NULL(driver))
                tty_driver_kref_put(driver);
@@ -2648,6 +2658,28 @@ static int tiocsetd(struct tty_struct *tty, int __user *p)
        return ret;
 }
 
+/**
+ *     tiocgetd        -       get line discipline
+ *     @tty: tty device
+ *     @p: pointer to user data
+ *
+ *     Retrieves the line discipline id directly from the ldisc.
+ *
+ *     Locking: waits for ldisc reference (in case the line discipline
+ *             is changing or the tty is being hungup)
+ */
+
+static int tiocgetd(struct tty_struct *tty, int __user *p)
+{
+       struct tty_ldisc *ld;
+       int ret;
+
+       ld = tty_ldisc_ref_wait(tty);
+       ret = put_user(ld->ops->num, p);
+       tty_ldisc_deref(ld);
+       return ret;
+}
+
 /**
  *     send_break      -       performed time break
  *     @tty: device to break on
@@ -2874,7 +2906,7 @@ long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
        case TIOCGSID:
                return tiocgsid(tty, real_tty, p);
        case TIOCGETD:
-               return put_user(tty->ldisc->ops->num, (int __user *)p);
+               return tiocgetd(tty, p);
        case TIOCSETD:
                return tiocsetd(tty, p);
        case TIOCVHANGUP:
index 77703a3912075a7b740eb4626a5159d55ea6f57a..d2f3c4cd697f5cd2bae639fd4d88009af5e2733b 100644 (file)
@@ -19,6 +19,14 @@ void __lockfunc tty_lock(struct tty_struct *tty)
 }
 EXPORT_SYMBOL(tty_lock);
 
+int tty_lock_interruptible(struct tty_struct *tty)
+{
+       if (WARN(tty->magic != TTY_MAGIC, "L Bad %p\n", tty))
+               return -EIO;
+       tty_kref_get(tty);
+       return mutex_lock_interruptible(&tty->legacy_mutex);
+}
+
 void __lockfunc tty_unlock(struct tty_struct *tty)
 {
        if (WARN(tty->magic != TTY_MAGIC, "U Bad %p\n", tty))
index e7cbc44eef57529a23c479413538f1c2f924b55c..bd51bdd0a7bf2536617b1a27e3c846105c31a821 100644 (file)
@@ -4250,6 +4250,7 @@ unsigned short *screen_pos(struct vc_data *vc, int w_offset, int viewed)
 {
        return screenpos(vc, 2 * w_offset, viewed);
 }
+EXPORT_SYMBOL_GPL(screen_pos);
 
 void getconsxy(struct vc_data *vc, unsigned char *p)
 {
index 26ca4f910cb020aae539f29b260a98fe1eee92b0..fa4e23930614a47ac14ece43e863286fadb91402 100644 (file)
@@ -428,7 +428,8 @@ static void acm_read_bulk_callback(struct urb *urb)
                set_bit(rb->index, &acm->read_urbs_free);
                dev_dbg(&acm->data->dev, "%s - non-zero urb status: %d\n",
                                                        __func__, status);
-               return;
+               if ((status != -ENOENT) || (urb->actual_length == 0))
+                       return;
        }
 
        usb_mark_last_busy(acm->dev);
@@ -1404,6 +1405,8 @@ made_compressed_probe:
                                usb_sndbulkpipe(usb_dev, epwrite->bEndpointAddress),
                                NULL, acm->writesize, acm_write_bulk, snd);
                snd->urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+               if (quirks & SEND_ZERO_PACKET)
+                       snd->urb->transfer_flags |= URB_ZERO_PACKET;
                snd->instance = acm;
        }
 
@@ -1838,6 +1841,11 @@ static const struct usb_device_id acm_ids[] = {
        },
 #endif
 
+       /*Samsung phone in firmware update mode */
+       { USB_DEVICE(0x04e8, 0x685d),
+       .driver_info = IGNORE_DEVICE,
+       },
+
        /* Exclude Infineon Flash Loader utility */
        { USB_DEVICE(0x058b, 0x0041),
        .driver_info = IGNORE_DEVICE,
@@ -1861,6 +1869,10 @@ static const struct usb_device_id acm_ids[] = {
        { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM,
                USB_CDC_ACM_PROTO_AT_CDMA) },
 
+       { USB_DEVICE(0x1519, 0x0452), /* Intel 7260 modem */
+       .driver_info = SEND_ZERO_PACKET,
+       },
+
        { }
 };
 
index dd9af38e7cda612106e09c89a298c4397ccf7494..ccfaba9ab4e49cd4a009132397d9467bc949bfd0 100644 (file)
@@ -134,3 +134,4 @@ struct acm {
 #define IGNORE_DEVICE                  BIT(5)
 #define QUIRK_CONTROL_LINE_STATE       BIT(6)
 #define CLEAR_HALT_CONDITIONS          BIT(7)
+#define SEND_ZERO_PACKET               BIT(8)
index 51b436918f7892b1249c7ec1b19dad515a9a5755..350dcd9af5d86eb2181e6cc8f77f25a33f712cce 100644 (file)
@@ -5401,7 +5401,6 @@ static int usb_reset_and_verify_device(struct usb_device *udev)
        }
 
        bos = udev->bos;
-       udev->bos = NULL;
 
        for (i = 0; i < SET_CONFIG_TRIES; ++i) {
 
@@ -5494,8 +5493,11 @@ done:
        usb_set_usb2_hardware_lpm(udev, 1);
        usb_unlocked_enable_lpm(udev);
        usb_enable_ltm(udev);
-       usb_release_bos_descriptor(udev);
-       udev->bos = bos;
+       /* release the new BOS descriptor allocated  by hub_port_init() */
+       if (udev->bos != bos) {
+               usb_release_bos_descriptor(udev);
+               udev->bos = bos;
+       }
        return 0;
 
 re_enumerate:
index 39a0fa8a4c0aea17b9ecf330c127b110b4b54ba4..e991d55914db649804a6357821f42e78355e6e5b 100644 (file)
@@ -572,12 +572,6 @@ static bool dwc2_force_mode(struct dwc2_hsotg *hsotg, bool host)
        set = host ? GUSBCFG_FORCEHOSTMODE : GUSBCFG_FORCEDEVMODE;
        clear = host ? GUSBCFG_FORCEDEVMODE : GUSBCFG_FORCEHOSTMODE;
 
-       /*
-        * If the force mode bit is already set, don't set it.
-        */
-       if ((gusbcfg & set) && !(gusbcfg & clear))
-               return false;
-
        gusbcfg &= ~clear;
        gusbcfg |= set;
        dwc2_writel(gusbcfg, hsotg->regs + GUSBCFG);
@@ -3278,9 +3272,6 @@ static void dwc2_get_dev_hwparams(struct dwc2_hsotg *hsotg)
 /**
  * During device initialization, read various hardware configuration
  * registers and interpret the contents.
- *
- * This should be called during driver probe. It will perform a core
- * soft reset in order to get the reset values of the parameters.
  */
 int dwc2_get_hwparams(struct dwc2_hsotg *hsotg)
 {
@@ -3288,7 +3279,6 @@ int dwc2_get_hwparams(struct dwc2_hsotg *hsotg)
        unsigned width;
        u32 hwcfg1, hwcfg2, hwcfg3, hwcfg4;
        u32 grxfsiz;
-       int retval;
 
        /*
         * Attempt to ensure this device is really a DWC_otg Controller.
@@ -3308,10 +3298,6 @@ int dwc2_get_hwparams(struct dwc2_hsotg *hsotg)
                hw->snpsid >> 12 & 0xf, hw->snpsid >> 8 & 0xf,
                hw->snpsid >> 4 & 0xf, hw->snpsid & 0xf, hw->snpsid);
 
-       retval = dwc2_core_reset(hsotg);
-       if (retval)
-               return retval;
-
        hwcfg1 = dwc2_readl(hsotg->regs + GHWCFG1);
        hwcfg2 = dwc2_readl(hsotg->regs + GHWCFG2);
        hwcfg3 = dwc2_readl(hsotg->regs + GHWCFG3);
index 510f787434b3d574e40cfe4a5a41511c92f1ad50..690b9fd98b55165a8b1d6a7b8bce30f730ac6805 100644 (file)
@@ -530,7 +530,13 @@ static int dwc2_driver_probe(struct platform_device *dev)
        if (retval)
                return retval;
 
-       /* Reset the controller and detect hardware config values */
+       /*
+        * Reset before dwc2_get_hwparams() then it could get power-on real
+        * reset value form registers.
+        */
+       dwc2_core_reset_and_force_dr_mode(hsotg);
+
+       /* Detect config values from hardware */
        retval = dwc2_get_hwparams(hsotg);
        if (retval)
                goto error;
index af023a81a0b02160f85e4800594b49aa318190c2..7d1dd82a95ac7325c3da0b45d0d855a31f7f9965 100644 (file)
@@ -2789,6 +2789,7 @@ int dwc3_gadget_init(struct dwc3 *dwc)
        dwc->gadget.speed               = USB_SPEED_UNKNOWN;
        dwc->gadget.sg_supported        = true;
        dwc->gadget.name                = "dwc3-gadget";
+       dwc->gadget.is_otg              = dwc->dr_mode == USB_DR_MODE_OTG;
 
        /*
         * FIXME We might be setting max_speed to <SUPER, however versions
index 0fbfb2b2aa08b3ade8e4b487d012188687b29e05..26ccad5d8680a3a1b5c75b4bf89018340eeb1846 100644 (file)
@@ -673,7 +673,7 @@ printer_fsync(struct file *fd, loff_t start, loff_t end, int datasync)
        unsigned long           flags;
        int                     tx_list_empty;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        spin_lock_irqsave(&dev->lock, flags);
        tx_list_empty = (likely(list_empty(&dev->tx_reqs)));
        spin_unlock_irqrestore(&dev->lock, flags);
@@ -683,7 +683,7 @@ printer_fsync(struct file *fd, loff_t start, loff_t end, int datasync)
                wait_event_interruptible(dev->tx_flush_wait,
                                (likely(list_empty(&dev->tx_reqs_active))));
        }
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        return 0;
 }
index 365afd7e14f8cade738683a06616b4116fa84fc9..7e179f81d05cae3e91fd8b976a1f2665e3958cc3 100644 (file)
@@ -1521,10 +1521,10 @@ static void destroy_ep_files (struct dev_data *dev)
                spin_unlock_irq (&dev->lock);
 
                /* break link to dcache */
-               mutex_lock (&parent->i_mutex);
+               inode_lock(parent);
                d_delete (dentry);
                dput (dentry);
-               mutex_unlock (&parent->i_mutex);
+               inode_unlock(parent);
 
                spin_lock_irq (&dev->lock);
        }
index f92f5aff0dd5e8ece600cc44b5ba6168e3f1bb81..8755b2c2aadaa60700471f5260408bff6e28b69c 100644 (file)
@@ -91,7 +91,7 @@ static ssize_t queue_dbg_read(struct file *file, char __user *buf,
        if (!access_ok(VERIFY_WRITE, buf, nbytes))
                return -EFAULT;
 
-       mutex_lock(&file_inode(file)->i_mutex);
+       inode_lock(file_inode(file));
        list_for_each_entry_safe(req, tmp_req, queue, queue) {
                len = snprintf(tmpbuf, sizeof(tmpbuf),
                                "%8p %08x %c%c%c %5d %c%c%c\n",
@@ -118,7 +118,7 @@ static ssize_t queue_dbg_read(struct file *file, char __user *buf,
                nbytes -= len;
                buf += len;
        }
-       mutex_unlock(&file_inode(file)->i_mutex);
+       inode_unlock(file_inode(file));
 
        return actual;
 }
@@ -143,7 +143,7 @@ static int regs_dbg_open(struct inode *inode, struct file *file)
        u32 *data;
        int ret = -ENOMEM;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        udc = inode->i_private;
        data = kmalloc(inode->i_size, GFP_KERNEL);
        if (!data)
@@ -158,7 +158,7 @@ static int regs_dbg_open(struct inode *inode, struct file *file)
        ret = 0;
 
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        return ret;
 }
@@ -169,11 +169,11 @@ static ssize_t regs_dbg_read(struct file *file, char __user *buf,
        struct inode *inode = file_inode(file);
        int ret;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        ret = simple_read_from_buffer(buf, nbytes, ppos,
                        file->private_data,
                        file_inode(file)->i_size);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        return ret;
 }
index daa563ff1fa05c9c28e2c8a03876c2310aaf0438..1f117c360ebbba3394acd1b934e43632ac2b3531 100644 (file)
@@ -229,6 +229,8 @@ config USB_EHCI_TEGRA
        depends on ARCH_TEGRA
        select USB_EHCI_ROOT_HUB_TT
        select USB_PHY
+       select USB_ULPI
+       select USB_ULPI_VIEWPORT
        help
          This driver enables support for the internal USB Host Controllers
          found in NVIDIA Tegra SoCs. The controllers are EHCI compliant.
index 04ce6b156b350e5dd0f9f9a321c79eb4d173b656..e0244fb3903dc85287f509bbac97600734c9b5af 100644 (file)
@@ -112,12 +112,16 @@ static inline int xhci_find_next_ext_cap(void __iomem *base, u32 start, int id)
        offset = start;
        if (!start || start == XHCI_HCC_PARAMS_OFFSET) {
                val = readl(base + XHCI_HCC_PARAMS_OFFSET);
+               if (val == ~0)
+                       return 0;
                offset = XHCI_HCC_EXT_CAPS(val) << 2;
                if (!offset)
                        return 0;
        };
        do {
                val = readl(base + offset);
+               if (val == ~0)
+                       return 0;
                if (XHCI_EXT_CAPS_ID(val) == id && offset != start)
                        return offset;
 
index c30de7c39f44088e302b257a480a6c7b159ff167..73f763c4f5f591a2c19053083dedf7b6aa252f31 100644 (file)
@@ -275,8 +275,9 @@ static bool need_bw_sch(struct usb_host_endpoint *ep,
                return false;
 
        /*
-        * for LS & FS periodic endpoints which its device don't attach
-        * to TT are also ignored, root-hub will schedule them directly
+        * for LS & FS periodic endpoints which its device is not behind
+        * a TT are also ignored, root-hub will schedule them directly,
+        * but need set @bpkts field of endpoint context to 1.
         */
        if (is_fs_or_ls(speed) && !has_tt)
                return false;
@@ -339,8 +340,17 @@ int xhci_mtk_add_ep_quirk(struct usb_hcd *hcd, struct usb_device *udev,
                GET_MAX_PACKET(usb_endpoint_maxp(&ep->desc)),
                usb_endpoint_dir_in(&ep->desc), ep);
 
-       if (!need_bw_sch(ep, udev->speed, slot_ctx->tt_info & TT_SLOT))
+       if (!need_bw_sch(ep, udev->speed, slot_ctx->tt_info & TT_SLOT)) {
+               /*
+                * set @bpkts to 1 if it is LS or FS periodic endpoint, and its
+                * device does not connected through an external HS hub
+                */
+               if (usb_endpoint_xfer_int(&ep->desc)
+                       || usb_endpoint_xfer_isoc(&ep->desc))
+                       ep_ctx->reserved[0] |= cpu_to_le32(EP_BPKTS(1));
+
                return 0;
+       }
 
        bw_index = get_bw_index(xhci, udev, ep);
        sch_bw = &sch_array[bw_index];
index c9ab6a44c34aef05968c0b7e053e63cafd392d8f..9532f5aef71bfe310b499db66a266e9a2c7b32f4 100644 (file)
@@ -696,9 +696,24 @@ static int xhci_mtk_remove(struct platform_device *dev)
 }
 
 #ifdef CONFIG_PM_SLEEP
+/*
+ * if ip sleep fails, and all clocks are disabled, access register will hang
+ * AHB bus, so stop polling roothubs to avoid regs access on bus suspend.
+ * and no need to check whether ip sleep failed or not; this will cause SPM
+ * to wake up system immediately after system suspend complete if ip sleep
+ * fails, it is what we wanted.
+ */
 static int xhci_mtk_suspend(struct device *dev)
 {
        struct xhci_hcd_mtk *mtk = dev_get_drvdata(dev);
+       struct usb_hcd *hcd = mtk->hcd;
+       struct xhci_hcd *xhci = hcd_to_xhci(hcd);
+
+       xhci_dbg(xhci, "%s: stop port polling\n", __func__);
+       clear_bit(HCD_FLAG_POLL_RH, &hcd->flags);
+       del_timer_sync(&hcd->rh_timer);
+       clear_bit(HCD_FLAG_POLL_RH, &xhci->shared_hcd->flags);
+       del_timer_sync(&xhci->shared_hcd->rh_timer);
 
        xhci_mtk_host_disable(mtk);
        xhci_mtk_phy_power_off(mtk);
@@ -710,11 +725,19 @@ static int xhci_mtk_suspend(struct device *dev)
 static int xhci_mtk_resume(struct device *dev)
 {
        struct xhci_hcd_mtk *mtk = dev_get_drvdata(dev);
+       struct usb_hcd *hcd = mtk->hcd;
+       struct xhci_hcd *xhci = hcd_to_xhci(hcd);
 
        usb_wakeup_disable(mtk);
        xhci_mtk_clks_enable(mtk);
        xhci_mtk_phy_power_on(mtk);
        xhci_mtk_host_enable(mtk);
+
+       xhci_dbg(xhci, "%s: restart port polling\n", __func__);
+       set_bit(HCD_FLAG_POLL_RH, &hcd->flags);
+       usb_hcd_poll_rh_status(hcd);
+       set_bit(HCD_FLAG_POLL_RH, &xhci->shared_hcd->flags);
+       usb_hcd_poll_rh_status(xhci->shared_hcd);
        return 0;
 }
 
index 58c43ed7ff3b6c5291bab17bb646c2bbefcf67b6..f0640b7a1c42e2b82c4b8c6071fa0c43561ab7e7 100644 (file)
@@ -28,7 +28,9 @@
 #include "xhci.h"
 #include "xhci-trace.h"
 
-#define PORT2_SSIC_CONFIG_REG2 0x883c
+#define SSIC_PORT_NUM          2
+#define SSIC_PORT_CFG2         0x880c
+#define SSIC_PORT_CFG2_OFFSET  0x30
 #define PROG_DONE              (1 << 30)
 #define SSIC_PORT_UNUSED       (1 << 31)
 
@@ -45,6 +47,7 @@
 #define PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI            0x22b5
 #define PCI_DEVICE_ID_INTEL_SUNRISEPOINT_H_XHCI                0xa12f
 #define PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_XHCI       0x9d2f
+#define PCI_DEVICE_ID_INTEL_BROXTON_M_XHCI             0x0aa8
 
 static const char hcd_name[] = "xhci_hcd";
 
@@ -151,9 +154,14 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
        if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
                (pdev->device == PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_XHCI ||
                 pdev->device == PCI_DEVICE_ID_INTEL_SUNRISEPOINT_H_XHCI ||
-                pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI)) {
+                pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI ||
+                pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_M_XHCI)) {
                xhci->quirks |= XHCI_PME_STUCK_QUIRK;
        }
+       if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
+                pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI) {
+               xhci->quirks |= XHCI_SSIC_PORT_UNUSED;
+       }
        if (pdev->vendor == PCI_VENDOR_ID_ETRON &&
                        pdev->device == PCI_DEVICE_ID_EJ168) {
                xhci->quirks |= XHCI_RESET_ON_RESUME;
@@ -312,22 +320,20 @@ static void xhci_pci_remove(struct pci_dev *dev)
  * SSIC PORT need to be marked as "unused" before putting xHCI
  * into D3. After D3 exit, the SSIC port need to be marked as "used".
  * Without this change, xHCI might not enter D3 state.
- * Make sure PME works on some Intel xHCI controllers by writing 1 to clear
- * the Internal PME flag bit in vendor specific PMCTRL register at offset 0x80a4
  */
-static void xhci_pme_quirk(struct usb_hcd *hcd, bool suspend)
+static void xhci_ssic_port_unused_quirk(struct usb_hcd *hcd, bool suspend)
 {
        struct xhci_hcd *xhci = hcd_to_xhci(hcd);
-       struct pci_dev          *pdev = to_pci_dev(hcd->self.controller);
        u32 val;
        void __iomem *reg;
+       int i;
 
-       if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
-                pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI) {
-
-               reg = (void __iomem *) xhci->cap_regs + PORT2_SSIC_CONFIG_REG2;
+       for (i = 0; i < SSIC_PORT_NUM; i++) {
+               reg = (void __iomem *) xhci->cap_regs +
+                               SSIC_PORT_CFG2 +
+                               i * SSIC_PORT_CFG2_OFFSET;
 
-               /* Notify SSIC that SSIC profile programming is not done */
+               /* Notify SSIC that SSIC profile programming is not done. */
                val = readl(reg) & ~PROG_DONE;
                writel(val, reg);
 
@@ -344,6 +350,17 @@ static void xhci_pme_quirk(struct usb_hcd *hcd, bool suspend)
                writel(val, reg);
                readl(reg);
        }
+}
+
+/*
+ * Make sure PME works on some Intel xHCI controllers by writing 1 to clear
+ * the Internal PME flag bit in vendor specific PMCTRL register at offset 0x80a4
+ */
+static void xhci_pme_quirk(struct usb_hcd *hcd)
+{
+       struct xhci_hcd *xhci = hcd_to_xhci(hcd);
+       void __iomem *reg;
+       u32 val;
 
        reg = (void __iomem *) xhci->cap_regs + 0x80a4;
        val = readl(reg);
@@ -355,6 +372,7 @@ static int xhci_pci_suspend(struct usb_hcd *hcd, bool do_wakeup)
 {
        struct xhci_hcd *xhci = hcd_to_xhci(hcd);
        struct pci_dev          *pdev = to_pci_dev(hcd->self.controller);
+       int                     ret;
 
        /*
         * Systems with the TI redriver that loses port status change events
@@ -364,9 +382,16 @@ static int xhci_pci_suspend(struct usb_hcd *hcd, bool do_wakeup)
                pdev->no_d3cold = true;
 
        if (xhci->quirks & XHCI_PME_STUCK_QUIRK)
-               xhci_pme_quirk(hcd, true);
+               xhci_pme_quirk(hcd);
+
+       if (xhci->quirks & XHCI_SSIC_PORT_UNUSED)
+               xhci_ssic_port_unused_quirk(hcd, true);
 
-       return xhci_suspend(xhci, do_wakeup);
+       ret = xhci_suspend(xhci, do_wakeup);
+       if (ret && (xhci->quirks & XHCI_SSIC_PORT_UNUSED))
+               xhci_ssic_port_unused_quirk(hcd, false);
+
+       return ret;
 }
 
 static int xhci_pci_resume(struct usb_hcd *hcd, bool hibernated)
@@ -396,8 +421,11 @@ static int xhci_pci_resume(struct usb_hcd *hcd, bool hibernated)
        if (pdev->vendor == PCI_VENDOR_ID_INTEL)
                usb_enable_intel_xhci_ports(pdev);
 
+       if (xhci->quirks & XHCI_SSIC_PORT_UNUSED)
+               xhci_ssic_port_unused_quirk(hcd, false);
+
        if (xhci->quirks & XHCI_PME_STUCK_QUIRK)
-               xhci_pme_quirk(hcd, false);
+               xhci_pme_quirk(hcd);
 
        retval = xhci_resume(xhci, hibernated);
        return retval;
index 770b6b08879790ec2754e26fbb424fcd03c1205c..d39d6bf1d090dad06687892ed723ea3a5e993083 100644 (file)
@@ -184,7 +184,8 @@ static int xhci_plat_probe(struct platform_device *pdev)
                struct xhci_plat_priv *priv = hcd_to_xhci_priv(hcd);
 
                /* Just copy data for now */
-               *priv = *priv_match;
+               if (priv_match)
+                       *priv = *priv_match;
        }
 
        if (xhci_plat_type_is(hcd, XHCI_PLAT_TYPE_MARVELL_ARMADA)) {
index f1c21c40b4a674e6a8fad98ac34c0b9385df76be..3915657e6078b66211fd699eab9bf227752f1ffc 100644 (file)
@@ -2193,10 +2193,6 @@ static int process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_td *td,
                }
        /* Fast path - was this the last TRB in the TD for this URB? */
        } else if (event_trb == td->last_trb) {
-               if (td->urb_length_set && trb_comp_code == COMP_SHORT_TX)
-                       return finish_td(xhci, td, event_trb, event, ep,
-                                        status, false);
-
                if (EVENT_TRB_LEN(le32_to_cpu(event->transfer_len)) != 0) {
                        td->urb->actual_length =
                                td->urb->transfer_buffer_length -
@@ -2248,12 +2244,6 @@ static int process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_td *td,
                        td->urb->actual_length +=
                                TRB_LEN(le32_to_cpu(cur_trb->generic.field[2])) -
                                EVENT_TRB_LEN(le32_to_cpu(event->transfer_len));
-
-               if (trb_comp_code == COMP_SHORT_TX) {
-                       xhci_dbg(xhci, "mid bulk/intr SP, wait for last TRB event\n");
-                       td->urb_length_set = true;
-                       return 0;
-               }
        }
 
        return finish_td(xhci, td, event_trb, event, ep, status, false);
index 26a44c0e969e621be5d2a6e2d947cf9292505716..0c8087d3c3138f72e6eeaedc928c09453ab32285 100644 (file)
@@ -1554,7 +1554,9 @@ int xhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
                xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb,
                                "HW died, freeing TD.");
                urb_priv = urb->hcpriv;
-               for (i = urb_priv->td_cnt; i < urb_priv->length; i++) {
+               for (i = urb_priv->td_cnt;
+                    i < urb_priv->length && xhci->devs[urb->dev->slot_id];
+                    i++) {
                        td = urb_priv->td[i];
                        if (!list_empty(&td->td_list))
                                list_del_init(&td->td_list);
index 9be7348872baab804691c7fd10f31115a0c7e3fe..cc651383ce5a85d713c66b032a0aa30bd4331dc8 100644 (file)
@@ -1631,6 +1631,7 @@ struct xhci_hcd {
 #define XHCI_BROKEN_STREAMS    (1 << 19)
 #define XHCI_PME_STUCK_QUIRK   (1 << 20)
 #define XHCI_MTK_HOST          (1 << 21)
+#define XHCI_SSIC_PORT_UNUSED  (1 << 22)
        unsigned int            num_active_eps;
        unsigned int            limit_active_eps;
        /* There are two roothubs to keep track of bus suspend info for */
index b2685e75a6835fe2b4615b3f86f7aef378fedcae..3eaa4ba6867d408a516abec7bb4180d5c9f05d40 100644 (file)
@@ -348,7 +348,9 @@ static int ux500_suspend(struct device *dev)
        struct ux500_glue       *glue = dev_get_drvdata(dev);
        struct musb             *musb = glue_to_musb(glue);
 
-       usb_phy_set_suspend(musb->xceiv, 1);
+       if (musb)
+               usb_phy_set_suspend(musb->xceiv, 1);
+
        clk_disable_unprepare(glue->clk);
 
        return 0;
@@ -366,7 +368,8 @@ static int ux500_resume(struct device *dev)
                return ret;
        }
 
-       usb_phy_set_suspend(musb->xceiv, 0);
+       if (musb)
+               usb_phy_set_suspend(musb->xceiv, 0);
 
        return 0;
 }
index 0d19a6d61a71f7ccb7a55e1b90a7d5d5f27db535..970a30e155cb51bfd1d15126d8cdc65f9adf579c 100644 (file)
@@ -1599,6 +1599,8 @@ static int msm_otg_read_dt(struct platform_device *pdev, struct msm_otg *motg)
                                                &motg->id.nb);
                if (ret < 0) {
                        dev_err(&pdev->dev, "register ID notifier failed\n");
+                       extcon_unregister_notifier(motg->vbus.extcon,
+                                                  EXTCON_USB, &motg->vbus.nb);
                        return ret;
                }
 
@@ -1660,15 +1662,6 @@ static int msm_otg_probe(struct platform_device *pdev)
        if (!motg)
                return -ENOMEM;
 
-       pdata = dev_get_platdata(&pdev->dev);
-       if (!pdata) {
-               if (!np)
-                       return -ENXIO;
-               ret = msm_otg_read_dt(pdev, motg);
-               if (ret)
-                       return ret;
-       }
-
        motg->phy.otg = devm_kzalloc(&pdev->dev, sizeof(struct usb_otg),
                                     GFP_KERNEL);
        if (!motg->phy.otg)
@@ -1710,6 +1703,15 @@ static int msm_otg_probe(struct platform_device *pdev)
        if (!motg->regs)
                return -ENOMEM;
 
+       pdata = dev_get_platdata(&pdev->dev);
+       if (!pdata) {
+               if (!np)
+                       return -ENXIO;
+               ret = msm_otg_read_dt(pdev, motg);
+               if (ret)
+                       return ret;
+       }
+
        /*
         * NOTE: The PHYs can be multiplexed between the chipidea controller
         * and the dwc3 controller, using a single bit. It is important that
@@ -1717,8 +1719,10 @@ static int msm_otg_probe(struct platform_device *pdev)
         */
        if (motg->phy_number) {
                phy_select = devm_ioremap_nocache(&pdev->dev, USB2_PHY_SEL, 4);
-               if (!phy_select)
-                       return -ENOMEM;
+               if (!phy_select) {
+                       ret = -ENOMEM;
+                       goto unregister_extcon;
+               }
                /* Enable second PHY with the OTG port */
                writel(0x1, phy_select);
        }
@@ -1728,7 +1732,8 @@ static int msm_otg_probe(struct platform_device *pdev)
        motg->irq = platform_get_irq(pdev, 0);
        if (motg->irq < 0) {
                dev_err(&pdev->dev, "platform_get_irq failed\n");
-               return motg->irq;
+               ret = motg->irq;
+               goto unregister_extcon;
        }
 
        regs[0].supply = "vddcx";
@@ -1737,7 +1742,7 @@ static int msm_otg_probe(struct platform_device *pdev)
 
        ret = devm_regulator_bulk_get(motg->phy.dev, ARRAY_SIZE(regs), regs);
        if (ret)
-               return ret;
+               goto unregister_extcon;
 
        motg->vddcx = regs[0].consumer;
        motg->v3p3  = regs[1].consumer;
@@ -1834,6 +1839,12 @@ disable_clks:
        clk_disable_unprepare(motg->clk);
        if (!IS_ERR(motg->core_clk))
                clk_disable_unprepare(motg->core_clk);
+unregister_extcon:
+       extcon_unregister_notifier(motg->id.extcon,
+                                  EXTCON_USB_HOST, &motg->id.nb);
+       extcon_unregister_notifier(motg->vbus.extcon,
+                                  EXTCON_USB, &motg->vbus.nb);
+
        return ret;
 }
 
index c2936dc48ca7b45b0e5c186859d2879569719323..00bfea01be6501d9b53ccc7975b069c24bc57d7b 100644 (file)
@@ -220,7 +220,7 @@ static int mxs_phy_hw_init(struct mxs_phy *mxs_phy)
 /* Return true if the vbus is there */
 static bool mxs_phy_get_vbus_status(struct mxs_phy *mxs_phy)
 {
-       unsigned int vbus_value;
+       unsigned int vbus_value = 0;
 
        if (!mxs_phy->regmap_anatop)
                return false;
index 9b90ad747d8766797bd1737ea8f0be9347a5560d..987813b8a7f91e27b6a582a44ca88254dbd49c13 100644 (file)
@@ -99,6 +99,7 @@ static const struct usb_device_id id_table[] = {
        { USB_DEVICE(0x10C4, 0x81AC) }, /* MSD Dash Hawk */
        { USB_DEVICE(0x10C4, 0x81AD) }, /* INSYS USB Modem */
        { USB_DEVICE(0x10C4, 0x81C8) }, /* Lipowsky Industrie Elektronik GmbH, Baby-JTAG */
+       { USB_DEVICE(0x10C4, 0x81D7) }, /* IAI Corp. RCB-CV-USB USB to RS485 Adaptor */
        { USB_DEVICE(0x10C4, 0x81E2) }, /* Lipowsky Industrie Elektronik GmbH, Baby-LIN */
        { USB_DEVICE(0x10C4, 0x81E7) }, /* Aerocomm Radio */
        { USB_DEVICE(0x10C4, 0x81E8) }, /* Zephyr Bioharness */
index a5a0376bbd48c2596b908aa0409befb1136bf128..8c660ae401d8267f951516e33fe8188f0f23e060 100644 (file)
@@ -824,6 +824,7 @@ static const struct usb_device_id id_table_combined[] = {
        { USB_DEVICE(FTDI_VID, FTDI_TURTELIZER_PID),
                .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk },
        { USB_DEVICE(RATOC_VENDOR_ID, RATOC_PRODUCT_ID_USB60F) },
+       { USB_DEVICE(RATOC_VENDOR_ID, RATOC_PRODUCT_ID_SCU18) },
        { USB_DEVICE(FTDI_VID, FTDI_REU_TINY_PID) },
 
        /* Papouch devices based on FTDI chip */
index 67c6d446973092ddf98dd703152b7cd0d8dec24c..a84df2513994a57377cc41105f3078e36d2351a2 100644 (file)
  */
 #define RATOC_VENDOR_ID                0x0584
 #define RATOC_PRODUCT_ID_USB60F        0xb020
+#define RATOC_PRODUCT_ID_SCU18 0xb03a
 
 /*
  * Infineon Technologies
index e3c3f57c2d828d42ecb35e12d8a1a048974d9ee6..619607323bfdfb0a678e470ed31757ba6028676d 100644 (file)
@@ -368,6 +368,16 @@ static int mxu1_port_probe(struct usb_serial_port *port)
        return 0;
 }
 
+static int mxu1_port_remove(struct usb_serial_port *port)
+{
+       struct mxu1_port *mxport;
+
+       mxport = usb_get_serial_port_data(port);
+       kfree(mxport);
+
+       return 0;
+}
+
 static int mxu1_startup(struct usb_serial *serial)
 {
        struct mxu1_device *mxdev;
@@ -427,6 +437,14 @@ err_free_mxdev:
        return err;
 }
 
+static void mxu1_release(struct usb_serial *serial)
+{
+       struct mxu1_device *mxdev;
+
+       mxdev = usb_get_serial_data(serial);
+       kfree(mxdev);
+}
+
 static int mxu1_write_byte(struct usb_serial_port *port, u32 addr,
                           u8 mask, u8 byte)
 {
@@ -957,7 +975,9 @@ static struct usb_serial_driver mxu11x0_device = {
        .id_table               = mxu1_idtable,
        .num_ports              = 1,
        .port_probe             = mxu1_port_probe,
+       .port_remove            = mxu1_port_remove,
        .attach                 = mxu1_startup,
+       .release                = mxu1_release,
        .open                   = mxu1_open,
        .close                  = mxu1_close,
        .ioctl                  = mxu1_ioctl,
index f2280606b73c0c897d83d7738747b3d6f97a2c71..db86e512e0fcb3af0548eb9907b04ce68db3a8dc 100644 (file)
@@ -268,6 +268,8 @@ static void option_instat_callback(struct urb *urb);
 #define TELIT_PRODUCT_CC864_SINGLE             0x1006
 #define TELIT_PRODUCT_DE910_DUAL               0x1010
 #define TELIT_PRODUCT_UE910_V2                 0x1012
+#define TELIT_PRODUCT_LE922_USBCFG0            0x1042
+#define TELIT_PRODUCT_LE922_USBCFG3            0x1043
 #define TELIT_PRODUCT_LE920                    0x1200
 #define TELIT_PRODUCT_LE910                    0x1201
 
@@ -615,6 +617,16 @@ static const struct option_blacklist_info telit_le920_blacklist = {
        .reserved = BIT(1) | BIT(5),
 };
 
+static const struct option_blacklist_info telit_le922_blacklist_usbcfg0 = {
+       .sendsetup = BIT(2),
+       .reserved = BIT(0) | BIT(1) | BIT(3),
+};
+
+static const struct option_blacklist_info telit_le922_blacklist_usbcfg3 = {
+       .sendsetup = BIT(0),
+       .reserved = BIT(1) | BIT(2) | BIT(3),
+};
+
 static const struct usb_device_id option_ids[] = {
        { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_COLT) },
        { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_RICOLA) },
@@ -1160,6 +1172,10 @@ static const struct usb_device_id option_ids[] = {
        { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_CC864_SINGLE) },
        { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_DE910_DUAL) },
        { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_UE910_V2) },
+       { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG0),
+               .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg0 },
+       { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG3),
+               .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg3 },
        { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910),
                .driver_info = (kernel_ulong_t)&telit_le910_blacklist },
        { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920),
@@ -1679,7 +1695,7 @@ static const struct usb_device_id option_ids[] = {
        { USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_EU3_P) },
        { USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_PH8),
                .driver_info = (kernel_ulong_t)&net_intf4_blacklist },
-       { USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_AHXX) },
+       { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_AHXX, 0xff) },
        { USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_PLXX),
                .driver_info = (kernel_ulong_t)&net_intf4_blacklist },
        { USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_HC28_MDM) }, 
index 60afb39eb73c0b95d261ec367890a306621fd2de..337a0be89fcf0767f67731a9b0a50700e1df9318 100644 (file)
@@ -544,6 +544,11 @@ static int treo_attach(struct usb_serial *serial)
                (serial->num_interrupt_in == 0))
                return 0;
 
+       if (serial->num_bulk_in < 2 || serial->num_interrupt_in < 2) {
+               dev_err(&serial->interface->dev, "missing endpoints\n");
+               return -ENODEV;
+       }
+
        /*
        * It appears that Treos and Kyoceras want to use the
        * 1st bulk in endpoint to communicate with the 2nd bulk out endpoint,
@@ -597,8 +602,10 @@ static int clie_5_attach(struct usb_serial *serial)
         */
 
        /* some sanity check */
-       if (serial->num_ports < 2)
-               return -1;
+       if (serial->num_bulk_out < 2) {
+               dev_err(&serial->interface->dev, "missing bulk out endpoints\n");
+               return -ENODEV;
+       }
 
        /* port 0 now uses the modified endpoint Address */
        port = serial->port[0];
index 82f25cc1c460dee73c37b2b19895fb7d89bfb0d3..ecca316386f57fa64d04746d4b8326fa4c241ebd 100644 (file)
@@ -123,8 +123,8 @@ struct iommu_group *vfio_iommu_group_get(struct device *dev)
        /*
         * With noiommu enabled, an IOMMU group will be created for a device
         * that doesn't already have one and doesn't have an iommu_ops on their
-        * bus.  We use iommu_present() again in the main code to detect these
-        * fake groups.
+        * bus.  We set iommudata simply to be able to identify these groups
+        * as special use and for reclamation later.
         */
        if (group || !noiommu || iommu_present(dev->bus))
                return group;
@@ -134,6 +134,7 @@ struct iommu_group *vfio_iommu_group_get(struct device *dev)
                return NULL;
 
        iommu_group_set_name(group, "vfio-noiommu");
+       iommu_group_set_iommudata(group, &noiommu, NULL);
        ret = iommu_group_add_device(group, dev);
        iommu_group_put(group);
        if (ret)
@@ -158,7 +159,7 @@ EXPORT_SYMBOL_GPL(vfio_iommu_group_get);
 void vfio_iommu_group_put(struct iommu_group *group, struct device *dev)
 {
 #ifdef CONFIG_VFIO_NOIOMMU
-       if (!iommu_present(dev->bus))
+       if (iommu_group_get_iommudata(group) == &noiommu)
                iommu_group_remove_device(dev);
 #endif
 
@@ -190,16 +191,10 @@ static long vfio_noiommu_ioctl(void *iommu_data,
        return -ENOTTY;
 }
 
-static int vfio_iommu_present(struct device *dev, void *unused)
-{
-       return iommu_present(dev->bus) ? 1 : 0;
-}
-
 static int vfio_noiommu_attach_group(void *iommu_data,
                                     struct iommu_group *iommu_group)
 {
-       return iommu_group_for_each_dev(iommu_group, NULL,
-                                       vfio_iommu_present) ? -EINVAL : 0;
+       return iommu_group_get_iommudata(iommu_group) == &noiommu ? 0 : -EINVAL;
 }
 
 static void vfio_noiommu_detach_group(void *iommu_data,
@@ -323,8 +318,7 @@ static void vfio_group_unlock_and_free(struct vfio_group *group)
 /**
  * Group objects - create, release, get, put, search
  */
-static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
-                                           bool iommu_present)
+static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group)
 {
        struct vfio_group *group, *tmp;
        struct device *dev;
@@ -342,7 +336,9 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
        atomic_set(&group->container_users, 0);
        atomic_set(&group->opened, 0);
        group->iommu_group = iommu_group;
-       group->noiommu = !iommu_present;
+#ifdef CONFIG_VFIO_NOIOMMU
+       group->noiommu = (iommu_group_get_iommudata(iommu_group) == &noiommu);
+#endif
 
        group->nb.notifier_call = vfio_iommu_group_notifier;
 
@@ -767,7 +763,7 @@ int vfio_add_group_dev(struct device *dev,
 
        group = vfio_group_get_from_iommu(iommu_group);
        if (!group) {
-               group = vfio_create_group(iommu_group, iommu_present(dev->bus));
+               group = vfio_create_group(iommu_group);
                if (IS_ERR(group)) {
                        iommu_group_put(iommu_group);
                        return PTR_ERR(group);
index 3fc63c208d08ba9fa801570d830220106aa24f21..57721c73177fa1184d253155a6bcce39bdb2e927 100644 (file)
@@ -78,13 +78,13 @@ int fb_deferred_io_fsync(struct file *file, loff_t start, loff_t end, int datasy
        if (!info->fbdefio)
                return 0;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        /* Kill off the delayed work */
        cancel_delayed_work_sync(&info->deferred_work);
 
        /* Run it immediately */
        schedule_delayed_work(&info->deferred_work, 0);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        return 0;
 }
index 36205c27c4d0f19ab61a4aa4175de3dd8785de66..f6bed86c17f96f16cfa75f35a15b790287e7ab1f 100644 (file)
@@ -545,6 +545,7 @@ err_enable_device:
 static void virtio_pci_remove(struct pci_dev *pci_dev)
 {
        struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
+       struct device *dev = get_device(&vp_dev->vdev.dev);
 
        unregister_virtio_device(&vp_dev->vdev);
 
@@ -554,6 +555,7 @@ static void virtio_pci_remove(struct pci_dev *pci_dev)
                virtio_pci_modern_remove(vp_dev);
 
        pci_disable_device(pci_dev);
+       put_device(dev);
 }
 
 static struct pci_driver virtio_pci_driver = {
index 4f0e7be0da346c90d8559d9653bd41927f722b22..0f6d8515ba4f1d06144bb67c12ef26ef4a545da8 100644 (file)
@@ -145,7 +145,8 @@ config MENF21BMC_WATCHDOG
 config TANGOX_WATCHDOG
        tristate "Sigma Designs SMP86xx/SMP87xx watchdog"
        select WATCHDOG_CORE
-       depends on ARCH_TANGOX || COMPILE_TEST
+       depends on ARCH_TANGO || COMPILE_TEST
+       depends on HAS_IOMEM
        help
          Support for the watchdog in Sigma Designs SMP86xx (tango3)
          and SMP87xx (tango4) family chips.
@@ -618,6 +619,7 @@ config DIGICOLOR_WATCHDOG
 config LPC18XX_WATCHDOG
        tristate "LPC18xx/43xx Watchdog"
        depends on ARCH_LPC18XX || COMPILE_TEST
+       depends on HAS_IOMEM
        select WATCHDOG_CORE
        help
          Say Y here if to include support for the watchdog timer
@@ -1374,6 +1376,7 @@ config BCM_KONA_WDT_DEBUG
 config BCM7038_WDT
        tristate "BCM7038 Watchdog"
        select WATCHDOG_CORE
+       depends on HAS_IOMEM
        help
         Watchdog driver for the built-in hardware in Broadcom 7038 SoCs.
 
@@ -1383,6 +1386,7 @@ config IMGPDC_WDT
        tristate "Imagination Technologies PDC Watchdog Timer"
        depends on HAS_IOMEM
        depends on METAG || MIPS || COMPILE_TEST
+       select WATCHDOG_CORE
        help
          Driver for Imagination Technologies PowerDown Controller
          Watchdog Timer.
index f36ca4be07207a9fd200d92577e9242a10e774df..ac5840d9689aed0f79fafa82e6a56e4515bb1b27 100644 (file)
@@ -292,4 +292,4 @@ MODULE_PARM_DESC(nodelay,
                 "Force selection of a timeout setting without initial delay "
                 "(max6373/74 only, default=0)");
 
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("GPL v2");
index 1a11aedc4fe850738bfe17d469232da64959b84d..68952d9ccf8394412654d76fc2c14edef3f74124 100644 (file)
@@ -608,7 +608,7 @@ static int usb_pcwd_probe(struct usb_interface *interface,
        struct usb_host_interface *iface_desc;
        struct usb_endpoint_descriptor *endpoint;
        struct usb_pcwd_private *usb_pcwd = NULL;
-       int pipe, maxp;
+       int pipe;
        int retval = -ENOMEM;
        int got_fw_rev;
        unsigned char fw_rev_major, fw_rev_minor;
@@ -641,7 +641,6 @@ static int usb_pcwd_probe(struct usb_interface *interface,
 
        /* get a handle to the interrupt data pipe */
        pipe = usb_rcvintpipe(udev, endpoint->bEndpointAddress);
-       maxp = usb_maxpacket(udev, pipe, usb_pipeout(pipe));
 
        /* allocate memory for our device and initialize it */
        usb_pcwd = kzalloc(sizeof(struct usb_pcwd_private), GFP_KERNEL);
index 01d816251302c2491c24a70e8f7c542b61c2f15a..e7a715e820217eb82a90cc8e5eed54d6d826e22c 100644 (file)
@@ -139,12 +139,11 @@ static int wdt_config(struct watchdog_device *wdd, bool ping)
 
        writel_relaxed(UNLOCK, wdt->base + WDTLOCK);
        writel_relaxed(wdt->load_val, wdt->base + WDTLOAD);
+       writel_relaxed(INT_MASK, wdt->base + WDTINTCLR);
 
-       if (!ping) {
-               writel_relaxed(INT_MASK, wdt->base + WDTINTCLR);
+       if (!ping)
                writel_relaxed(INT_ENABLE | RESET_ENABLE, wdt->base +
                                WDTCONTROL);
-       }
 
        writel_relaxed(LOCK, wdt->base + WDTLOCK);
 
index 945fc43272017cfe05bab171eb8b97e62057244c..4ac2ca8a76561952798f7c49bdd292719d0439a8 100644 (file)
@@ -242,7 +242,7 @@ static int tmem_cleancache_init_shared_fs(char *uuid, size_t pagesize)
        return xen_tmem_new_pool(shared_uuid, TMEM_POOL_SHARED, pagesize);
 }
 
-static struct cleancache_ops tmem_cleancache_ops = {
+static const struct cleancache_ops tmem_cleancache_ops = {
        .put_page = tmem_cleancache_put_page,
        .get_page = tmem_cleancache_get_page,
        .invalidate_page = tmem_cleancache_flush_page,
index 7bf835f85bc822ef1119b639be82619af066d326..eadc894faea2ead1f720ad22da63a42889967ebe 100644 (file)
@@ -449,14 +449,14 @@ static int v9fs_file_fsync(struct file *filp, loff_t start, loff_t end,
        if (retval)
                return retval;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        p9_debug(P9_DEBUG_VFS, "filp %p datasync %x\n", filp, datasync);
 
        fid = filp->private_data;
        v9fs_blank_wstat(&wstat);
 
        retval = p9_client_wstat(fid, &wstat);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        return retval;
 }
@@ -472,13 +472,13 @@ int v9fs_file_fsync_dotl(struct file *filp, loff_t start, loff_t end,
        if (retval)
                return retval;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        p9_debug(P9_DEBUG_VFS, "filp %p datasync %x\n", filp, datasync);
 
        fid = filp->private_data;
 
        retval = p9_client_fsync(fid, datasync);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        return retval;
 }
index 659c579c4588b2a5e844d0e82a0195494d70a073..0548c53f41d51902a4e0ce7b983f7cd365f656f9 100644 (file)
@@ -33,11 +33,11 @@ affs_file_release(struct inode *inode, struct file *filp)
                 inode->i_ino, atomic_read(&AFFS_I(inode)->i_opencnt));
 
        if (atomic_dec_and_test(&AFFS_I(inode)->i_opencnt)) {
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                if (inode->i_size != AFFS_I(inode)->mmu_private)
                        affs_truncate(inode);
                affs_free_prealloc(inode);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        }
 
        return 0;
@@ -958,12 +958,12 @@ int affs_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
        if (err)
                return err;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        ret = write_inode_now(inode, 0);
        err = sync_blockdev(inode->i_sb->s_bdev);
        if (!ret)
                ret = err;
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return ret;
 }
 const struct file_operations affs_file_operations = {
index 4baf1d2b39e410ffb501c2aba457fdceadcc2cdb..d91a9c9cfbd0a9aa0770ad394af0c156057a8a85 100644 (file)
@@ -483,7 +483,7 @@ static int afs_do_getlk(struct file *file, struct file_lock *fl)
 
        fl->fl_type = F_UNLCK;
 
-       mutex_lock(&vnode->vfs_inode.i_mutex);
+       inode_lock(&vnode->vfs_inode);
 
        /* check local lock records first */
        ret = 0;
@@ -505,7 +505,7 @@ static int afs_do_getlk(struct file *file, struct file_lock *fl)
        }
 
 error:
-       mutex_unlock(&vnode->vfs_inode.i_mutex);
+       inode_unlock(&vnode->vfs_inode);
        _leave(" = %d [%hd]", ret, fl->fl_type);
        return ret;
 }
index 0714abcd7f32321754287e46aec129196832e2ef..dfef94f70667cde167dad60d71d1e23622c24cf7 100644 (file)
@@ -693,7 +693,7 @@ int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
        ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
        if (ret)
                return ret;
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        /* use a writeback record as a marker in the queue - when this reaches
         * the front of the queue, all the outstanding writes are either
@@ -735,7 +735,7 @@ int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
        afs_put_writeback(wb);
        _leave(" = %d", ret);
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return ret;
 }
 
index 6530ced19697d49a9189fa289c9112187448fee3..25b24d0f6c8810c86ef79319e091fc65231733d0 100644 (file)
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -195,7 +195,7 @@ int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **de
        struct timespec now;
        unsigned int ia_valid = attr->ia_valid;
 
-       WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex));
+       WARN_ON_ONCE(!inode_is_locked(inode));
 
        if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_TIMES_SET)) {
                if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
index 3a93755e880fee23fa6d8370916caefb21f2c9eb..051ea4809c14037fd0b1efffccb12c3909b3e46b 100644 (file)
@@ -491,6 +491,7 @@ static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
  * arch_check_elf() - check an ELF executable
  * @ehdr:      The main ELF header
  * @has_interp:        True if the ELF has an interpreter, else false.
+ * @interp_ehdr: The interpreter's ELF header
  * @state:     Architecture-specific state preserved throughout the process
  *             of loading the ELF.
  *
@@ -502,6 +503,7 @@ static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
  *         with that return code.
  */
 static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
+                                struct elfhdr *interp_ehdr,
                                 struct arch_elf_state *state)
 {
        /* Dummy implementation, always proceed */
@@ -829,7 +831,9 @@ static int load_elf_binary(struct linux_binprm *bprm)
         * still possible to return an error to the code that invoked
         * the exec syscall.
         */
-       retval = arch_check_elf(&loc->elf_ex, !!interpreter, &arch_state);
+       retval = arch_check_elf(&loc->elf_ex,
+                               !!interpreter, &loc->interp_elf_ex,
+                               &arch_state);
        if (retval)
                goto out_free_dentry;
 
index 78f005f378476011a43c41e7d0eaa8d99ac1af21..3a3ced779fc738199cb5bb3d25a8f8c76edc754d 100644 (file)
@@ -638,11 +638,11 @@ static ssize_t bm_entry_write(struct file *file, const char __user *buffer,
        case 3:
                /* Delete this handler. */
                root = dget(file->f_path.dentry->d_sb->s_root);
-               mutex_lock(&d_inode(root)->i_mutex);
+               inode_lock(d_inode(root));
 
                kill_node(e);
 
-               mutex_unlock(&d_inode(root)->i_mutex);
+               inode_unlock(d_inode(root));
                dput(root);
                break;
        default:
@@ -675,7 +675,7 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
                return PTR_ERR(e);
 
        root = dget(sb->s_root);
-       mutex_lock(&d_inode(root)->i_mutex);
+       inode_lock(d_inode(root));
        dentry = lookup_one_len(e->name, root, strlen(e->name));
        err = PTR_ERR(dentry);
        if (IS_ERR(dentry))
@@ -711,7 +711,7 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
 out2:
        dput(dentry);
 out:
-       mutex_unlock(&d_inode(root)->i_mutex);
+       inode_unlock(d_inode(root));
        dput(root);
 
        if (err) {
@@ -754,12 +754,12 @@ static ssize_t bm_status_write(struct file *file, const char __user *buffer,
        case 3:
                /* Delete all handlers. */
                root = dget(file->f_path.dentry->d_sb->s_root);
-               mutex_lock(&d_inode(root)->i_mutex);
+               inode_lock(d_inode(root));
 
                while (!list_empty(&entries))
                        kill_node(list_entry(entries.next, Node, list));
 
-               mutex_unlock(&d_inode(root)->i_mutex);
+               inode_unlock(d_inode(root));
                dput(root);
                break;
        default:
index ba762ea07f679c9bafd078bb316270a38e9a7d80..39b3a174a4253974b4635bee951e2283cf71b9cf 100644 (file)
@@ -75,7 +75,7 @@ void kill_bdev(struct block_device *bdev)
 {
        struct address_space *mapping = bdev->bd_inode->i_mapping;
 
-       if (mapping->nrpages == 0 && mapping->nrshadows == 0)
+       if (mapping->nrpages == 0 && mapping->nrexceptional == 0)
                return;
 
        invalidate_bh_lrus();
@@ -346,9 +346,9 @@ static loff_t block_llseek(struct file *file, loff_t offset, int whence)
        struct inode *bd_inode = bdev_file_inode(file);
        loff_t retval;
 
-       mutex_lock(&bd_inode->i_mutex);
+       inode_lock(bd_inode);
        retval = fixed_size_llseek(file, offset, whence, i_size_read(bd_inode));
-       mutex_unlock(&bd_inode->i_mutex);
+       inode_unlock(bd_inode);
        return retval;
 }
        
@@ -1142,9 +1142,9 @@ void bd_set_size(struct block_device *bdev, loff_t size)
 {
        unsigned bsize = bdev_logical_block_size(bdev);
 
-       mutex_lock(&bdev->bd_inode->i_mutex);
+       inode_lock(bdev->bd_inode);
        i_size_write(bdev->bd_inode, size);
-       mutex_unlock(&bdev->bd_inode->i_mutex);
+       inode_unlock(bdev->bd_inode);
        while (bsize < PAGE_CACHE_SIZE) {
                if (size & bsize)
                        break;
@@ -1730,43 +1730,25 @@ static int blkdev_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
        return __dax_fault(vma, vmf, blkdev_get_block, NULL);
 }
 
-static int blkdev_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
-               pmd_t *pmd, unsigned int flags)
+static int blkdev_dax_pfn_mkwrite(struct vm_area_struct *vma,
+               struct vm_fault *vmf)
 {
-       return __dax_pmd_fault(vma, addr, pmd, flags, blkdev_get_block, NULL);
-}
-
-static void blkdev_vm_open(struct vm_area_struct *vma)
-{
-       struct inode *bd_inode = bdev_file_inode(vma->vm_file);
-       struct block_device *bdev = I_BDEV(bd_inode);
-
-       mutex_lock(&bd_inode->i_mutex);
-       bdev->bd_map_count++;
-       mutex_unlock(&bd_inode->i_mutex);
+       return dax_pfn_mkwrite(vma, vmf);
 }
 
-static void blkdev_vm_close(struct vm_area_struct *vma)
+static int blkdev_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
+               pmd_t *pmd, unsigned int flags)
 {
-       struct inode *bd_inode = bdev_file_inode(vma->vm_file);
-       struct block_device *bdev = I_BDEV(bd_inode);
-
-       mutex_lock(&bd_inode->i_mutex);
-       bdev->bd_map_count--;
-       mutex_unlock(&bd_inode->i_mutex);
+       return __dax_pmd_fault(vma, addr, pmd, flags, blkdev_get_block, NULL);
 }
 
 static const struct vm_operations_struct blkdev_dax_vm_ops = {
-       .open           = blkdev_vm_open,
-       .close          = blkdev_vm_close,
        .fault          = blkdev_dax_fault,
        .pmd_fault      = blkdev_dax_pmd_fault,
-       .pfn_mkwrite    = blkdev_dax_fault,
+       .pfn_mkwrite    = blkdev_dax_pfn_mkwrite,
 };
 
 static const struct vm_operations_struct blkdev_default_vm_ops = {
-       .open           = blkdev_vm_open,
-       .close          = blkdev_vm_close,
        .fault          = filemap_fault,
        .map_pages      = filemap_map_pages,
 };
@@ -1774,18 +1756,14 @@ static const struct vm_operations_struct blkdev_default_vm_ops = {
 static int blkdev_mmap(struct file *file, struct vm_area_struct *vma)
 {
        struct inode *bd_inode = bdev_file_inode(file);
-       struct block_device *bdev = I_BDEV(bd_inode);
 
        file_accessed(file);
-       mutex_lock(&bd_inode->i_mutex);
-       bdev->bd_map_count++;
        if (IS_DAX(bd_inode)) {
                vma->vm_ops = &blkdev_dax_vm_ops;
                vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE;
        } else {
                vma->vm_ops = &blkdev_default_vm_ops;
        }
-       mutex_unlock(&bd_inode->i_mutex);
 
        return 0;
 }
index 88d9af3d4581f7c45aa0065fa467d097625be5c1..5fb60ea7eee2b2c28e067d11b54c2fad7127c7d8 100644 (file)
@@ -328,8 +328,8 @@ static inline void __btrfs_queue_work(struct __btrfs_workqueue *wq,
                list_add_tail(&work->ordered_list, &wq->ordered_list);
                spin_unlock_irqrestore(&wq->list_lock, flags);
        }
-       queue_work(wq->normal_wq, &work->normal_work);
        trace_btrfs_work_queued(work);
+       queue_work(wq->normal_wq, &work->normal_work);
 }
 
 void btrfs_queue_work(struct btrfs_workqueue *wq,
index 08405a3da6b1baf9a382327d72dc5a5a85d0c68d..b90cd3776f8e0a41635ce7cf5447e7b9652e6cca 100644 (file)
@@ -560,13 +560,13 @@ static int __add_missing_keys(struct btrfs_fs_info *fs_info,
  */
 static void __merge_refs(struct list_head *head, int mode)
 {
-       struct __prelim_ref *ref1;
+       struct __prelim_ref *pos1;
 
-       list_for_each_entry(ref1, head, list) {
-               struct __prelim_ref *ref2 = ref1, *tmp;
+       list_for_each_entry(pos1, head, list) {
+               struct __prelim_ref *pos2 = pos1, *tmp;
 
-               list_for_each_entry_safe_continue(ref2, tmp, head, list) {
-                       struct __prelim_ref *xchg;
+               list_for_each_entry_safe_continue(pos2, tmp, head, list) {
+                       struct __prelim_ref *xchg, *ref1 = pos1, *ref2 = pos2;
                        struct extent_inode_elem *eie;
 
                        if (!ref_for_same_block(ref1, ref2))
index 97ad9bbeb35d24ec0ad228aa6f040b61bffb33a5..bfe4a337fb4d13a058446265b7baf4a1437aa602 100644 (file)
@@ -1614,7 +1614,7 @@ struct btrfs_fs_info {
 
        spinlock_t delayed_iput_lock;
        struct list_head delayed_iputs;
-       struct rw_semaphore delayed_iput_sem;
+       struct mutex cleaner_delayed_iput_mutex;
 
        /* this protects tree_mod_seq_list */
        spinlock_t tree_mod_seq_lock;
@@ -3641,6 +3641,7 @@ int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
 int __get_raid_index(u64 flags);
 int btrfs_start_write_no_snapshoting(struct btrfs_root *root);
 void btrfs_end_write_no_snapshoting(struct btrfs_root *root);
+void btrfs_wait_for_snapshot_creation(struct btrfs_root *root);
 void check_system_chunk(struct btrfs_trans_handle *trans,
                        struct btrfs_root *root,
                        const u64 type);
index 1e668fb7dd4c73dbc03b7f642660f2af20bd2ddb..cbb7dbfb3fffffc54f621b8dc43a7b1ec9a1185b 100644 (file)
@@ -614,7 +614,7 @@ static void btrfs_dev_replace_update_device_in_mapping_tree(
                em = lookup_extent_mapping(em_tree, start, (u64)-1);
                if (!em)
                        break;
-               map = (struct map_lookup *)em->bdev;
+               map = em->map_lookup;
                for (i = 0; i < map->num_stripes; i++)
                        if (srcdev == map->stripes[i].dev)
                                map->stripes[i].dev = tgtdev;
index e99ccd6ffb2c14f58bf38f548e202f0e0f86e9ea..4545e2e2ad45a556113eecf30b1a7d49ee7ca260 100644 (file)
 #include <asm/cpufeature.h>
 #endif
 
+#define BTRFS_SUPER_FLAG_SUPP  (BTRFS_HEADER_FLAG_WRITTEN |\
+                                BTRFS_HEADER_FLAG_RELOC |\
+                                BTRFS_SUPER_FLAG_ERROR |\
+                                BTRFS_SUPER_FLAG_SEEDING |\
+                                BTRFS_SUPER_FLAG_METADUMP)
+
 static const struct extent_io_ops btree_extent_io_ops;
 static void end_workqueue_fn(struct btrfs_work *work);
 static void free_fs_root(struct btrfs_root *root);
@@ -176,6 +182,7 @@ static struct btrfs_lockdep_keyset {
        { .id = BTRFS_TREE_RELOC_OBJECTID,      .name_stem = "treloc"   },
        { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc"   },
        { .id = BTRFS_UUID_TREE_OBJECTID,       .name_stem = "uuid"     },
+       { .id = BTRFS_FREE_SPACE_TREE_OBJECTID, .name_stem = "free-space" },
        { .id = 0,                              .name_stem = "tree"     },
 };
 
@@ -1583,8 +1590,23 @@ int btrfs_init_fs_root(struct btrfs_root *root)
        ret = get_anon_bdev(&root->anon_dev);
        if (ret)
                goto free_writers;
+
+       mutex_lock(&root->objectid_mutex);
+       ret = btrfs_find_highest_objectid(root,
+                                       &root->highest_objectid);
+       if (ret) {
+               mutex_unlock(&root->objectid_mutex);
+               goto free_root_dev;
+       }
+
+       ASSERT(root->highest_objectid <= BTRFS_LAST_FREE_OBJECTID);
+
+       mutex_unlock(&root->objectid_mutex);
+
        return 0;
 
+free_root_dev:
+       free_anon_bdev(root->anon_dev);
 free_writers:
        btrfs_free_subvolume_writers(root->subv_writers);
 fail:
@@ -1766,7 +1788,6 @@ static int cleaner_kthread(void *arg)
        int again;
        struct btrfs_trans_handle *trans;
 
-       set_freezable();
        do {
                again = 0;
 
@@ -1786,7 +1807,10 @@ static int cleaner_kthread(void *arg)
                        goto sleep;
                }
 
+               mutex_lock(&root->fs_info->cleaner_delayed_iput_mutex);
                btrfs_run_delayed_iputs(root);
+               mutex_unlock(&root->fs_info->cleaner_delayed_iput_mutex);
+
                again = btrfs_clean_one_deleted_snapshot(root);
                mutex_unlock(&root->fs_info->cleaner_mutex);
 
@@ -2556,8 +2580,8 @@ int open_ctree(struct super_block *sb,
        mutex_init(&fs_info->delete_unused_bgs_mutex);
        mutex_init(&fs_info->reloc_mutex);
        mutex_init(&fs_info->delalloc_root_mutex);
+       mutex_init(&fs_info->cleaner_delayed_iput_mutex);
        seqlock_init(&fs_info->profiles_lock);
-       init_rwsem(&fs_info->delayed_iput_sem);
 
        INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
        INIT_LIST_HEAD(&fs_info->space_info);
@@ -2742,26 +2766,6 @@ int open_ctree(struct super_block *sb,
                goto fail_alloc;
        }
 
-       /*
-        * Leafsize and nodesize were always equal, this is only a sanity check.
-        */
-       if (le32_to_cpu(disk_super->__unused_leafsize) !=
-           btrfs_super_nodesize(disk_super)) {
-               printk(KERN_ERR "BTRFS: couldn't mount because metadata "
-                      "blocksizes don't match.  node %d leaf %d\n",
-                      btrfs_super_nodesize(disk_super),
-                      le32_to_cpu(disk_super->__unused_leafsize));
-               err = -EINVAL;
-               goto fail_alloc;
-       }
-       if (btrfs_super_nodesize(disk_super) > BTRFS_MAX_METADATA_BLOCKSIZE) {
-               printk(KERN_ERR "BTRFS: couldn't mount because metadata "
-                      "blocksize (%d) was too large\n",
-                      btrfs_super_nodesize(disk_super));
-               err = -EINVAL;
-               goto fail_alloc;
-       }
-
        features = btrfs_super_incompat_flags(disk_super);
        features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
        if (tree_root->fs_info->compress_type == BTRFS_COMPRESS_LZO)
@@ -2833,17 +2837,6 @@ int open_ctree(struct super_block *sb,
        sb->s_blocksize = sectorsize;
        sb->s_blocksize_bits = blksize_bits(sectorsize);
 
-       if (btrfs_super_magic(disk_super) != BTRFS_MAGIC) {
-               printk(KERN_ERR "BTRFS: valid FS not found on %s\n", sb->s_id);
-               goto fail_sb_buffer;
-       }
-
-       if (sectorsize != PAGE_SIZE) {
-               printk(KERN_ERR "BTRFS: incompatible sector size (%lu) "
-                      "found on %s\n", (unsigned long)sectorsize, sb->s_id);
-               goto fail_sb_buffer;
-       }
-
        mutex_lock(&fs_info->chunk_mutex);
        ret = btrfs_read_sys_array(tree_root);
        mutex_unlock(&fs_info->chunk_mutex);
@@ -2915,6 +2908,18 @@ retry_root_backup:
        tree_root->commit_root = btrfs_root_node(tree_root);
        btrfs_set_root_refs(&tree_root->root_item, 1);
 
+       mutex_lock(&tree_root->objectid_mutex);
+       ret = btrfs_find_highest_objectid(tree_root,
+                                       &tree_root->highest_objectid);
+       if (ret) {
+               mutex_unlock(&tree_root->objectid_mutex);
+               goto recovery_tree_root;
+       }
+
+       ASSERT(tree_root->highest_objectid <= BTRFS_LAST_FREE_OBJECTID);
+
+       mutex_unlock(&tree_root->objectid_mutex);
+
        ret = btrfs_read_roots(fs_info, tree_root);
        if (ret)
                goto recovery_tree_root;
@@ -4018,8 +4023,17 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
                              int read_only)
 {
        struct btrfs_super_block *sb = fs_info->super_copy;
+       u64 nodesize = btrfs_super_nodesize(sb);
+       u64 sectorsize = btrfs_super_sectorsize(sb);
        int ret = 0;
 
+       if (btrfs_super_magic(sb) != BTRFS_MAGIC) {
+               printk(KERN_ERR "BTRFS: no valid FS found\n");
+               ret = -EINVAL;
+       }
+       if (btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP)
+               printk(KERN_WARNING "BTRFS: unrecognized super flag: %llu\n",
+                               btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP);
        if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) {
                printk(KERN_ERR "BTRFS: tree_root level too big: %d >= %d\n",
                                btrfs_super_root_level(sb), BTRFS_MAX_LEVEL);
@@ -4037,31 +4051,46 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
        }
 
        /*
-        * The common minimum, we don't know if we can trust the nodesize/sectorsize
-        * items yet, they'll be verified later. Issue just a warning.
+        * Check sectorsize and nodesize first, other check will need it.
+        * Check all possible sectorsize(4K, 8K, 16K, 32K, 64K) here.
         */
-       if (!IS_ALIGNED(btrfs_super_root(sb), 4096))
+       if (!is_power_of_2(sectorsize) || sectorsize < 4096 ||
+           sectorsize > BTRFS_MAX_METADATA_BLOCKSIZE) {
+               printk(KERN_ERR "BTRFS: invalid sectorsize %llu\n", sectorsize);
+               ret = -EINVAL;
+       }
+       /* Only PAGE SIZE is supported yet */
+       if (sectorsize != PAGE_CACHE_SIZE) {
+               printk(KERN_ERR "BTRFS: sectorsize %llu not supported yet, only support %lu\n",
+                               sectorsize, PAGE_CACHE_SIZE);
+               ret = -EINVAL;
+       }
+       if (!is_power_of_2(nodesize) || nodesize < sectorsize ||
+           nodesize > BTRFS_MAX_METADATA_BLOCKSIZE) {
+               printk(KERN_ERR "BTRFS: invalid nodesize %llu\n", nodesize);
+               ret = -EINVAL;
+       }
+       if (nodesize != le32_to_cpu(sb->__unused_leafsize)) {
+               printk(KERN_ERR "BTRFS: invalid leafsize %u, should be %llu\n",
+                               le32_to_cpu(sb->__unused_leafsize),
+                               nodesize);
+               ret = -EINVAL;
+       }
+
+       /* Root alignment check */
+       if (!IS_ALIGNED(btrfs_super_root(sb), sectorsize)) {
                printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n",
                                btrfs_super_root(sb));
-       if (!IS_ALIGNED(btrfs_super_chunk_root(sb), 4096))
+               ret = -EINVAL;
+       }
+       if (!IS_ALIGNED(btrfs_super_chunk_root(sb), sectorsize)) {
                printk(KERN_WARNING "BTRFS: chunk_root block unaligned: %llu\n",
                                btrfs_super_chunk_root(sb));
-       if (!IS_ALIGNED(btrfs_super_log_root(sb), 4096))
-               printk(KERN_WARNING "BTRFS: log_root block unaligned: %llu\n",
-                               btrfs_super_log_root(sb));
-
-       /*
-        * Check the lower bound, the alignment and other constraints are
-        * checked later.
-        */
-       if (btrfs_super_nodesize(sb) < 4096) {
-               printk(KERN_ERR "BTRFS: nodesize too small: %u < 4096\n",
-                               btrfs_super_nodesize(sb));
                ret = -EINVAL;
        }
-       if (btrfs_super_sectorsize(sb) < 4096) {
-               printk(KERN_ERR "BTRFS: sectorsize too small: %u < 4096\n",
-                               btrfs_super_sectorsize(sb));
+       if (!IS_ALIGNED(btrfs_super_log_root(sb), sectorsize)) {
+               printk(KERN_WARNING "BTRFS: log_root block unaligned: %llu\n",
+                               btrfs_super_log_root(sb));
                ret = -EINVAL;
        }
 
index 60cc1399c64f9de26ab9fa6efcaf3e8b0e067829..e2287c7c10bebbbb6dc68f0c4aed056a97bdaafe 100644 (file)
@@ -4139,8 +4139,10 @@ commit_trans:
                    !atomic_read(&root->fs_info->open_ioctl_trans)) {
                        need_commit--;
 
-                       if (need_commit > 0)
+                       if (need_commit > 0) {
+                               btrfs_start_delalloc_roots(fs_info, 0, -1);
                                btrfs_wait_ordered_roots(fs_info, -1);
+                       }
 
                        trans = btrfs_join_transaction(root);
                        if (IS_ERR(trans))
@@ -4153,11 +4155,12 @@ commit_trans:
                                if (ret)
                                        return ret;
                                /*
-                                * make sure that all running delayed iput are
-                                * done
+                                * The cleaner kthread might still be doing iput
+                                * operations. Wait for it to finish so that
+                                * more space is released.
                                 */
-                               down_write(&root->fs_info->delayed_iput_sem);
-                               up_write(&root->fs_info->delayed_iput_sem);
+                               mutex_lock(&root->fs_info->cleaner_delayed_iput_mutex);
+                               mutex_unlock(&root->fs_info->cleaner_delayed_iput_mutex);
                                goto again;
                        } else {
                                btrfs_end_transaction(trans, root);
@@ -10399,7 +10402,7 @@ btrfs_start_trans_remove_block_group(struct btrfs_fs_info *fs_info,
         * more device items and remove one chunk item), but this is done at
         * btrfs_remove_chunk() through a call to check_system_chunk().
         */
-       map = (struct map_lookup *)em->bdev;
+       map = em->map_lookup;
        num_items = 3 + map->num_stripes;
        free_extent_map(em);
 
@@ -10586,7 +10589,7 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
 
        disk_super = fs_info->super_copy;
        if (!btrfs_super_root(disk_super))
-               return 1;
+               return -EINVAL;
 
        features = btrfs_super_incompat_flags(disk_super);
        if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
@@ -10816,3 +10819,23 @@ int btrfs_start_write_no_snapshoting(struct btrfs_root *root)
        }
        return 1;
 }
+
+static int wait_snapshoting_atomic_t(atomic_t *a)
+{
+       schedule();
+       return 0;
+}
+
+void btrfs_wait_for_snapshot_creation(struct btrfs_root *root)
+{
+       while (true) {
+               int ret;
+
+               ret = btrfs_start_write_no_snapshoting(root);
+               if (ret)
+                       break;
+               wait_on_atomic_t(&root->will_be_snapshoted,
+                                wait_snapshoting_atomic_t,
+                                TASK_UNINTERRUPTIBLE);
+       }
+}
index 6a98bddd8f33a96e82d0fe856763e53b67e9b064..84fb56d5c018dbfea2a4b1970e4275ada5daa0fb 100644 (file)
@@ -76,7 +76,7 @@ void free_extent_map(struct extent_map *em)
                WARN_ON(extent_map_in_tree(em));
                WARN_ON(!list_empty(&em->list));
                if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags))
-                       kfree(em->bdev);
+                       kfree(em->map_lookup);
                kmem_cache_free(extent_map_cache, em);
        }
 }
index b2991fd8583efe8ed92de32904eb510542bc1f66..eb8b8fae036bc3c67ceea03220cdca503626546f 100644 (file)
@@ -32,7 +32,15 @@ struct extent_map {
        u64 block_len;
        u64 generation;
        unsigned long flags;
-       struct block_device *bdev;
+       union {
+               struct block_device *bdev;
+
+               /*
+                * used for chunk mappings
+                * flags & EXTENT_FLAG_FS_MAPPING must be set
+                */
+               struct map_lookup *map_lookup;
+       };
        atomic_t refs;
        unsigned int compress_type;
        struct list_head list;
index 83d7859d76199d96ec882c35c87dea98526d229e..098bb8f690c992e1ebd01270f49ff2d37e6658bd 100644 (file)
@@ -406,8 +406,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
 /* simple helper to fault in pages and copy.  This should go away
  * and be replaced with calls into generic code.
  */
-static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
-                                        size_t write_bytes,
+static noinline int btrfs_copy_from_user(loff_t pos, size_t write_bytes,
                                         struct page **prepared_pages,
                                         struct iov_iter *i)
 {
@@ -1588,8 +1587,7 @@ again:
                        ret = 0;
                }
 
-               copied = btrfs_copy_from_user(pos, num_pages,
-                                          write_bytes, pages, i);
+               copied = btrfs_copy_from_user(pos, write_bytes, pages, i);
 
                /*
                 * if we have trouble faulting in the pages, fall
@@ -1764,17 +1762,17 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
        loff_t pos;
        size_t count;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        err = generic_write_checks(iocb, from);
        if (err <= 0) {
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                return err;
        }
 
        current->backing_dev_info = inode_to_bdi(inode);
        err = file_remove_privs(file);
        if (err) {
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                goto out;
        }
 
@@ -1785,7 +1783,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
         * to stop this write operation to ensure FS consistency.
         */
        if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) {
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                err = -EROFS;
                goto out;
        }
@@ -1806,7 +1804,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
                end_pos = round_up(pos + count, root->sectorsize);
                err = btrfs_cont_expand(inode, i_size_read(inode), end_pos);
                if (err) {
-                       mutex_unlock(&inode->i_mutex);
+                       inode_unlock(inode);
                        goto out;
                }
        }
@@ -1822,7 +1820,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
                        iocb->ki_pos = pos + num_written;
        }
 
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        /*
         * We also have to set last_sub_trans to the current log transid,
@@ -1911,7 +1909,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
        if (ret)
                return ret;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        atomic_inc(&root->log_batch);
        full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
                             &BTRFS_I(inode)->runtime_flags);
@@ -1963,7 +1961,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
                ret = start_ordered_ops(inode, start, end);
        }
        if (ret) {
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                goto out;
        }
        atomic_inc(&root->log_batch);
@@ -2009,7 +2007,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
                 */
                clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
                          &BTRFS_I(inode)->runtime_flags);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                goto out;
        }
 
@@ -2033,7 +2031,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
        trans = btrfs_start_transaction(root, 0);
        if (IS_ERR(trans)) {
                ret = PTR_ERR(trans);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                goto out;
        }
        trans->sync = true;
@@ -2056,7 +2054,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
         * file again, but that will end up using the synchronization
         * inside btrfs_sync_log to keep things safe.
         */
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        /*
         * If any of the ordered extents had an error, just return it to user
@@ -2305,7 +2303,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
        if (ret)
                return ret;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        ino_size = round_up(inode->i_size, PAGE_CACHE_SIZE);
        ret = find_first_non_hole(inode, &offset, &len);
        if (ret < 0)
@@ -2345,7 +2343,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
                truncated_page = true;
                ret = btrfs_truncate_page(inode, offset, 0, 0);
                if (ret) {
-                       mutex_unlock(&inode->i_mutex);
+                       inode_unlock(inode);
                        return ret;
                }
        }
@@ -2421,7 +2419,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
                ret = btrfs_wait_ordered_range(inode, lockstart,
                                               lockend - lockstart + 1);
                if (ret) {
-                       mutex_unlock(&inode->i_mutex);
+                       inode_unlock(inode);
                        return ret;
                }
        }
@@ -2576,7 +2574,7 @@ out_only_mutex:
                        ret = btrfs_end_transaction(trans, root);
                }
        }
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        if (ret && !err)
                err = ret;
        return err;
@@ -2660,7 +2658,7 @@ static long btrfs_fallocate(struct file *file, int mode,
        if (ret < 0)
                return ret;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        ret = inode_newsize_ok(inode, alloc_end);
        if (ret)
                goto out;
@@ -2818,7 +2816,7 @@ out:
         * So this is completely used as cleanup.
         */
        btrfs_qgroup_free_data(inode, alloc_start, alloc_end - alloc_start);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        /* Let go of our reservation. */
        btrfs_free_reserved_data_space(inode, alloc_start,
                                       alloc_end - alloc_start);
@@ -2894,7 +2892,7 @@ static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence)
        struct inode *inode = file->f_mapping->host;
        int ret;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        switch (whence) {
        case SEEK_END:
        case SEEK_CUR:
@@ -2903,20 +2901,20 @@ static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence)
        case SEEK_DATA:
        case SEEK_HOLE:
                if (offset >= i_size_read(inode)) {
-                       mutex_unlock(&inode->i_mutex);
+                       inode_unlock(inode);
                        return -ENXIO;
                }
 
                ret = find_desired_extent(inode, &offset, whence);
                if (ret) {
-                       mutex_unlock(&inode->i_mutex);
+                       inode_unlock(inode);
                        return ret;
                }
        }
 
        offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return offset;
 }
 
index 393e36bd5845d996d216e14ef2c6ac6866fce746..53dbeaf6ce941cc7a6a8bf06c6a81df354e9e364 100644 (file)
@@ -153,6 +153,20 @@ static inline u32 free_space_bitmap_size(u64 size, u32 sectorsize)
 
 static unsigned long *alloc_bitmap(u32 bitmap_size)
 {
+       void *mem;
+
+       /*
+        * The allocation size varies, observed numbers were < 4K up to 16K.
+        * Using vmalloc unconditionally would be too heavy, we'll try
+        * contiguous allocations first.
+        */
+       if  (bitmap_size <= PAGE_SIZE)
+               return kzalloc(bitmap_size, GFP_NOFS);
+
+       mem = kzalloc(bitmap_size, GFP_NOFS | __GFP_NOWARN);
+       if (mem)
+               return mem;
+
        return __vmalloc(bitmap_size, GFP_NOFS | __GFP_HIGHMEM | __GFP_ZERO,
                         PAGE_KERNEL);
 }
@@ -289,7 +303,7 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
 
        ret = 0;
 out:
-       vfree(bitmap);
+       kvfree(bitmap);
        if (ret)
                btrfs_abort_transaction(trans, root, ret);
        return ret;
@@ -438,7 +452,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
 
        ret = 0;
 out:
-       vfree(bitmap);
+       kvfree(bitmap);
        if (ret)
                btrfs_abort_transaction(trans, root, ret);
        return ret;
index 8b57c17b3fb3194bfff6ae1e487fe0fa7a4fa887..e50316c4af157183d3862fac8e318abe7eb66ecd 100644 (file)
@@ -515,7 +515,7 @@ out:
        return ret;
 }
 
-static int btrfs_find_highest_objectid(struct btrfs_root *root, u64 *objectid)
+int btrfs_find_highest_objectid(struct btrfs_root *root, u64 *objectid)
 {
        struct btrfs_path *path;
        int ret;
@@ -555,13 +555,6 @@ int btrfs_find_free_objectid(struct btrfs_root *root, u64 *objectid)
        int ret;
        mutex_lock(&root->objectid_mutex);
 
-       if (unlikely(root->highest_objectid < BTRFS_FIRST_FREE_OBJECTID)) {
-               ret = btrfs_find_highest_objectid(root,
-                                                 &root->highest_objectid);
-               if (ret)
-                       goto out;
-       }
-
        if (unlikely(root->highest_objectid >= BTRFS_LAST_FREE_OBJECTID)) {
                ret = -ENOSPC;
                goto out;
index ddb347bfee232ec26543055fbf408bfb92f8028f..c8e864b2d530f88c7e65350ca557e36e6061700e 100644 (file)
@@ -9,5 +9,6 @@ int btrfs_save_ino_cache(struct btrfs_root *root,
                         struct btrfs_trans_handle *trans);
 
 int btrfs_find_free_objectid(struct btrfs_root *root, u64 *objectid);
+int btrfs_find_highest_objectid(struct btrfs_root *root, u64 *objectid);
 
 #endif
index 24783010768680bea28f4f0e5e9aaa2c6a62a41c..5f06eb1f43843055c0373daeb9ad98648865150f 100644 (file)
@@ -3134,7 +3134,6 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root)
 {
        struct btrfs_fs_info *fs_info = root->fs_info;
 
-       down_read(&fs_info->delayed_iput_sem);
        spin_lock(&fs_info->delayed_iput_lock);
        while (!list_empty(&fs_info->delayed_iputs)) {
                struct btrfs_inode *inode;
@@ -3153,7 +3152,6 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root)
                spin_lock(&fs_info->delayed_iput_lock);
        }
        spin_unlock(&fs_info->delayed_iput_lock);
-       up_read(&root->fs_info->delayed_iput_sem);
 }
 
 /*
@@ -4874,26 +4872,6 @@ next:
        return err;
 }
 
-static int wait_snapshoting_atomic_t(atomic_t *a)
-{
-       schedule();
-       return 0;
-}
-
-static void wait_for_snapshot_creation(struct btrfs_root *root)
-{
-       while (true) {
-               int ret;
-
-               ret = btrfs_start_write_no_snapshoting(root);
-               if (ret)
-                       break;
-               wait_on_atomic_t(&root->will_be_snapshoted,
-                                wait_snapshoting_atomic_t,
-                                TASK_UNINTERRUPTIBLE);
-       }
-}
-
 static int btrfs_setsize(struct inode *inode, struct iattr *attr)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -4925,7 +4903,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
                 * truncation, it must capture all writes that happened before
                 * this truncation.
                 */
-               wait_for_snapshot_creation(root);
+               btrfs_wait_for_snapshot_creation(root);
                ret = btrfs_cont_expand(inode, oldsize, newsize);
                if (ret) {
                        btrfs_end_write_no_snapshoting(root);
@@ -7138,21 +7116,41 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
        if (ret)
                return ERR_PTR(ret);
 
-       em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
-                             ins.offset, ins.offset, ins.offset, 0);
-       if (IS_ERR(em)) {
-               btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
-               return em;
-       }
-
+       /*
+        * Create the ordered extent before the extent map. This is to avoid
+        * races with the fast fsync path that would lead to it logging file
+        * extent items that point to disk extents that were not yet written to.
+        * The fast fsync path collects ordered extents into a local list and
+        * then collects all the new extent maps, so we must create the ordered
+        * extent first and make sure the fast fsync path collects any new
+        * ordered extents after collecting new extent maps as well.
+        * The fsync path simply can not rely on inode_dio_wait() because it
+        * causes deadlock with AIO.
+        */
        ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid,
                                           ins.offset, ins.offset, 0);
        if (ret) {
                btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
-               free_extent_map(em);
                return ERR_PTR(ret);
        }
 
+       em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
+                             ins.offset, ins.offset, ins.offset, 0);
+       if (IS_ERR(em)) {
+               struct btrfs_ordered_extent *oe;
+
+               btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
+               oe = btrfs_lookup_ordered_extent(inode, start);
+               ASSERT(oe);
+               if (WARN_ON(!oe))
+                       return em;
+               set_bit(BTRFS_ORDERED_IOERR, &oe->flags);
+               set_bit(BTRFS_ORDERED_IO_DONE, &oe->flags);
+               btrfs_remove_ordered_extent(inode, oe);
+               /* Once for our lookup and once for the ordered extents tree. */
+               btrfs_put_ordered_extent(oe);
+               btrfs_put_ordered_extent(oe);
+       }
        return em;
 }
 
@@ -8469,7 +8467,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
                 * not unlock the i_mutex at this case.
                 */
                if (offset + count <= inode->i_size) {
-                       mutex_unlock(&inode->i_mutex);
+                       inode_unlock(inode);
                        relock = true;
                }
                ret = btrfs_delalloc_reserve_space(inode, offset, count);
@@ -8526,7 +8524,7 @@ out:
        if (wakeup)
                inode_dio_end(inode);
        if (relock)
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
 
        return ret;
 }
index 2a47a3148ec80df57150e3f5aa7d321abb8d1ccd..952172ca7e455633c28a79292d18ebbfd68c4d18 100644 (file)
@@ -240,7 +240,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
        if (ret)
                return ret;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        ip_oldflags = ip->flags;
        i_oldflags = inode->i_flags;
@@ -358,7 +358,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
        }
 
  out_unlock:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        mnt_drop_write_file(file);
        return ret;
 }
@@ -568,6 +568,10 @@ static noinline int create_subvol(struct inode *dir,
                goto fail;
        }
 
+       mutex_lock(&new_root->objectid_mutex);
+       new_root->highest_objectid = new_dirid;
+       mutex_unlock(&new_root->objectid_mutex);
+
        /*
         * insert the directory item
         */
@@ -877,7 +881,7 @@ out_up_read:
 out_dput:
        dput(dentry);
 out_unlock:
-       mutex_unlock(&dir->i_mutex);
+       inode_unlock(dir);
        return error;
 }
 
@@ -1389,18 +1393,18 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
                        ra_index += cluster;
                }
 
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)
                        BTRFS_I(inode)->force_compress = compress_type;
                ret = cluster_pages_for_defrag(inode, pages, i, cluster);
                if (ret < 0) {
-                       mutex_unlock(&inode->i_mutex);
+                       inode_unlock(inode);
                        goto out_ra;
                }
 
                defrag_count += ret;
                balance_dirty_pages_ratelimited(inode->i_mapping);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
 
                if (newer_than) {
                        if (newer_off == (u64)-1)
@@ -1461,9 +1465,9 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
 
 out_ra:
        if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) {
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE;
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        }
        if (!file)
                kfree(ra);
@@ -2426,7 +2430,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
                goto out_dput;
        }
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        /*
         * Don't allow to delete a subvolume with send in progress. This is
@@ -2539,7 +2543,7 @@ out_up_write:
                spin_unlock(&dest->root_item_lock);
        }
 out_unlock_inode:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        if (!err) {
                d_invalidate(dentry);
                btrfs_invalidate_inodes(dest);
@@ -2555,7 +2559,7 @@ out_unlock_inode:
 out_dput:
        dput(dentry);
 out_unlock_dir:
-       mutex_unlock(&dir->i_mutex);
+       inode_unlock(dir);
 out_drop_write:
        mnt_drop_write_file(file);
 out:
@@ -2853,8 +2857,8 @@ static inline void lock_extent_range(struct inode *inode, u64 off, u64 len)
 
 static void btrfs_double_inode_unlock(struct inode *inode1, struct inode *inode2)
 {
-       mutex_unlock(&inode1->i_mutex);
-       mutex_unlock(&inode2->i_mutex);
+       inode_unlock(inode1);
+       inode_unlock(inode2);
 }
 
 static void btrfs_double_inode_lock(struct inode *inode1, struct inode *inode2)
@@ -2862,8 +2866,8 @@ static void btrfs_double_inode_lock(struct inode *inode1, struct inode *inode2)
        if (inode1 < inode2)
                swap(inode1, inode2);
 
-       mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
-       mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
+       inode_lock_nested(inode1, I_MUTEX_PARENT);
+       inode_lock_nested(inode2, I_MUTEX_CHILD);
 }
 
 static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1,
@@ -3022,7 +3026,7 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
                return 0;
 
        if (same_inode) {
-               mutex_lock(&src->i_mutex);
+               inode_lock(src);
 
                ret = extent_same_check_offsets(src, loff, &len, olen);
                if (ret)
@@ -3097,7 +3101,7 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
        btrfs_cmp_data_free(&cmp);
 out_unlock:
        if (same_inode)
-               mutex_unlock(&src->i_mutex);
+               inode_unlock(src);
        else
                btrfs_double_inode_unlock(src, dst);
 
@@ -3745,7 +3749,7 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
        if (!same_inode) {
                btrfs_double_inode_lock(src, inode);
        } else {
-               mutex_lock(&src->i_mutex);
+               inode_lock(src);
        }
 
        /* determine range to clone */
@@ -3816,7 +3820,7 @@ out_unlock:
        if (!same_inode)
                btrfs_double_inode_unlock(src, inode);
        else
-               mutex_unlock(&src->i_mutex);
+               inode_unlock(src);
        return ret;
 }
 
index 6d707545f775119e4883378636b5255a9dcc9144..55161369fab14d7d2381c2c3950a576c895be2e8 100644 (file)
@@ -609,13 +609,28 @@ static int rbio_can_merge(struct btrfs_raid_bio *last,
        return 1;
 }
 
+static int rbio_stripe_page_index(struct btrfs_raid_bio *rbio, int stripe,
+                                 int index)
+{
+       return stripe * rbio->stripe_npages + index;
+}
+
+/*
+ * these are just the pages from the rbio array, not from anything
+ * the FS sent down to us
+ */
+static struct page *rbio_stripe_page(struct btrfs_raid_bio *rbio, int stripe,
+                                    int index)
+{
+       return rbio->stripe_pages[rbio_stripe_page_index(rbio, stripe, index)];
+}
+
 /*
  * helper to index into the pstripe
  */
 static struct page *rbio_pstripe_page(struct btrfs_raid_bio *rbio, int index)
 {
-       index += (rbio->nr_data * rbio->stripe_len) >> PAGE_CACHE_SHIFT;
-       return rbio->stripe_pages[index];
+       return rbio_stripe_page(rbio, rbio->nr_data, index);
 }
 
 /*
@@ -626,10 +641,7 @@ static struct page *rbio_qstripe_page(struct btrfs_raid_bio *rbio, int index)
 {
        if (rbio->nr_data + 1 == rbio->real_stripes)
                return NULL;
-
-       index += ((rbio->nr_data + 1) * rbio->stripe_len) >>
-               PAGE_CACHE_SHIFT;
-       return rbio->stripe_pages[index];
+       return rbio_stripe_page(rbio, rbio->nr_data + 1, index);
 }
 
 /*
@@ -889,6 +901,7 @@ static void raid_write_end_io(struct bio *bio)
 {
        struct btrfs_raid_bio *rbio = bio->bi_private;
        int err = bio->bi_error;
+       int max_errors;
 
        if (err)
                fail_bio_stripe(rbio, bio);
@@ -901,7 +914,9 @@ static void raid_write_end_io(struct bio *bio)
        err = 0;
 
        /* OK, we have read all the stripes we need to. */
-       if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
+       max_errors = (rbio->operation == BTRFS_RBIO_PARITY_SCRUB) ?
+                    0 : rbio->bbio->max_errors;
+       if (atomic_read(&rbio->error) > max_errors)
                err = -EIO;
 
        rbio_orig_end_io(rbio, err);
@@ -947,8 +962,7 @@ static struct page *page_in_rbio(struct btrfs_raid_bio *rbio,
  */
 static unsigned long rbio_nr_pages(unsigned long stripe_len, int nr_stripes)
 {
-       unsigned long nr = stripe_len * nr_stripes;
-       return DIV_ROUND_UP(nr, PAGE_CACHE_SIZE);
+       return DIV_ROUND_UP(stripe_len, PAGE_CACHE_SIZE) * nr_stripes;
 }
 
 /*
@@ -966,8 +980,8 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_root *root,
        void *p;
 
        rbio = kzalloc(sizeof(*rbio) + num_pages * sizeof(struct page *) * 2 +
-                      DIV_ROUND_UP(stripe_npages, BITS_PER_LONG / 8),
-                       GFP_NOFS);
+                      DIV_ROUND_UP(stripe_npages, BITS_PER_LONG) *
+                      sizeof(long), GFP_NOFS);
        if (!rbio)
                return ERR_PTR(-ENOMEM);
 
@@ -1021,18 +1035,17 @@ static int alloc_rbio_pages(struct btrfs_raid_bio *rbio)
                if (!page)
                        return -ENOMEM;
                rbio->stripe_pages[i] = page;
-               ClearPageUptodate(page);
        }
        return 0;
 }
 
-/* allocate pages for just the p/q stripes */
+/* only allocate pages for p/q stripes */
 static int alloc_rbio_parity_pages(struct btrfs_raid_bio *rbio)
 {
        int i;
        struct page *page;
 
-       i = (rbio->nr_data * rbio->stripe_len) >> PAGE_CACHE_SHIFT;
+       i = rbio_stripe_page_index(rbio, rbio->nr_data, 0);
 
        for (; i < rbio->nr_pages; i++) {
                if (rbio->stripe_pages[i])
@@ -1120,18 +1133,6 @@ static void validate_rbio_for_rmw(struct btrfs_raid_bio *rbio)
        }
 }
 
-/*
- * these are just the pages from the rbio array, not from anything
- * the FS sent down to us
- */
-static struct page *rbio_stripe_page(struct btrfs_raid_bio *rbio, int stripe, int page)
-{
-       int index;
-       index = stripe * (rbio->stripe_len >> PAGE_CACHE_SHIFT);
-       index += page;
-       return rbio->stripe_pages[index];
-}
-
 /*
  * helper function to walk our bio list and populate the bio_pages array with
  * the result.  This seems expensive, but it is faster than constantly
@@ -1175,7 +1176,6 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
 {
        struct btrfs_bio *bbio = rbio->bbio;
        void *pointers[rbio->real_stripes];
-       int stripe_len = rbio->stripe_len;
        int nr_data = rbio->nr_data;
        int stripe;
        int pagenr;
@@ -1183,7 +1183,6 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
        int q_stripe = -1;
        struct bio_list bio_list;
        struct bio *bio;
-       int pages_per_stripe = stripe_len >> PAGE_CACHE_SHIFT;
        int ret;
 
        bio_list_init(&bio_list);
@@ -1226,7 +1225,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
        else
                clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
 
-       for (pagenr = 0; pagenr < pages_per_stripe; pagenr++) {
+       for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
                struct page *p;
                /* first collect one page from each data stripe */
                for (stripe = 0; stripe < nr_data; stripe++) {
@@ -1268,7 +1267,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
         * everything else.
         */
        for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
-               for (pagenr = 0; pagenr < pages_per_stripe; pagenr++) {
+               for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
                        struct page *page;
                        if (stripe < rbio->nr_data) {
                                page = page_in_rbio(rbio, stripe, pagenr, 1);
@@ -1292,7 +1291,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
                if (!bbio->tgtdev_map[stripe])
                        continue;
 
-               for (pagenr = 0; pagenr < pages_per_stripe; pagenr++) {
+               for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
                        struct page *page;
                        if (stripe < rbio->nr_data) {
                                page = page_in_rbio(rbio, stripe, pagenr, 1);
@@ -1506,7 +1505,6 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
        int bios_to_read = 0;
        struct bio_list bio_list;
        int ret;
-       int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE);
        int pagenr;
        int stripe;
        struct bio *bio;
@@ -1525,7 +1523,7 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
         * stripe
         */
        for (stripe = 0; stripe < rbio->nr_data; stripe++) {
-               for (pagenr = 0; pagenr < nr_pages; pagenr++) {
+               for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
                        struct page *page;
                        /*
                         * we want to find all the pages missing from
@@ -1801,7 +1799,6 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
        int pagenr, stripe;
        void **pointers;
        int faila = -1, failb = -1;
-       int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE);
        struct page *page;
        int err;
        int i;
@@ -1824,7 +1821,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
 
        index_rbio_pages(rbio);
 
-       for (pagenr = 0; pagenr < nr_pages; pagenr++) {
+       for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
                /*
                 * Now we just use bitmap to mark the horizontal stripes in
                 * which we have data when doing parity scrub.
@@ -1935,7 +1932,7 @@ pstripe:
                 * other endio functions will fiddle the uptodate bits
                 */
                if (rbio->operation == BTRFS_RBIO_WRITE) {
-                       for (i = 0;  i < nr_pages; i++) {
+                       for (i = 0;  i < rbio->stripe_npages; i++) {
                                if (faila != -1) {
                                        page = rbio_stripe_page(rbio, faila, i);
                                        SetPageUptodate(page);
@@ -2031,7 +2028,6 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
        int bios_to_read = 0;
        struct bio_list bio_list;
        int ret;
-       int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE);
        int pagenr;
        int stripe;
        struct bio *bio;
@@ -2055,7 +2051,7 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
                        continue;
                }
 
-               for (pagenr = 0; pagenr < nr_pages; pagenr++) {
+               for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
                        struct page *p;
 
                        /*
@@ -2279,37 +2275,11 @@ static int alloc_rbio_essential_pages(struct btrfs_raid_bio *rbio)
                        if (!page)
                                return -ENOMEM;
                        rbio->stripe_pages[index] = page;
-                       ClearPageUptodate(page);
                }
        }
        return 0;
 }
 
-/*
- * end io function used by finish_rmw.  When we finally
- * get here, we've written a full stripe
- */
-static void raid_write_parity_end_io(struct bio *bio)
-{
-       struct btrfs_raid_bio *rbio = bio->bi_private;
-       int err = bio->bi_error;
-
-       if (bio->bi_error)
-               fail_bio_stripe(rbio, bio);
-
-       bio_put(bio);
-
-       if (!atomic_dec_and_test(&rbio->stripes_pending))
-               return;
-
-       err = 0;
-
-       if (atomic_read(&rbio->error))
-               err = -EIO;
-
-       rbio_orig_end_io(rbio, err);
-}
-
 static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
                                         int need_check)
 {
@@ -2462,7 +2432,7 @@ submit_write:
                        break;
 
                bio->bi_private = rbio;
-               bio->bi_end_io = raid_write_parity_end_io;
+               bio->bi_end_io = raid_write_end_io;
                submit_bio(WRITE, bio);
        }
        return;
index ef6d8fc85853851da0da03923b75421982d65d8f..2bd0011450df2715ca926a6ccd2d99cb7d40f8b4 100644 (file)
@@ -575,7 +575,8 @@ static int is_cowonly_root(u64 root_objectid)
            root_objectid == BTRFS_TREE_LOG_OBJECTID ||
            root_objectid == BTRFS_CSUM_TREE_OBJECTID ||
            root_objectid == BTRFS_UUID_TREE_OBJECTID ||
-           root_objectid == BTRFS_QUOTA_TREE_OBJECTID)
+           root_objectid == BTRFS_QUOTA_TREE_OBJECTID ||
+           root_objectid == BTRFS_FREE_SPACE_TREE_OBJECTID)
                return 1;
        return 0;
 }
@@ -3030,7 +3031,7 @@ int prealloc_file_extent_cluster(struct inode *inode,
        int ret = 0;
 
        BUG_ON(cluster->start != cluster->boundary[0]);
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        ret = btrfs_check_data_free_space(inode, cluster->start,
                                          cluster->end + 1 - cluster->start);
@@ -3057,7 +3058,7 @@ int prealloc_file_extent_cluster(struct inode *inode,
        btrfs_free_reserved_data_space(inode, cluster->start,
                                       cluster->end + 1 - cluster->start);
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return ret;
 }
 
index 0c981ebe2acb427d5684bca46cc1281e61753905..92bf5ee732fb0e14f7e330dddaadccb4b70b35cc 100644 (file)
@@ -2813,7 +2813,7 @@ out:
 
 static inline int scrub_calc_parity_bitmap_len(int nsectors)
 {
-       return DIV_ROUND_UP(nsectors, BITS_PER_LONG) * (BITS_PER_LONG / 8);
+       return DIV_ROUND_UP(nsectors, BITS_PER_LONG) * sizeof(long);
 }
 
 static void scrub_parity_get(struct scrub_parity *sparity)
@@ -3458,7 +3458,7 @@ static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,
                return ret;
        }
 
-       map = (struct map_lookup *)em->bdev;
+       map = em->map_lookup;
        if (em->start != chunk_offset)
                goto out;
 
@@ -4279,7 +4279,7 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
                return PTR_ERR(inode);
 
        /* Avoid truncate/dio/punch hole.. */
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        inode_dio_wait(inode);
 
        physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
@@ -4358,7 +4358,7 @@ next_page:
        }
        ret = COPY_COMPLETE;
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        iput(inode);
        return ret;
 }
index 9b9eab6d048e93d32963b0c66a6d9ab6c022ee63..d41e09fe8e38d77674862c7fe61f610a81d1159d 100644 (file)
@@ -383,6 +383,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
        int ret = 0;
        char *compress_type;
        bool compress_force = false;
+       enum btrfs_compression_type saved_compress_type;
+       bool saved_compress_force;
+       int no_compress = 0;
 
        cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy);
        if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE))
@@ -462,6 +465,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
                        /* Fallthrough */
                case Opt_compress:
                case Opt_compress_type:
+                       saved_compress_type = btrfs_test_opt(root, COMPRESS) ?
+                               info->compress_type : BTRFS_COMPRESS_NONE;
+                       saved_compress_force =
+                               btrfs_test_opt(root, FORCE_COMPRESS);
                        if (token == Opt_compress ||
                            token == Opt_compress_force ||
                            strcmp(args[0].from, "zlib") == 0) {
@@ -470,6 +477,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
                                btrfs_set_opt(info->mount_opt, COMPRESS);
                                btrfs_clear_opt(info->mount_opt, NODATACOW);
                                btrfs_clear_opt(info->mount_opt, NODATASUM);
+                               no_compress = 0;
                        } else if (strcmp(args[0].from, "lzo") == 0) {
                                compress_type = "lzo";
                                info->compress_type = BTRFS_COMPRESS_LZO;
@@ -477,25 +485,21 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
                                btrfs_clear_opt(info->mount_opt, NODATACOW);
                                btrfs_clear_opt(info->mount_opt, NODATASUM);
                                btrfs_set_fs_incompat(info, COMPRESS_LZO);
+                               no_compress = 0;
                        } else if (strncmp(args[0].from, "no", 2) == 0) {
                                compress_type = "no";
                                btrfs_clear_opt(info->mount_opt, COMPRESS);
                                btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
                                compress_force = false;
+                               no_compress++;
                        } else {
                                ret = -EINVAL;
                                goto out;
                        }
 
                        if (compress_force) {
-                               btrfs_set_and_info(root, FORCE_COMPRESS,
-                                                  "force %s compression",
-                                                  compress_type);
+                               btrfs_set_opt(info->mount_opt, FORCE_COMPRESS);
                        } else {
-                               if (!btrfs_test_opt(root, COMPRESS))
-                                       btrfs_info(root->fs_info,
-                                                  "btrfs: use %s compression",
-                                                  compress_type);
                                /*
                                 * If we remount from compress-force=xxx to
                                 * compress=xxx, we need clear FORCE_COMPRESS
@@ -504,6 +508,17 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
                                 */
                                btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
                        }
+                       if ((btrfs_test_opt(root, COMPRESS) &&
+                            (info->compress_type != saved_compress_type ||
+                             compress_force != saved_compress_force)) ||
+                           (!btrfs_test_opt(root, COMPRESS) &&
+                            no_compress == 1)) {
+                               btrfs_info(root->fs_info,
+                                          "%s %s compression",
+                                          (compress_force) ? "force" : "use",
+                                          compress_type);
+                       }
+                       compress_force = false;
                        break;
                case Opt_ssd:
                        btrfs_set_and_info(root, SSD,
index e0ac85949067c30191ce3635e2a65edbe8a56361..539e7b5e3f86a83059f070409fdb36749a958ce4 100644 (file)
@@ -202,6 +202,7 @@ BTRFS_FEAT_ATTR_INCOMPAT(extended_iref, EXTENDED_IREF);
 BTRFS_FEAT_ATTR_INCOMPAT(raid56, RAID56);
 BTRFS_FEAT_ATTR_INCOMPAT(skinny_metadata, SKINNY_METADATA);
 BTRFS_FEAT_ATTR_INCOMPAT(no_holes, NO_HOLES);
+BTRFS_FEAT_ATTR_COMPAT_RO(free_space_tree, FREE_SPACE_TREE);
 
 static struct attribute *btrfs_supported_feature_attrs[] = {
        BTRFS_FEAT_ATTR_PTR(mixed_backref),
@@ -213,6 +214,7 @@ static struct attribute *btrfs_supported_feature_attrs[] = {
        BTRFS_FEAT_ATTR_PTR(raid56),
        BTRFS_FEAT_ATTR_PTR(skinny_metadata),
        BTRFS_FEAT_ATTR_PTR(no_holes),
+       BTRFS_FEAT_ATTR_PTR(free_space_tree),
        NULL
 };
 
@@ -780,6 +782,39 @@ failure:
        return error;
 }
 
+
+/*
+ * Change per-fs features in /sys/fs/btrfs/UUID/features to match current
+ * values in superblock. Call after any changes to incompat/compat_ro flags
+ */
+void btrfs_sysfs_feature_update(struct btrfs_fs_info *fs_info,
+               u64 bit, enum btrfs_feature_set set)
+{
+       struct btrfs_fs_devices *fs_devs;
+       struct kobject *fsid_kobj;
+       u64 features;
+       int ret;
+
+       if (!fs_info)
+               return;
+
+       features = get_features(fs_info, set);
+       ASSERT(bit & supported_feature_masks[set]);
+
+       fs_devs = fs_info->fs_devices;
+       fsid_kobj = &fs_devs->fsid_kobj;
+
+       if (!fsid_kobj->state_initialized)
+               return;
+
+       /*
+        * FIXME: this is too heavy to update just one value, ideally we'd like
+        * to use sysfs_update_group but some refactoring is needed first.
+        */
+       sysfs_remove_group(fsid_kobj, &btrfs_feature_attr_group);
+       ret = sysfs_create_group(fsid_kobj, &btrfs_feature_attr_group);
+}
+
 static int btrfs_init_debugfs(void)
 {
 #ifdef CONFIG_DEBUG_FS
index 9c09522125a6b26d0577e139c38ebad7ca5c8331..d7da1a4c2f6c12d04a8f29e4d8f503943058d46d 100644 (file)
@@ -56,7 +56,7 @@ static struct btrfs_feature_attr btrfs_attr_##_name = {                            \
 #define BTRFS_FEAT_ATTR_COMPAT(name, feature) \
        BTRFS_FEAT_ATTR(name, FEAT_COMPAT, BTRFS_FEATURE_COMPAT, feature)
 #define BTRFS_FEAT_ATTR_COMPAT_RO(name, feature) \
-       BTRFS_FEAT_ATTR(name, FEAT_COMPAT_RO, BTRFS_FEATURE_COMPAT, feature)
+       BTRFS_FEAT_ATTR(name, FEAT_COMPAT_RO, BTRFS_FEATURE_COMPAT_RO, feature)
 #define BTRFS_FEAT_ATTR_INCOMPAT(name, feature) \
        BTRFS_FEAT_ATTR(name, FEAT_INCOMPAT, BTRFS_FEATURE_INCOMPAT, feature)
 
@@ -90,4 +90,7 @@ int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs,
                                struct kobject *parent);
 int btrfs_sysfs_add_device(struct btrfs_fs_devices *fs_devs);
 void btrfs_sysfs_remove_fsid(struct btrfs_fs_devices *fs_devs);
+void btrfs_sysfs_feature_update(struct btrfs_fs_info *fs_info,
+               u64 bit, enum btrfs_feature_set set);
+
 #endif /* _BTRFS_SYSFS_H_ */
index b1d920b3007017c2014d28217cb5efa1d7ed4d96..0e1e61a7ec23265d292e165fd39f1f8d2080f604 100644 (file)
@@ -82,18 +82,18 @@ void btrfs_destroy_test_fs(void)
 struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(void)
 {
        struct btrfs_fs_info *fs_info = kzalloc(sizeof(struct btrfs_fs_info),
-                                               GFP_NOFS);
+                                               GFP_KERNEL);
 
        if (!fs_info)
                return fs_info;
        fs_info->fs_devices = kzalloc(sizeof(struct btrfs_fs_devices),
-                                     GFP_NOFS);
+                                     GFP_KERNEL);
        if (!fs_info->fs_devices) {
                kfree(fs_info);
                return NULL;
        }
        fs_info->super_copy = kzalloc(sizeof(struct btrfs_super_block),
-                                     GFP_NOFS);
+                                     GFP_KERNEL);
        if (!fs_info->super_copy) {
                kfree(fs_info->fs_devices);
                kfree(fs_info);
@@ -180,11 +180,11 @@ btrfs_alloc_dummy_block_group(unsigned long length)
 {
        struct btrfs_block_group_cache *cache;
 
-       cache = kzalloc(sizeof(*cache), GFP_NOFS);
+       cache = kzalloc(sizeof(*cache), GFP_KERNEL);
        if (!cache)
                return NULL;
        cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
-                                       GFP_NOFS);
+                                       GFP_KERNEL);
        if (!cache->free_space_ctl) {
                kfree(cache);
                return NULL;
index e29fa297e053951a2d2d178a8793c9d49e021023..669b58201e36881fb1434d76fd787a45329f042e 100644 (file)
@@ -94,7 +94,7 @@ static int test_find_delalloc(void)
         * test.
         */
        for (index = 0; index < (total_dirty >> PAGE_CACHE_SHIFT); index++) {
-               page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
+               page = find_or_create_page(inode->i_mapping, index, GFP_KERNEL);
                if (!page) {
                        test_msg("Failed to allocate test page\n");
                        ret = -ENOMEM;
@@ -113,7 +113,7 @@ static int test_find_delalloc(void)
         * |--- delalloc ---|
         * |---  search  ---|
         */
-       set_extent_delalloc(&tmp, 0, 4095, NULL, GFP_NOFS);
+       set_extent_delalloc(&tmp, 0, 4095, NULL, GFP_KERNEL);
        start = 0;
        end = 0;
        found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
@@ -144,7 +144,7 @@ static int test_find_delalloc(void)
                test_msg("Couldn't find the locked page\n");
                goto out_bits;
        }
-       set_extent_delalloc(&tmp, 4096, max_bytes - 1, NULL, GFP_NOFS);
+       set_extent_delalloc(&tmp, 4096, max_bytes - 1, NULL, GFP_KERNEL);
        start = test_start;
        end = 0;
        found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
@@ -199,7 +199,7 @@ static int test_find_delalloc(void)
         *
         * We are re-using our test_start from above since it works out well.
         */
-       set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, NULL, GFP_NOFS);
+       set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, NULL, GFP_KERNEL);
        start = test_start;
        end = 0;
        found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
@@ -262,7 +262,7 @@ static int test_find_delalloc(void)
        }
        ret = 0;
 out_bits:
-       clear_extent_bits(&tmp, 0, total_dirty - 1, (unsigned)-1, GFP_NOFS);
+       clear_extent_bits(&tmp, 0, total_dirty - 1, (unsigned)-1, GFP_KERNEL);
 out:
        if (locked_page)
                page_cache_release(locked_page);
@@ -360,7 +360,7 @@ static int test_eb_bitmaps(void)
 
        test_msg("Running extent buffer bitmap tests\n");
 
-       bitmap = kmalloc(len, GFP_NOFS);
+       bitmap = kmalloc(len, GFP_KERNEL);
        if (!bitmap) {
                test_msg("Couldn't allocate test bitmap\n");
                return -ENOMEM;
index 5de55fdd28bcda6aca3d0a2becf5bed8e96f1b63..e2d3da02deee4b73b9f9c96ed1e6cc9f4c1b5700 100644 (file)
@@ -974,7 +974,7 @@ static int test_extent_accounting(void)
                               (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095,
                               EXTENT_DELALLOC | EXTENT_DIRTY |
                               EXTENT_UPTODATE | EXTENT_DO_ACCOUNTING, 0, 0,
-                              NULL, GFP_NOFS);
+                              NULL, GFP_KERNEL);
        if (ret) {
                test_msg("clear_extent_bit returned %d\n", ret);
                goto out;
@@ -1045,7 +1045,7 @@ static int test_extent_accounting(void)
                               BTRFS_MAX_EXTENT_SIZE+8191,
                               EXTENT_DIRTY | EXTENT_DELALLOC |
                               EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
-                              NULL, GFP_NOFS);
+                              NULL, GFP_KERNEL);
        if (ret) {
                test_msg("clear_extent_bit returned %d\n", ret);
                goto out;
@@ -1079,7 +1079,7 @@ static int test_extent_accounting(void)
        ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
                               EXTENT_DIRTY | EXTENT_DELALLOC |
                               EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
-                              NULL, GFP_NOFS);
+                              NULL, GFP_KERNEL);
        if (ret) {
                test_msg("clear_extent_bit returned %d\n", ret);
                goto out;
@@ -1096,7 +1096,7 @@ out:
                clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
                                 EXTENT_DIRTY | EXTENT_DELALLOC |
                                 EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
-                                NULL, GFP_NOFS);
+                                NULL, GFP_KERNEL);
        iput(inode);
        btrfs_free_dummy_root(root);
        return ret;
index 323e12cc9d2f522388fe929ed66d4dd946b5881d..978c3a8108936381309de4681e90400aeb86874f 100644 (file)
@@ -4127,7 +4127,9 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
                                     struct inode *inode,
                                     struct btrfs_path *path,
                                     struct list_head *logged_list,
-                                    struct btrfs_log_ctx *ctx)
+                                    struct btrfs_log_ctx *ctx,
+                                    const u64 start,
+                                    const u64 end)
 {
        struct extent_map *em, *n;
        struct list_head extents;
@@ -4166,7 +4168,13 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
        }
 
        list_sort(NULL, &extents, extent_cmp);
-
+       /*
+        * Collect any new ordered extents within the range. This is to
+        * prevent logging file extent items without waiting for the disk
+        * location they point to being written. We do this only to deal
+        * with races against concurrent lockless direct IO writes.
+        */
+       btrfs_get_logged_extents(inode, logged_list, start, end);
 process:
        while (!list_empty(&extents)) {
                em = list_entry(extents.next, struct extent_map, list);
@@ -4701,7 +4709,7 @@ log_extents:
                        goto out_unlock;
                }
                ret = btrfs_log_changed_extents(trans, root, inode, dst_path,
-                                               &logged_list, ctx);
+                                               &logged_list, ctx, start, end);
                if (ret) {
                        err = ret;
                        goto out_unlock;
index c32abbca9d77f65684b85d137fa41bec8bac005c..366b335946fae763e380ed884792df5efe07a2c9 100644 (file)
@@ -108,7 +108,7 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
        },
 };
 
-const u64 const btrfs_raid_group[BTRFS_NR_RAID_TYPES] = {
+const u64 btrfs_raid_group[BTRFS_NR_RAID_TYPES] = {
        [BTRFS_RAID_RAID10] = BTRFS_BLOCK_GROUP_RAID10,
        [BTRFS_RAID_RAID1]  = BTRFS_BLOCK_GROUP_RAID1,
        [BTRFS_RAID_DUP]    = BTRFS_BLOCK_GROUP_DUP,
@@ -233,6 +233,7 @@ static struct btrfs_device *__alloc_device(void)
        spin_lock_init(&dev->reada_lock);
        atomic_set(&dev->reada_in_flight, 0);
        atomic_set(&dev->dev_stats_ccnt, 0);
+       btrfs_device_data_ordered_init(dev);
        INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
        INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
 
@@ -1183,7 +1184,7 @@ again:
                struct map_lookup *map;
                int i;
 
-               map = (struct map_lookup *)em->bdev;
+               map = em->map_lookup;
                for (i = 0; i < map->num_stripes; i++) {
                        u64 end;
 
@@ -2755,7 +2756,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
                        free_extent_map(em);
                return -EINVAL;
        }
-       map = (struct map_lookup *)em->bdev;
+       map = em->map_lookup;
        lock_chunks(root->fs_info->chunk_root);
        check_system_chunk(trans, extent_root, map->type);
        unlock_chunks(root->fs_info->chunk_root);
@@ -3751,7 +3752,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
        if (btrfs_get_num_tolerated_disk_barrier_failures(bctl->meta.target) <
                btrfs_get_num_tolerated_disk_barrier_failures(bctl->data.target)) {
                btrfs_warn(fs_info,
-       "metatdata profile 0x%llx has lower redundancy than data profile 0x%llx",
+       "metadata profile 0x%llx has lower redundancy than data profile 0x%llx",
                        bctl->meta.target, bctl->data.target);
        }
 
@@ -4718,7 +4719,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
                goto error;
        }
        set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags);
-       em->bdev = (struct block_device *)map;
+       em->map_lookup = map;
        em->start = start;
        em->len = num_bytes;
        em->block_start = 0;
@@ -4813,7 +4814,7 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
                return -EINVAL;
        }
 
-       map = (struct map_lookup *)em->bdev;
+       map = em->map_lookup;
        item_size = btrfs_chunk_item_size(map->num_stripes);
        stripe_size = em->orig_block_len;
 
@@ -4968,7 +4969,7 @@ int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset)
        if (!em)
                return 1;
 
-       map = (struct map_lookup *)em->bdev;
+       map = em->map_lookup;
        for (i = 0; i < map->num_stripes; i++) {
                if (map->stripes[i].dev->missing) {
                        miss_ndevs++;
@@ -5048,7 +5049,7 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
                return 1;
        }
 
-       map = (struct map_lookup *)em->bdev;
+       map = em->map_lookup;
        if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1))
                ret = map->num_stripes;
        else if (map->type & BTRFS_BLOCK_GROUP_RAID10)
@@ -5084,7 +5085,7 @@ unsigned long btrfs_full_stripe_len(struct btrfs_root *root,
        BUG_ON(!em);
 
        BUG_ON(em->start > logical || em->start + em->len < logical);
-       map = (struct map_lookup *)em->bdev;
+       map = em->map_lookup;
        if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
                len = map->stripe_len * nr_data_stripes(map);
        free_extent_map(em);
@@ -5105,7 +5106,7 @@ int btrfs_is_parity_mirror(struct btrfs_mapping_tree *map_tree,
        BUG_ON(!em);
 
        BUG_ON(em->start > logical || em->start + em->len < logical);
-       map = (struct map_lookup *)em->bdev;
+       map = em->map_lookup;
        if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
                ret = 1;
        free_extent_map(em);
@@ -5264,7 +5265,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
                return -EINVAL;
        }
 
-       map = (struct map_lookup *)em->bdev;
+       map = em->map_lookup;
        offset = logical - em->start;
 
        stripe_len = map->stripe_len;
@@ -5378,35 +5379,33 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
                 * target drive.
                 */
                for (i = 0; i < tmp_num_stripes; i++) {
-                       if (tmp_bbio->stripes[i].dev->devid == srcdev_devid) {
-                               /*
-                                * In case of DUP, in order to keep it
-                                * simple, only add the mirror with the
-                                * lowest physical address
-                                */
-                               if (found &&
-                                   physical_of_found <=
-                                    tmp_bbio->stripes[i].physical)
-                                       continue;
-                               index_srcdev = i;
-                               found = 1;
-                               physical_of_found =
-                                       tmp_bbio->stripes[i].physical;
-                       }
+                       if (tmp_bbio->stripes[i].dev->devid != srcdev_devid)
+                               continue;
+
+                       /*
+                        * In case of DUP, in order to keep it simple, only add
+                        * the mirror with the lowest physical address
+                        */
+                       if (found &&
+                           physical_of_found <= tmp_bbio->stripes[i].physical)
+                               continue;
+
+                       index_srcdev = i;
+                       found = 1;
+                       physical_of_found = tmp_bbio->stripes[i].physical;
                }
 
-               if (found) {
-                       mirror_num = index_srcdev + 1;
-                       patch_the_first_stripe_for_dev_replace = 1;
-                       physical_to_patch_in_first_stripe = physical_of_found;
-               } else {
+               btrfs_put_bbio(tmp_bbio);
+
+               if (!found) {
                        WARN_ON(1);
                        ret = -EIO;
-                       btrfs_put_bbio(tmp_bbio);
                        goto out;
                }
 
-               btrfs_put_bbio(tmp_bbio);
+               mirror_num = index_srcdev + 1;
+               patch_the_first_stripe_for_dev_replace = 1;
+               physical_to_patch_in_first_stripe = physical_of_found;
        } else if (mirror_num > map->num_stripes) {
                mirror_num = 0;
        }
@@ -5806,7 +5805,7 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
                free_extent_map(em);
                return -EIO;
        }
-       map = (struct map_lookup *)em->bdev;
+       map = em->map_lookup;
 
        length = em->len;
        rmap_len = map->stripe_len;
@@ -6069,7 +6068,8 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
        bbio->fs_info = root->fs_info;
        atomic_set(&bbio->stripes_pending, bbio->num_stripes);
 
-       if (bbio->raid_map) {
+       if ((bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) &&
+           ((rw & WRITE) || (mirror_num > 1))) {
                /* In this case, map_length has been set to the length of
                   a single stripe; not the whole write */
                if (rw & WRITE) {
@@ -6210,6 +6210,7 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
        struct extent_map *em;
        u64 logical;
        u64 length;
+       u64 stripe_len;
        u64 devid;
        u8 uuid[BTRFS_UUID_SIZE];
        int num_stripes;
@@ -6218,6 +6219,37 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
 
        logical = key->offset;
        length = btrfs_chunk_length(leaf, chunk);
+       stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
+       num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
+       /* Validation check */
+       if (!num_stripes) {
+               btrfs_err(root->fs_info, "invalid chunk num_stripes: %u",
+                         num_stripes);
+               return -EIO;
+       }
+       if (!IS_ALIGNED(logical, root->sectorsize)) {
+               btrfs_err(root->fs_info,
+                         "invalid chunk logical %llu", logical);
+               return -EIO;
+       }
+       if (!length || !IS_ALIGNED(length, root->sectorsize)) {
+               btrfs_err(root->fs_info,
+                       "invalid chunk length %llu", length);
+               return -EIO;
+       }
+       if (!is_power_of_2(stripe_len)) {
+               btrfs_err(root->fs_info, "invalid chunk stripe length: %llu",
+                         stripe_len);
+               return -EIO;
+       }
+       if (~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK) &
+           btrfs_chunk_type(leaf, chunk)) {
+               btrfs_err(root->fs_info, "unrecognized chunk type: %llu",
+                         ~(BTRFS_BLOCK_GROUP_TYPE_MASK |
+                           BTRFS_BLOCK_GROUP_PROFILE_MASK) &
+                         btrfs_chunk_type(leaf, chunk));
+               return -EIO;
+       }
 
        read_lock(&map_tree->map_tree.lock);
        em = lookup_extent_mapping(&map_tree->map_tree, logical, 1);
@@ -6234,7 +6266,6 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
        em = alloc_extent_map();
        if (!em)
                return -ENOMEM;
-       num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
        map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
        if (!map) {
                free_extent_map(em);
@@ -6242,7 +6273,7 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
        }
 
        set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags);
-       em->bdev = (struct block_device *)map;
+       em->map_lookup = map;
        em->start = logical;
        em->len = length;
        em->orig_start = 0;
@@ -6944,7 +6975,7 @@ void btrfs_update_commit_device_bytes_used(struct btrfs_root *root,
        /* In order to kick the device replace finish process */
        lock_chunks(root);
        list_for_each_entry(em, &transaction->pending_chunks, list) {
-               map = (struct map_lookup *)em->bdev;
+               map = em->map_lookup;
 
                for (i = 0; i < map->num_stripes; i++) {
                        dev = map->stripes[i].dev;
index fd953c361a43c7c7f0faf3e100df12b052c06552..6c68d6356197afb630443f383100894227b07e65 100644 (file)
@@ -126,7 +126,7 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
         * locks the inode's i_mutex before calling setxattr or removexattr.
         */
        if (flags & XATTR_REPLACE) {
-               ASSERT(mutex_is_locked(&inode->i_mutex));
+               ASSERT(inode_is_locked(inode));
                di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode),
                                        name, name_len, 0);
                if (!di)
index afa023dded5be937ddd713847b37f2d165f586be..675a3332d72fad1eed4cba643959dbb9dabde051 100644 (file)
@@ -446,7 +446,7 @@ static int cachefiles_attr_changed(struct fscache_object *_object)
                return 0;
 
        cachefiles_begin_secure(cache, &saved_cred);
-       mutex_lock(&d_inode(object->backer)->i_mutex);
+       inode_lock(d_inode(object->backer));
 
        /* if there's an extension to a partial page at the end of the backing
         * file, we need to discard the partial page so that we pick up new
@@ -465,7 +465,7 @@ static int cachefiles_attr_changed(struct fscache_object *_object)
        ret = notify_change(object->backer, &newattrs, NULL);
 
 truncate_failed:
-       mutex_unlock(&d_inode(object->backer)->i_mutex);
+       inode_unlock(d_inode(object->backer));
        cachefiles_end_secure(cache, saved_cred);
 
        if (ret == -EIO) {
index c4b893453e0eefda8475b3537e934f2bfc46d268..1c2334c163ddea17f99298b89ec58e09c51d3bd0 100644 (file)
@@ -295,7 +295,7 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache,
                                cachefiles_mark_object_buried(cache, rep, why);
                }
 
-               mutex_unlock(&d_inode(dir)->i_mutex);
+               inode_unlock(d_inode(dir));
 
                if (ret == -EIO)
                        cachefiles_io_error(cache, "Unlink failed");
@@ -306,7 +306,7 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache,
 
        /* directories have to be moved to the graveyard */
        _debug("move stale object to graveyard");
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
 
 try_again:
        /* first step is to make up a grave dentry in the graveyard */
@@ -423,13 +423,13 @@ int cachefiles_delete_object(struct cachefiles_cache *cache,
 
        dir = dget_parent(object->dentry);
 
-       mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
 
        if (test_bit(FSCACHE_OBJECT_KILLED_BY_CACHE, &object->fscache.flags)) {
                /* object allocation for the same key preemptively deleted this
                 * object's file so that it could create its own file */
                _debug("object preemptively buried");
-               mutex_unlock(&d_inode(dir)->i_mutex);
+               inode_unlock(d_inode(dir));
                ret = 0;
        } else {
                /* we need to check that our parent is _still_ our parent - it
@@ -442,7 +442,7 @@ int cachefiles_delete_object(struct cachefiles_cache *cache,
                        /* it got moved, presumably by cachefilesd culling it,
                         * so it's no longer in the key path and we can ignore
                         * it */
-                       mutex_unlock(&d_inode(dir)->i_mutex);
+                       inode_unlock(d_inode(dir));
                        ret = 0;
                }
        }
@@ -501,7 +501,7 @@ lookup_again:
        /* search the current directory for the element name */
        _debug("lookup '%s'", name);
 
-       mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
 
        start = jiffies;
        next = lookup_one_len(name, dir, nlen);
@@ -585,7 +585,7 @@ lookup_again:
        /* process the next component */
        if (key) {
                _debug("advance");
-               mutex_unlock(&d_inode(dir)->i_mutex);
+               inode_unlock(d_inode(dir));
                dput(dir);
                dir = next;
                next = NULL;
@@ -623,7 +623,7 @@ lookup_again:
        /* note that we're now using this object */
        ret = cachefiles_mark_object_active(cache, object);
 
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
        dput(dir);
        dir = NULL;
 
@@ -705,7 +705,7 @@ lookup_error:
                cachefiles_io_error(cache, "Lookup failed");
        next = NULL;
 error:
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
        dput(next);
 error_out2:
        dput(dir);
@@ -729,7 +729,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
        _enter(",,%s", dirname);
 
        /* search the current directory for the element name */
-       mutex_lock(&d_inode(dir)->i_mutex);
+       inode_lock(d_inode(dir));
 
        start = jiffies;
        subdir = lookup_one_len(dirname, dir, strlen(dirname));
@@ -768,7 +768,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
                       d_backing_inode(subdir)->i_ino);
        }
 
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
 
        /* we need to make sure the subdir is a directory */
        ASSERT(d_backing_inode(subdir));
@@ -800,19 +800,19 @@ check_error:
        return ERR_PTR(ret);
 
 mkdir_error:
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
        dput(subdir);
        pr_err("mkdir %s failed with error %d\n", dirname, ret);
        return ERR_PTR(ret);
 
 lookup_error:
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
        ret = PTR_ERR(subdir);
        pr_err("Lookup %s failed with error %d\n", dirname, ret);
        return ERR_PTR(ret);
 
 nomem_d_alloc:
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
        _leave(" = -ENOMEM");
        return ERR_PTR(-ENOMEM);
 }
@@ -837,7 +837,7 @@ static struct dentry *cachefiles_check_active(struct cachefiles_cache *cache,
        //       dir, filename);
 
        /* look up the victim */
-       mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
 
        start = jiffies;
        victim = lookup_one_len(filename, dir, strlen(filename));
@@ -852,7 +852,7 @@ static struct dentry *cachefiles_check_active(struct cachefiles_cache *cache,
         * at the netfs's request whilst the cull was in progress
         */
        if (d_is_negative(victim)) {
-               mutex_unlock(&d_inode(dir)->i_mutex);
+               inode_unlock(d_inode(dir));
                dput(victim);
                _leave(" = -ENOENT [absent]");
                return ERR_PTR(-ENOENT);
@@ -881,13 +881,13 @@ static struct dentry *cachefiles_check_active(struct cachefiles_cache *cache,
 
 object_in_use:
        read_unlock(&cache->active_lock);
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
        dput(victim);
        //_leave(" = -EBUSY [in use]");
        return ERR_PTR(-EBUSY);
 
 lookup_error:
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
        ret = PTR_ERR(victim);
        if (ret == -ENOENT) {
                /* file or dir now absent - probably retired by netfs */
@@ -947,7 +947,7 @@ int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir,
        return 0;
 
 error_unlock:
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
 error:
        dput(victim);
        if (ret == -ENOENT) {
@@ -982,7 +982,7 @@ int cachefiles_check_in_use(struct cachefiles_cache *cache, struct dentry *dir,
        if (IS_ERR(victim))
                return PTR_ERR(victim);
 
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
        dput(victim);
        //_leave(" = 0");
        return 0;
index b7d218a168fb81c2c028ea38ffccf2a17a1d68d5..c22213789090f975b3680b10e022a1e5f2f9fdb4 100644 (file)
@@ -1108,7 +1108,7 @@ retry_locked:
                return 0;
 
        /* past end of file? */
-       i_size = inode->i_size;   /* caller holds i_mutex */
+       i_size = i_size_read(inode);
 
        if (page_off >= i_size ||
            (pos_in_page == 0 && (pos+len) >= i_size &&
@@ -1149,7 +1149,6 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
                page = grab_cache_page_write_begin(mapping, index, 0);
                if (!page)
                        return -ENOMEM;
-               *pagep = page;
 
                dout("write_begin file %p inode %p page %p %d~%d\n", file,
                     inode, page, (int)pos, (int)len);
@@ -1184,8 +1183,7 @@ static int ceph_write_end(struct file *file, struct address_space *mapping,
                zero_user_segment(page, from+copied, len);
 
        /* did file size increase? */
-       /* (no need for i_size_read(); we caller holds i_mutex */
-       if (pos+copied > inode->i_size)
+       if (pos+copied > i_size_read(inode))
                check_cap = ceph_inode_set_size(inode, pos+copied);
 
        if (!PageUptodate(page))
@@ -1378,11 +1376,13 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 
        ret = VM_FAULT_NOPAGE;
        if ((off > size) ||
-           (page->mapping != inode->i_mapping))
+           (page->mapping != inode->i_mapping)) {
+               unlock_page(page);
                goto out;
+       }
 
        ret = ceph_update_writeable_page(vma->vm_file, off, len, page);
-       if (ret == 0) {
+       if (ret >= 0) {
                /* success.  we'll keep the page locked. */
                set_page_dirty(page);
                ret = VM_FAULT_LOCKED;
@@ -1393,8 +1393,6 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
                        ret = VM_FAULT_SIGBUS;
        }
 out:
-       if (ret != VM_FAULT_LOCKED)
-               unlock_page(page);
        if (ret == VM_FAULT_LOCKED ||
            ci->i_inline_version != CEPH_INLINE_NONE) {
                int dirty;
index a4766ded1ba78e8bbbff70d4c5e65b3f9060d687..a351480dbabc95891e4b61f83fb92485f8ea7b18 100644 (file)
@@ -106,7 +106,7 @@ static uint16_t ceph_fscache_inode_get_aux(const void *cookie_netfs_data,
 
        memset(&aux, 0, sizeof(aux));
        aux.mtime = inode->i_mtime;
-       aux.size = inode->i_size;
+       aux.size = i_size_read(inode);
 
        memcpy(buffer, &aux, sizeof(aux));
 
@@ -117,9 +117,7 @@ static void ceph_fscache_inode_get_attr(const void *cookie_netfs_data,
                                        uint64_t *size)
 {
        const struct ceph_inode_info* ci = cookie_netfs_data;
-       const struct inode* inode = &ci->vfs_inode;
-
-       *size = inode->i_size;
+       *size = i_size_read(&ci->vfs_inode);
 }
 
 static enum fscache_checkaux ceph_fscache_inode_check_aux(
@@ -134,7 +132,7 @@ static enum fscache_checkaux ceph_fscache_inode_check_aux(
 
        memset(&aux, 0, sizeof(aux));
        aux.mtime = inode->i_mtime;
-       aux.size = inode->i_size;
+       aux.size = i_size_read(inode);
 
        if (memcmp(data, &aux, sizeof(aux)) != 0)
                return FSCACHE_CHECKAUX_OBSOLETE;
@@ -197,7 +195,7 @@ void ceph_fscache_register_inode_cookie(struct ceph_fs_client* fsc,
                return;
 
        /* Avoid multiple racing open requests */
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        if (ci->fscache)
                goto done;
@@ -207,7 +205,7 @@ void ceph_fscache_register_inode_cookie(struct ceph_fs_client* fsc,
                                             ci, true);
        fscache_check_consistency(ci->fscache);
 done:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
 }
 
index c69e1253b47bfbefb2eb2cb1564a5c4b9d87f9a3..cdbf8cf3d52c3354569b7c86a709f4ce3fc09e2c 100644 (file)
@@ -2030,7 +2030,7 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync)
        if (datasync)
                goto out;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        dirty = try_flush_caps(inode, &flush_tid);
        dout("fsync dirty caps are %s\n", ceph_cap_string(dirty));
@@ -2046,7 +2046,7 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync)
                ret = wait_event_interruptible(ci->i_cap_wq,
                                        caps_are_flushed(inode, flush_tid));
        }
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 out:
        dout("fsync %p%s result=%d\n", inode, datasync ? " datasync" : "", ret);
        return ret;
index 9314b4ea2375145647aa16446a1f33ae2c8106b2..fd11fb231a2ea796e86eae933265a488ad6774e5 100644 (file)
@@ -507,7 +507,7 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
        loff_t old_offset = ceph_make_fpos(fi->frag, fi->next_offset);
        loff_t retval;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        retval = -EINVAL;
        switch (whence) {
        case SEEK_CUR:
@@ -542,7 +542,7 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
                }
        }
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return retval;
 }
 
index fe02ae7f056a332220929ce653e822a0a39e2605..3b31723573268e924346c1bffddb7361390b2a8d 100644 (file)
@@ -215,7 +215,7 @@ static int ceph_get_name(struct dentry *parent, char *name,
        if (IS_ERR(req))
                return PTR_ERR(req);
 
-       mutex_lock(&d_inode(parent)->i_mutex);
+       inode_lock(d_inode(parent));
 
        req->r_inode = d_inode(child);
        ihold(d_inode(child));
@@ -224,7 +224,7 @@ static int ceph_get_name(struct dentry *parent, char *name,
        req->r_num_caps = 2;
        err = ceph_mdsc_do_request(mdsc, NULL, req);
 
-       mutex_unlock(&d_inode(parent)->i_mutex);
+       inode_unlock(d_inode(parent));
 
        if (!err) {
                struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
index 3c68e6aee2f0531bdd8afb0125bbae48419d0dfb..eb9028e8cfc5197ef1ea99524fcbc67d67b4df91 100644 (file)
@@ -397,8 +397,9 @@ int ceph_release(struct inode *inode, struct file *file)
 }
 
 enum {
-       CHECK_EOF = 1,
-       READ_INLINE = 2,
+       HAVE_RETRIED = 1,
+       CHECK_EOF =    2,
+       READ_INLINE =  3,
 };
 
 /*
@@ -411,17 +412,15 @@ enum {
 static int striped_read(struct inode *inode,
                        u64 off, u64 len,
                        struct page **pages, int num_pages,
-                       int *checkeof, bool o_direct,
-                       unsigned long buf_align)
+                       int *checkeof)
 {
        struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
        struct ceph_inode_info *ci = ceph_inode(inode);
        u64 pos, this_len, left;
-       int io_align, page_align;
-       int pages_left;
-       int read;
+       loff_t i_size;
+       int page_align, pages_left;
+       int read, ret;
        struct page **page_pos;
-       int ret;
        bool hit_stripe, was_short;
 
        /*
@@ -432,13 +431,9 @@ static int striped_read(struct inode *inode,
        page_pos = pages;
        pages_left = num_pages;
        read = 0;
-       io_align = off & ~PAGE_MASK;
 
 more:
-       if (o_direct)
-               page_align = (pos - io_align + buf_align) & ~PAGE_MASK;
-       else
-               page_align = pos & ~PAGE_MASK;
+       page_align = pos & ~PAGE_MASK;
        this_len = left;
        ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode),
                                  &ci->i_layout, pos, &this_len,
@@ -452,13 +447,12 @@ more:
        dout("striped_read %llu~%llu (read %u) got %d%s%s\n", pos, left, read,
             ret, hit_stripe ? " HITSTRIPE" : "", was_short ? " SHORT" : "");
 
+       i_size = i_size_read(inode);
        if (ret >= 0) {
                int didpages;
-               if (was_short && (pos + ret < inode->i_size)) {
-                       int zlen = min(this_len - ret,
-                                      inode->i_size - pos - ret);
-                       int zoff = (o_direct ? buf_align : io_align) +
-                                   read + ret;
+               if (was_short && (pos + ret < i_size)) {
+                       int zlen = min(this_len - ret, i_size - pos - ret);
+                       int zoff = (off & ~PAGE_MASK) + read + ret;
                        dout(" zero gap %llu to %llu\n",
                                pos + ret, pos + ret + zlen);
                        ceph_zero_page_vector_range(zoff, zlen, pages);
@@ -473,14 +467,14 @@ more:
                pages_left -= didpages;
 
                /* hit stripe and need continue*/
-               if (left && hit_stripe && pos < inode->i_size)
+               if (left && hit_stripe && pos < i_size)
                        goto more;
        }
 
        if (read > 0) {
                ret = read;
                /* did we bounce off eof? */
-               if (pos + left > inode->i_size)
+               if (pos + left > i_size)
                        *checkeof = CHECK_EOF;
        }
 
@@ -521,54 +515,28 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i,
        if (ret < 0)
                return ret;
 
-       if (iocb->ki_flags & IOCB_DIRECT) {
-               while (iov_iter_count(i)) {
-                       size_t start;
-                       ssize_t n;
-
-                       n = dio_get_pagev_size(i);
-                       pages = dio_get_pages_alloc(i, n, &start, &num_pages);
-                       if (IS_ERR(pages))
-                               return PTR_ERR(pages);
-
-                       ret = striped_read(inode, off, n,
-                                          pages, num_pages, checkeof,
-                                          1, start);
-
-                       ceph_put_page_vector(pages, num_pages, true);
-
-                       if (ret <= 0)
-                               break;
-                       off += ret;
-                       iov_iter_advance(i, ret);
-                       if (ret < n)
+       num_pages = calc_pages_for(off, len);
+       pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
+       if (IS_ERR(pages))
+               return PTR_ERR(pages);
+       ret = striped_read(inode, off, len, pages,
+                               num_pages, checkeof);
+       if (ret > 0) {
+               int l, k = 0;
+               size_t left = ret;
+
+               while (left) {
+                       size_t page_off = off & ~PAGE_MASK;
+                       size_t copy = min_t(size_t, left,
+                                           PAGE_SIZE - page_off);
+                       l = copy_page_to_iter(pages[k++], page_off, copy, i);
+                       off += l;
+                       left -= l;
+                       if (l < copy)
                                break;
                }
-       } else {
-               num_pages = calc_pages_for(off, len);
-               pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
-               if (IS_ERR(pages))
-                       return PTR_ERR(pages);
-               ret = striped_read(inode, off, len, pages,
-                                       num_pages, checkeof, 0, 0);
-               if (ret > 0) {
-                       int l, k = 0;
-                       size_t left = ret;
-
-                       while (left) {
-                               size_t page_off = off & ~PAGE_MASK;
-                               size_t copy = min_t(size_t,
-                                                   PAGE_SIZE - page_off, left);
-                               l = copy_page_to_iter(pages[k++], page_off,
-                                                     copy, i);
-                               off += l;
-                               left -= l;
-                               if (l < copy)
-                                       break;
-                       }
-               }
-               ceph_release_page_vector(pages, num_pages);
        }
+       ceph_release_page_vector(pages, num_pages);
 
        if (off > iocb->ki_pos) {
                ret = off - iocb->ki_pos;
@@ -579,6 +547,193 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i,
        return ret;
 }
 
+struct ceph_aio_request {
+       struct kiocb *iocb;
+       size_t total_len;
+       int write;
+       int error;
+       struct list_head osd_reqs;
+       unsigned num_reqs;
+       atomic_t pending_reqs;
+       struct timespec mtime;
+       struct ceph_cap_flush *prealloc_cf;
+};
+
+struct ceph_aio_work {
+       struct work_struct work;
+       struct ceph_osd_request *req;
+};
+
+static void ceph_aio_retry_work(struct work_struct *work);
+
+static void ceph_aio_complete(struct inode *inode,
+                             struct ceph_aio_request *aio_req)
+{
+       struct ceph_inode_info *ci = ceph_inode(inode);
+       int ret;
+
+       if (!atomic_dec_and_test(&aio_req->pending_reqs))
+               return;
+
+       ret = aio_req->error;
+       if (!ret)
+               ret = aio_req->total_len;
+
+       dout("ceph_aio_complete %p rc %d\n", inode, ret);
+
+       if (ret >= 0 && aio_req->write) {
+               int dirty;
+
+               loff_t endoff = aio_req->iocb->ki_pos + aio_req->total_len;
+               if (endoff > i_size_read(inode)) {
+                       if (ceph_inode_set_size(inode, endoff))
+                               ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
+               }
+
+               spin_lock(&ci->i_ceph_lock);
+               ci->i_inline_version = CEPH_INLINE_NONE;
+               dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR,
+                                              &aio_req->prealloc_cf);
+               spin_unlock(&ci->i_ceph_lock);
+               if (dirty)
+                       __mark_inode_dirty(inode, dirty);
+
+       }
+
+       ceph_put_cap_refs(ci, (aio_req->write ? CEPH_CAP_FILE_WR :
+                                               CEPH_CAP_FILE_RD));
+
+       aio_req->iocb->ki_complete(aio_req->iocb, ret, 0);
+
+       ceph_free_cap_flush(aio_req->prealloc_cf);
+       kfree(aio_req);
+}
+
+static void ceph_aio_complete_req(struct ceph_osd_request *req,
+                                 struct ceph_msg *msg)
+{
+       int rc = req->r_result;
+       struct inode *inode = req->r_inode;
+       struct ceph_aio_request *aio_req = req->r_priv;
+       struct ceph_osd_data *osd_data = osd_req_op_extent_osd_data(req, 0);
+       int num_pages = calc_pages_for((u64)osd_data->alignment,
+                                      osd_data->length);
+
+       dout("ceph_aio_complete_req %p rc %d bytes %llu\n",
+            inode, rc, osd_data->length);
+
+       if (rc == -EOLDSNAPC) {
+               struct ceph_aio_work *aio_work;
+               BUG_ON(!aio_req->write);
+
+               aio_work = kmalloc(sizeof(*aio_work), GFP_NOFS);
+               if (aio_work) {
+                       INIT_WORK(&aio_work->work, ceph_aio_retry_work);
+                       aio_work->req = req;
+                       queue_work(ceph_inode_to_client(inode)->wb_wq,
+                                  &aio_work->work);
+                       return;
+               }
+               rc = -ENOMEM;
+       } else if (!aio_req->write) {
+               if (rc == -ENOENT)
+                       rc = 0;
+               if (rc >= 0 && osd_data->length > rc) {
+                       int zoff = osd_data->alignment + rc;
+                       int zlen = osd_data->length - rc;
+                       /*
+                        * If read is satisfied by single OSD request,
+                        * it can pass EOF. Otherwise read is within
+                        * i_size.
+                        */
+                       if (aio_req->num_reqs == 1) {
+                               loff_t i_size = i_size_read(inode);
+                               loff_t endoff = aio_req->iocb->ki_pos + rc;
+                               if (endoff < i_size)
+                                       zlen = min_t(size_t, zlen,
+                                                    i_size - endoff);
+                               aio_req->total_len = rc + zlen;
+                       }
+
+                       if (zlen > 0)
+                               ceph_zero_page_vector_range(zoff, zlen,
+                                                           osd_data->pages);
+               }
+       }
+
+       ceph_put_page_vector(osd_data->pages, num_pages, false);
+       ceph_osdc_put_request(req);
+
+       if (rc < 0)
+               cmpxchg(&aio_req->error, 0, rc);
+
+       ceph_aio_complete(inode, aio_req);
+       return;
+}
+
+static void ceph_aio_retry_work(struct work_struct *work)
+{
+       struct ceph_aio_work *aio_work =
+               container_of(work, struct ceph_aio_work, work);
+       struct ceph_osd_request *orig_req = aio_work->req;
+       struct ceph_aio_request *aio_req = orig_req->r_priv;
+       struct inode *inode = orig_req->r_inode;
+       struct ceph_inode_info *ci = ceph_inode(inode);
+       struct ceph_snap_context *snapc;
+       struct ceph_osd_request *req;
+       int ret;
+
+       spin_lock(&ci->i_ceph_lock);
+       if (__ceph_have_pending_cap_snap(ci)) {
+               struct ceph_cap_snap *capsnap =
+                       list_last_entry(&ci->i_cap_snaps,
+                                       struct ceph_cap_snap,
+                                       ci_item);
+               snapc = ceph_get_snap_context(capsnap->context);
+       } else {
+               BUG_ON(!ci->i_head_snapc);
+               snapc = ceph_get_snap_context(ci->i_head_snapc);
+       }
+       spin_unlock(&ci->i_ceph_lock);
+
+       req = ceph_osdc_alloc_request(orig_req->r_osdc, snapc, 2,
+                       false, GFP_NOFS);
+       if (!req) {
+               ret = -ENOMEM;
+               req = orig_req;
+               goto out;
+       }
+
+       req->r_flags =  CEPH_OSD_FLAG_ORDERSNAP |
+                       CEPH_OSD_FLAG_ONDISK |
+                       CEPH_OSD_FLAG_WRITE;
+       req->r_base_oloc = orig_req->r_base_oloc;
+       req->r_base_oid = orig_req->r_base_oid;
+
+       req->r_ops[0] = orig_req->r_ops[0];
+       osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC, 0);
+
+       ceph_osdc_build_request(req, req->r_ops[0].extent.offset,
+                               snapc, CEPH_NOSNAP, &aio_req->mtime);
+
+       ceph_osdc_put_request(orig_req);
+
+       req->r_callback = ceph_aio_complete_req;
+       req->r_inode = inode;
+       req->r_priv = aio_req;
+
+       ret = ceph_osdc_start_request(req->r_osdc, req, false);
+out:
+       if (ret < 0) {
+               BUG_ON(ret == -EOLDSNAPC);
+               req->r_result = ret;
+               ceph_aio_complete_req(req, NULL);
+       }
+
+       ceph_put_snap_context(snapc);
+       kfree(aio_work);
+}
+
 /*
  * Write commit request unsafe callback, called to tell us when a
  * request is unsafe (that is, in flight--has been handed to the
@@ -612,16 +767,10 @@ static void ceph_sync_write_unsafe(struct ceph_osd_request *req, bool unsafe)
 }
 
 
-/*
- * Synchronous write, straight from __user pointer or user pages.
- *
- * If write spans object boundary, just do multiple writes.  (For a
- * correct atomic write, we should e.g. take write locks on all
- * objects, rollback on failure, etc.)
- */
 static ssize_t
-ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
-                      struct ceph_snap_context *snapc)
+ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
+                      struct ceph_snap_context *snapc,
+                      struct ceph_cap_flush **pcf)
 {
        struct file *file = iocb->ki_filp;
        struct inode *inode = file_inode(file);
@@ -630,44 +779,52 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
        struct ceph_vino vino;
        struct ceph_osd_request *req;
        struct page **pages;
-       int num_pages;
-       int written = 0;
+       struct ceph_aio_request *aio_req = NULL;
+       int num_pages = 0;
        int flags;
-       int check_caps = 0;
        int ret;
        struct timespec mtime = CURRENT_TIME;
-       size_t count = iov_iter_count(from);
+       size_t count = iov_iter_count(iter);
+       loff_t pos = iocb->ki_pos;
+       bool write = iov_iter_rw(iter) == WRITE;
 
-       if (ceph_snap(file_inode(file)) != CEPH_NOSNAP)
+       if (write && ceph_snap(file_inode(file)) != CEPH_NOSNAP)
                return -EROFS;
 
-       dout("sync_direct_write on file %p %lld~%u\n", file, pos,
-            (unsigned)count);
+       dout("sync_direct_read_write (%s) on file %p %lld~%u\n",
+            (write ? "write" : "read"), file, pos, (unsigned)count);
 
        ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + count);
        if (ret < 0)
                return ret;
 
-       ret = invalidate_inode_pages2_range(inode->i_mapping,
-                                           pos >> PAGE_CACHE_SHIFT,
-                                           (pos + count) >> PAGE_CACHE_SHIFT);
-       if (ret < 0)
-               dout("invalidate_inode_pages2_range returned %d\n", ret);
+       if (write) {
+               ret = invalidate_inode_pages2_range(inode->i_mapping,
+                                       pos >> PAGE_CACHE_SHIFT,
+                                       (pos + count) >> PAGE_CACHE_SHIFT);
+               if (ret < 0)
+                       dout("invalidate_inode_pages2_range returned %d\n", ret);
 
-       flags = CEPH_OSD_FLAG_ORDERSNAP |
-               CEPH_OSD_FLAG_ONDISK |
-               CEPH_OSD_FLAG_WRITE;
+               flags = CEPH_OSD_FLAG_ORDERSNAP |
+                       CEPH_OSD_FLAG_ONDISK |
+                       CEPH_OSD_FLAG_WRITE;
+       } else {
+               flags = CEPH_OSD_FLAG_READ;
+       }
 
-       while (iov_iter_count(from) > 0) {
-               u64 len = dio_get_pagev_size(from);
-               size_t start;
-               ssize_t n;
+       while (iov_iter_count(iter) > 0) {
+               u64 size = dio_get_pagev_size(iter);
+               size_t start = 0;
+               ssize_t len;
 
                vino = ceph_vino(inode);
                req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
-                                           vino, pos, &len, 0,
-                                           2,/*include a 'startsync' command*/
-                                           CEPH_OSD_OP_WRITE, flags, snapc,
+                                           vino, pos, &size, 0,
+                                           /*include a 'startsync' command*/
+                                           write ? 2 : 1,
+                                           write ? CEPH_OSD_OP_WRITE :
+                                                   CEPH_OSD_OP_READ,
+                                           flags, snapc,
                                            ci->i_truncate_seq,
                                            ci->i_truncate_size,
                                            false);
@@ -676,10 +833,8 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
                        break;
                }
 
-               osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC, 0);
-
-               n = len;
-               pages = dio_get_pages_alloc(from, len, &start, &num_pages);
+               len = size;
+               pages = dio_get_pages_alloc(iter, len, &start, &num_pages);
                if (IS_ERR(pages)) {
                        ceph_osdc_put_request(req);
                        ret = PTR_ERR(pages);
@@ -687,47 +842,128 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
                }
 
                /*
-                * throw out any page cache pages in this range. this
-                * may block.
+                * To simplify error handling, allow AIO when IO within i_size
+                * or IO can be satisfied by single OSD request.
                 */
-               truncate_inode_pages_range(inode->i_mapping, pos,
-                                  (pos+n) | (PAGE_CACHE_SIZE-1));
-               osd_req_op_extent_osd_data_pages(req, 0, pages, n, start,
-                                               false, false);
+               if (pos == iocb->ki_pos && !is_sync_kiocb(iocb) &&
+                   (len == count || pos + count <= i_size_read(inode))) {
+                       aio_req = kzalloc(sizeof(*aio_req), GFP_KERNEL);
+                       if (aio_req) {
+                               aio_req->iocb = iocb;
+                               aio_req->write = write;
+                               INIT_LIST_HEAD(&aio_req->osd_reqs);
+                               if (write) {
+                                       aio_req->mtime = mtime;
+                                       swap(aio_req->prealloc_cf, *pcf);
+                               }
+                       }
+                       /* ignore error */
+               }
+
+               if (write) {
+                       /*
+                        * throw out any page cache pages in this range. this
+                        * may block.
+                        */
+                       truncate_inode_pages_range(inode->i_mapping, pos,
+                                       (pos+len) | (PAGE_CACHE_SIZE - 1));
+
+                       osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC, 0);
+               }
+
+
+               osd_req_op_extent_osd_data_pages(req, 0, pages, len, start,
+                                                false, false);
 
-               /* BUG_ON(vino.snap != CEPH_NOSNAP); */
                ceph_osdc_build_request(req, pos, snapc, vino.snap, &mtime);
 
-               ret = ceph_osdc_start_request(&fsc->client->osdc, req, false);
+               if (aio_req) {
+                       aio_req->total_len += len;
+                       aio_req->num_reqs++;
+                       atomic_inc(&aio_req->pending_reqs);
+
+                       req->r_callback = ceph_aio_complete_req;
+                       req->r_inode = inode;
+                       req->r_priv = aio_req;
+                       list_add_tail(&req->r_unsafe_item, &aio_req->osd_reqs);
+
+                       pos += len;
+                       iov_iter_advance(iter, len);
+                       continue;
+               }
+
+               ret = ceph_osdc_start_request(req->r_osdc, req, false);
                if (!ret)
                        ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
 
+               size = i_size_read(inode);
+               if (!write) {
+                       if (ret == -ENOENT)
+                               ret = 0;
+                       if (ret >= 0 && ret < len && pos + ret < size) {
+                               int zlen = min_t(size_t, len - ret,
+                                                size - pos - ret);
+                               ceph_zero_page_vector_range(start + ret, zlen,
+                                                           pages);
+                               ret += zlen;
+                       }
+                       if (ret >= 0)
+                               len = ret;
+               }
+
                ceph_put_page_vector(pages, num_pages, false);
 
                ceph_osdc_put_request(req);
-               if (ret)
+               if (ret < 0)
+                       break;
+
+               pos += len;
+               iov_iter_advance(iter, len);
+
+               if (!write && pos >= size)
                        break;
-               pos += n;
-               written += n;
-               iov_iter_advance(from, n);
 
-               if (pos > i_size_read(inode)) {
-                       check_caps = ceph_inode_set_size(inode, pos);
-                       if (check_caps)
+               if (write && pos > size) {
+                       if (ceph_inode_set_size(inode, pos))
                                ceph_check_caps(ceph_inode(inode),
                                                CHECK_CAPS_AUTHONLY,
                                                NULL);
                }
        }
 
-       if (ret != -EOLDSNAPC && written > 0) {
+       if (aio_req) {
+               if (aio_req->num_reqs == 0) {
+                       kfree(aio_req);
+                       return ret;
+               }
+
+               ceph_get_cap_refs(ci, write ? CEPH_CAP_FILE_WR :
+                                             CEPH_CAP_FILE_RD);
+
+               while (!list_empty(&aio_req->osd_reqs)) {
+                       req = list_first_entry(&aio_req->osd_reqs,
+                                              struct ceph_osd_request,
+                                              r_unsafe_item);
+                       list_del_init(&req->r_unsafe_item);
+                       if (ret >= 0)
+                               ret = ceph_osdc_start_request(req->r_osdc,
+                                                             req, false);
+                       if (ret < 0) {
+                               BUG_ON(ret == -EOLDSNAPC);
+                               req->r_result = ret;
+                               ceph_aio_complete_req(req, NULL);
+                       }
+               }
+               return -EIOCBQUEUED;
+       }
+
+       if (ret != -EOLDSNAPC && pos > iocb->ki_pos) {
+               ret = pos - iocb->ki_pos;
                iocb->ki_pos = pos;
-               ret = written;
        }
        return ret;
 }
 
-
 /*
  * Synchronous write, straight from __user pointer or user pages.
  *
@@ -897,8 +1133,14 @@ again:
                     ceph_cap_string(got));
 
                if (ci->i_inline_version == CEPH_INLINE_NONE) {
-                       /* hmm, this isn't really async... */
-                       ret = ceph_sync_read(iocb, to, &retry_op);
+                       if (!retry_op && (iocb->ki_flags & IOCB_DIRECT)) {
+                               ret = ceph_direct_read_write(iocb, to,
+                                                            NULL, NULL);
+                               if (ret >= 0 && ret < len)
+                                       retry_op = CHECK_EOF;
+                       } else {
+                               ret = ceph_sync_read(iocb, to, &retry_op);
+                       }
                } else {
                        retry_op = READ_INLINE;
                }
@@ -916,7 +1158,7 @@ again:
                pinned_page = NULL;
        }
        ceph_put_cap_refs(ci, got);
-       if (retry_op && ret >= 0) {
+       if (retry_op > HAVE_RETRIED && ret >= 0) {
                int statret;
                struct page *page = NULL;
                loff_t i_size;
@@ -968,12 +1210,11 @@ again:
                if (retry_op == CHECK_EOF && iocb->ki_pos < i_size &&
                    ret < len) {
                        dout("sync_read hit hole, ppos %lld < size %lld"
-                            ", reading more\n", iocb->ki_pos,
-                            inode->i_size);
+                            ", reading more\n", iocb->ki_pos, i_size);
 
                        read += ret;
                        len -= ret;
-                       retry_op = 0;
+                       retry_op = HAVE_RETRIED;
                        goto again;
                }
        }
@@ -1014,7 +1255,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
        if (!prealloc_cf)
                return -ENOMEM;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        /* We can write back this queue in page reclaim */
        current->backing_dev_info = inode_to_bdi(inode);
@@ -1052,7 +1293,7 @@ retry_snap:
        }
 
        dout("aio_write %p %llx.%llx %llu~%zd getting caps. i_size %llu\n",
-            inode, ceph_vinop(inode), pos, count, inode->i_size);
+            inode, ceph_vinop(inode), pos, count, i_size_read(inode));
        if (fi->fmode & CEPH_FILE_MODE_LAZY)
                want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO;
        else
@@ -1070,7 +1311,7 @@ retry_snap:
            (iocb->ki_flags & IOCB_DIRECT) || (fi->flags & CEPH_F_SYNC)) {
                struct ceph_snap_context *snapc;
                struct iov_iter data;
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
 
                spin_lock(&ci->i_ceph_lock);
                if (__ceph_have_pending_cap_snap(ci)) {
@@ -1088,8 +1329,8 @@ retry_snap:
                /* we might need to revert back to that point */
                data = *from;
                if (iocb->ki_flags & IOCB_DIRECT)
-                       written = ceph_sync_direct_write(iocb, &data, pos,
-                                                        snapc);
+                       written = ceph_direct_read_write(iocb, &data, snapc,
+                                                        &prealloc_cf);
                else
                        written = ceph_sync_write(iocb, &data, pos, snapc);
                if (written == -EOLDSNAPC) {
@@ -1097,14 +1338,14 @@ retry_snap:
                                "got EOLDSNAPC, retrying\n",
                                inode, ceph_vinop(inode),
                                pos, (unsigned)count);
-                       mutex_lock(&inode->i_mutex);
+                       inode_lock(inode);
                        goto retry_snap;
                }
                if (written > 0)
                        iov_iter_advance(from, written);
                ceph_put_snap_context(snapc);
        } else {
-               loff_t old_size = inode->i_size;
+               loff_t old_size = i_size_read(inode);
                /*
                 * No need to acquire the i_truncate_mutex. Because
                 * the MDS revokes Fwb caps before sending truncate
@@ -1115,9 +1356,9 @@ retry_snap:
                written = generic_perform_write(file, from, pos);
                if (likely(written >= 0))
                        iocb->ki_pos = pos + written;
-               if (inode->i_size > old_size)
+               if (i_size_read(inode) > old_size)
                        ceph_fscache_update_objectsize(inode);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        }
 
        if (written >= 0) {
@@ -1147,7 +1388,7 @@ retry_snap:
        goto out_unlocked;
 
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 out_unlocked:
        ceph_free_cap_flush(prealloc_cf);
        current->backing_dev_info = NULL;
@@ -1160,9 +1401,10 @@ out_unlocked:
 static loff_t ceph_llseek(struct file *file, loff_t offset, int whence)
 {
        struct inode *inode = file->f_mapping->host;
+       loff_t i_size;
        int ret;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        if (whence == SEEK_END || whence == SEEK_DATA || whence == SEEK_HOLE) {
                ret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE, false);
@@ -1172,9 +1414,10 @@ static loff_t ceph_llseek(struct file *file, loff_t offset, int whence)
                }
        }
 
+       i_size = i_size_read(inode);
        switch (whence) {
        case SEEK_END:
-               offset += inode->i_size;
+               offset += i_size;
                break;
        case SEEK_CUR:
                /*
@@ -1190,24 +1433,24 @@ static loff_t ceph_llseek(struct file *file, loff_t offset, int whence)
                offset += file->f_pos;
                break;
        case SEEK_DATA:
-               if (offset >= inode->i_size) {
+               if (offset >= i_size) {
                        ret = -ENXIO;
                        goto out;
                }
                break;
        case SEEK_HOLE:
-               if (offset >= inode->i_size) {
+               if (offset >= i_size) {
                        ret = -ENXIO;
                        goto out;
                }
-               offset = inode->i_size;
+               offset = i_size;
                break;
        }
 
        offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
 
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return offset;
 }
 
@@ -1363,7 +1606,7 @@ static long ceph_fallocate(struct file *file, int mode,
        if (!prealloc_cf)
                return -ENOMEM;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        if (ceph_snap(inode) != CEPH_NOSNAP) {
                ret = -EROFS;
@@ -1418,7 +1661,7 @@ static long ceph_fallocate(struct file *file, int mode,
 
        ceph_put_cap_refs(ci, got);
 unlock:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        ceph_free_cap_flush(prealloc_cf);
        return ret;
 }
index da55eb8bcffab89755baf5229b92ededf49dd484..fb4ba2e4e2a5fa5c5d62afa3b94f758c7906687b 100644 (file)
@@ -548,7 +548,7 @@ int ceph_fill_file_size(struct inode *inode, int issued,
        if (ceph_seq_cmp(truncate_seq, ci->i_truncate_seq) > 0 ||
            (truncate_seq == ci->i_truncate_seq && size > inode->i_size)) {
                dout("size %lld -> %llu\n", inode->i_size, size);
-               inode->i_size = size;
+               i_size_write(inode, size);
                inode->i_blocks = (size + (1<<9) - 1) >> 9;
                ci->i_reported_size = size;
                if (truncate_seq != ci->i_truncate_seq) {
@@ -808,7 +808,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
                        spin_unlock(&ci->i_ceph_lock);
 
                        err = -EINVAL;
-                       if (WARN_ON(symlen != inode->i_size))
+                       if (WARN_ON(symlen != i_size_read(inode)))
                                goto out;
 
                        err = -ENOMEM;
@@ -1549,7 +1549,7 @@ int ceph_inode_set_size(struct inode *inode, loff_t size)
 
        spin_lock(&ci->i_ceph_lock);
        dout("set_size %p %llu -> %llu\n", inode, inode->i_size, size);
-       inode->i_size = size;
+       i_size_write(inode, size);
        inode->i_blocks = (size + (1 << 9) - 1) >> 9;
 
        /* tell the MDS if we are approaching max_size */
@@ -1911,7 +1911,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
                     inode->i_size, attr->ia_size);
                if ((issued & CEPH_CAP_FILE_EXCL) &&
                    attr->ia_size > inode->i_size) {
-                       inode->i_size = attr->ia_size;
+                       i_size_write(inode, attr->ia_size);
                        inode->i_blocks =
                                (attr->ia_size + (1 << 9) - 1) >> 9;
                        inode->i_ctime = attr->ia_ctime;
index 7febcf2475c5ab675c04dfd2fddaa3ed574522a0..50b268483302922ee63d205a2fa560ec27676eed 100644 (file)
@@ -50,7 +50,7 @@ void cifs_vfs_err(const char *fmt, ...)
        vaf.fmt = fmt;
        vaf.va = &args;
 
-       pr_err("CIFS VFS: %pV", &vaf);
+       pr_err_ratelimited("CIFS VFS: %pV", &vaf);
 
        va_end(args);
 }
index f40fbaca1b2a2c1d7457d71bd1a5e7dd4b477859..66cf0f9fff8984cb12eed1c89d91bc9c6ccc6a9d 100644 (file)
@@ -51,14 +51,13 @@ __printf(1, 2) void cifs_vfs_err(const char *fmt, ...);
 /* information message: e.g., configuration, major event */
 #define cifs_dbg(type, fmt, ...)                                       \
 do {                                                                   \
-       if (type == FYI) {                                              \
-               if (cifsFYI & CIFS_INFO) {                              \
-                       pr_debug("%s: " fmt, __FILE__, ##__VA_ARGS__);  \
-               }                                                       \
+       if (type == FYI && cifsFYI & CIFS_INFO) {                       \
+               pr_debug_ratelimited("%s: "                             \
+                           fmt, __FILE__, ##__VA_ARGS__);              \
        } else if (type == VFS) {                                       \
                cifs_vfs_err(fmt, ##__VA_ARGS__);                       \
        } else if (type == NOISY && type != 0) {                        \
-               pr_debug(fmt, ##__VA_ARGS__);                           \
+               pr_debug_ratelimited(fmt, ##__VA_ARGS__);               \
        }                                                               \
 } while (0)
 
index c4c1169814b21d15463410ca474ef8a8608c0ed0..c48ca13673e37f2eba7c4045f140e6e8b740e499 100644 (file)
@@ -507,6 +507,8 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
 
        seq_printf(s, ",rsize=%u", cifs_sb->rsize);
        seq_printf(s, ",wsize=%u", cifs_sb->wsize);
+       seq_printf(s, ",echo_interval=%lu",
+                       tcon->ses->server->echo_interval / HZ);
        /* convert actimeo and display it in seconds */
        seq_printf(s, ",actimeo=%lu", cifs_sb->actimeo / HZ);
 
@@ -640,9 +642,9 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb)
                while (*s && *s != sep)
                        s++;
 
-               mutex_lock(&dir->i_mutex);
+               inode_lock(dir);
                child = lookup_one_len(p, dentry, s - p);
-               mutex_unlock(&dir->i_mutex);
+               inode_unlock(dir);
                dput(dentry);
                dentry = child;
        } while (!IS_ERR(dentry));
@@ -752,6 +754,9 @@ cifs_loose_read_iter(struct kiocb *iocb, struct iov_iter *iter)
        ssize_t rc;
        struct inode *inode = file_inode(iocb->ki_filp);
 
+       if (iocb->ki_filp->f_flags & O_DIRECT)
+               return cifs_user_readv(iocb, iter);
+
        rc = cifs_revalidate_mapping(inode);
        if (rc)
                return rc;
@@ -766,6 +771,18 @@ static ssize_t cifs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
        ssize_t written;
        int rc;
 
+       if (iocb->ki_filp->f_flags & O_DIRECT) {
+               written = cifs_user_writev(iocb, from);
+               if (written > 0 && CIFS_CACHE_READ(cinode)) {
+                       cifs_zap_mapping(inode);
+                       cifs_dbg(FYI,
+                                "Set no oplock for inode=%p after a write operation\n",
+                                inode);
+                       cinode->oplock = 0;
+               }
+               return written;
+       }
+
        written = cifs_get_writer(cinode);
        if (written)
                return written;
index 2b510c537a0d588c532fe9200cdd4a9b03a4c9f4..a25b2513f1460608596f9c9fa7a9cb8e8383e5f2 100644 (file)
 #define SERVER_NAME_LENGTH 40
 #define SERVER_NAME_LEN_WITH_NULL     (SERVER_NAME_LENGTH + 1)
 
-/* SMB echo "timeout" -- FIXME: tunable? */
-#define SMB_ECHO_INTERVAL (60 * HZ)
+/* echo interval in seconds */
+#define SMB_ECHO_INTERVAL_MIN 1
+#define SMB_ECHO_INTERVAL_MAX 600
+#define SMB_ECHO_INTERVAL_DEFAULT 60
 
 #include "cifspdu.h"
 
@@ -225,7 +227,7 @@ struct smb_version_operations {
        void (*print_stats)(struct seq_file *m, struct cifs_tcon *);
        void (*dump_share_caps)(struct seq_file *, struct cifs_tcon *);
        /* verify the message */
-       int (*check_message)(char *, unsigned int);
+       int (*check_message)(char *, unsigned int, struct TCP_Server_Info *);
        bool (*is_oplock_break)(char *, struct TCP_Server_Info *);
        void (*downgrade_oplock)(struct TCP_Server_Info *,
                                        struct cifsInodeInfo *, bool);
@@ -507,6 +509,7 @@ struct smb_vol {
        struct sockaddr_storage dstaddr; /* destination address */
        struct sockaddr_storage srcaddr; /* allow binding to a local IP */
        struct nls_table *local_nls;
+       unsigned int echo_interval; /* echo interval in secs */
 };
 
 #define CIFS_MOUNT_MASK (CIFS_MOUNT_NO_PERM | CIFS_MOUNT_SET_UID | \
@@ -627,7 +630,9 @@ struct TCP_Server_Info {
 #ifdef CONFIG_CIFS_SMB2
        unsigned int    max_read;
        unsigned int    max_write;
+       __u8            preauth_hash[512];
 #endif /* CONFIG_CIFS_SMB2 */
+       unsigned long echo_interval;
 };
 
 static inline unsigned int
@@ -809,7 +814,10 @@ struct cifs_ses {
        bool need_reconnect:1; /* connection reset, uid now invalid */
 #ifdef CONFIG_CIFS_SMB2
        __u16 session_flags;
-       char smb3signingkey[SMB3_SIGN_KEY_SIZE]; /* for signing smb3 packets */
+       __u8 smb3signingkey[SMB3_SIGN_KEY_SIZE];
+       __u8 smb3encryptionkey[SMB3_SIGN_KEY_SIZE];
+       __u8 smb3decryptionkey[SMB3_SIGN_KEY_SIZE];
+       __u8 preauth_hash[512];
 #endif /* CONFIG_CIFS_SMB2 */
 };
 
index c63fd1dde25b861b011f604522572c5619f177f1..eed7ff50faf016e6714867542953334b199e7627 100644 (file)
@@ -102,7 +102,7 @@ extern int SendReceiveBlockingLock(const unsigned int xid,
                        struct smb_hdr *out_buf,
                        int *bytes_returned);
 extern int cifs_reconnect(struct TCP_Server_Info *server);
-extern int checkSMB(char *buf, unsigned int length);
+extern int checkSMB(char *buf, unsigned int len, struct TCP_Server_Info *srvr);
 extern bool is_valid_oplock_break(char *, struct TCP_Server_Info *);
 extern bool backup_cred(struct cifs_sb_info *);
 extern bool is_size_safe_to_change(struct cifsInodeInfo *, __u64 eof);
@@ -439,7 +439,8 @@ extern int setup_ntlm_response(struct cifs_ses *, const struct nls_table *);
 extern int setup_ntlmv2_rsp(struct cifs_ses *, const struct nls_table *);
 extern void cifs_crypto_shash_release(struct TCP_Server_Info *);
 extern int calc_seckey(struct cifs_ses *);
-extern int generate_smb3signingkey(struct cifs_ses *);
+extern int generate_smb30signingkey(struct cifs_ses *);
+extern int generate_smb311signingkey(struct cifs_ses *);
 
 #ifdef CONFIG_CIFS_WEAK_PW_HASH
 extern int calc_lanman_hash(const char *password, const char *cryptkey,
index ecb0803bdb0e50479f50b0e874a4e87cb064e2d0..4fbd92d2e113e69d5e3105235a215a52b2e756e7 100644 (file)
@@ -95,6 +95,7 @@ enum {
        Opt_cruid, Opt_gid, Opt_file_mode,
        Opt_dirmode, Opt_port,
        Opt_rsize, Opt_wsize, Opt_actimeo,
+       Opt_echo_interval,
 
        /* Mount options which take string value */
        Opt_user, Opt_pass, Opt_ip,
@@ -188,6 +189,7 @@ static const match_table_t cifs_mount_option_tokens = {
        { Opt_rsize, "rsize=%s" },
        { Opt_wsize, "wsize=%s" },
        { Opt_actimeo, "actimeo=%s" },
+       { Opt_echo_interval, "echo_interval=%s" },
 
        { Opt_blank_user, "user=" },
        { Opt_blank_user, "username=" },
@@ -368,7 +370,6 @@ cifs_reconnect(struct TCP_Server_Info *server)
        server->session_key.response = NULL;
        server->session_key.len = 0;
        server->lstrp = jiffies;
-       mutex_unlock(&server->srv_mutex);
 
        /* mark submitted MIDs for retry and issue callback */
        INIT_LIST_HEAD(&retry_list);
@@ -381,6 +382,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
                list_move(&mid_entry->qhead, &retry_list);
        }
        spin_unlock(&GlobalMid_Lock);
+       mutex_unlock(&server->srv_mutex);
 
        cifs_dbg(FYI, "%s: issuing mid callbacks\n", __func__);
        list_for_each_safe(tmp, tmp2, &retry_list) {
@@ -418,6 +420,7 @@ cifs_echo_request(struct work_struct *work)
        int rc;
        struct TCP_Server_Info *server = container_of(work,
                                        struct TCP_Server_Info, echo.work);
+       unsigned long echo_interval = server->echo_interval;
 
        /*
         * We cannot send an echo if it is disabled or until the
@@ -427,7 +430,7 @@ cifs_echo_request(struct work_struct *work)
         */
        if (!server->ops->need_neg || server->ops->need_neg(server) ||
            (server->ops->can_echo && !server->ops->can_echo(server)) ||
-           time_before(jiffies, server->lstrp + SMB_ECHO_INTERVAL - HZ))
+           time_before(jiffies, server->lstrp + echo_interval - HZ))
                goto requeue_echo;
 
        rc = server->ops->echo ? server->ops->echo(server) : -ENOSYS;
@@ -436,7 +439,7 @@ cifs_echo_request(struct work_struct *work)
                         server->hostname);
 
 requeue_echo:
-       queue_delayed_work(cifsiod_wq, &server->echo, SMB_ECHO_INTERVAL);
+       queue_delayed_work(cifsiod_wq, &server->echo, echo_interval);
 }
 
 static bool
@@ -487,9 +490,9 @@ server_unresponsive(struct TCP_Server_Info *server)
         *     a response in >60s.
         */
        if (server->tcpStatus == CifsGood &&
-           time_after(jiffies, server->lstrp + 2 * SMB_ECHO_INTERVAL)) {
-               cifs_dbg(VFS, "Server %s has not responded in %d seconds. Reconnecting...\n",
-                        server->hostname, (2 * SMB_ECHO_INTERVAL) / HZ);
+           time_after(jiffies, server->lstrp + 2 * server->echo_interval)) {
+               cifs_dbg(VFS, "Server %s has not responded in %lu seconds. Reconnecting...\n",
+                        server->hostname, (2 * server->echo_interval) / HZ);
                cifs_reconnect(server);
                wake_up(&server->response_q);
                return true;
@@ -828,7 +831,7 @@ standard_receive3(struct TCP_Server_Info *server, struct mid_q_entry *mid)
         * 48 bytes is enough to display the header and a little bit
         * into the payload for debugging purposes.
         */
-       length = server->ops->check_message(buf, server->total_read);
+       length = server->ops->check_message(buf, server->total_read, server);
        if (length != 0)
                cifs_dump_mem("Bad SMB: ", buf,
                        min_t(unsigned int, server->total_read, 48));
@@ -1624,6 +1627,14 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
                                goto cifs_parse_mount_err;
                        }
                        break;
+               case Opt_echo_interval:
+                       if (get_option_ul(args, &option)) {
+                               cifs_dbg(VFS, "%s: Invalid echo interval value\n",
+                                        __func__);
+                               goto cifs_parse_mount_err;
+                       }
+                       vol->echo_interval = option;
+                       break;
 
                /* String Arguments */
 
@@ -2089,6 +2100,9 @@ static int match_server(struct TCP_Server_Info *server, struct smb_vol *vol)
        if (!match_security(server, vol))
                return 0;
 
+       if (server->echo_interval != vol->echo_interval)
+               return 0;
+
        return 1;
 }
 
@@ -2208,6 +2222,12 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
        tcp_ses->tcpStatus = CifsNew;
        ++tcp_ses->srv_count;
 
+       if (volume_info->echo_interval >= SMB_ECHO_INTERVAL_MIN &&
+               volume_info->echo_interval <= SMB_ECHO_INTERVAL_MAX)
+               tcp_ses->echo_interval = volume_info->echo_interval * HZ;
+       else
+               tcp_ses->echo_interval = SMB_ECHO_INTERVAL_DEFAULT * HZ;
+
        rc = ip_connect(tcp_ses);
        if (rc < 0) {
                cifs_dbg(VFS, "Error connecting to socket. Aborting operation.\n");
@@ -2237,7 +2257,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
        cifs_fscache_get_client_cookie(tcp_ses);
 
        /* queue echo request delayed work */
-       queue_delayed_work(cifsiod_wq, &tcp_ses->echo, SMB_ECHO_INTERVAL);
+       queue_delayed_work(cifsiod_wq, &tcp_ses->echo, tcp_ses->echo_interval);
 
        return tcp_ses;
 
index 0a2752b79e72cc2b7a083894843a8b3ae1dea23d..ff882aeaccc67c404a289fff472a26d8537e1128 100644 (file)
@@ -2267,7 +2267,7 @@ int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
        rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
        if (rc)
                return rc;
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        xid = get_xid();
 
@@ -2292,7 +2292,7 @@ int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
        }
 
        free_xid(xid);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return rc;
 }
 
@@ -2309,7 +2309,7 @@ int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
        rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
        if (rc)
                return rc;
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        xid = get_xid();
 
@@ -2326,7 +2326,7 @@ int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
        }
 
        free_xid(xid);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return rc;
 }
 
@@ -2672,7 +2672,7 @@ cifs_writev(struct kiocb *iocb, struct iov_iter *from)
         * with a brlock that prevents writing.
         */
        down_read(&cinode->lock_sem);
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        rc = generic_write_checks(iocb, from);
        if (rc <= 0)
@@ -2685,7 +2685,7 @@ cifs_writev(struct kiocb *iocb, struct iov_iter *from)
        else
                rc = -EACCES;
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        if (rc > 0) {
                ssize_t err = generic_write_sync(file, iocb->ki_pos - rc, rc);
index a329f5ba35aad8649ce3f644b9b11913c57cc5f8..aeb26dbfa1bf2dbfb2d111316064d240ef807336 100644 (file)
@@ -814,8 +814,21 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
                        }
                } else
                        fattr.cf_uniqueid = iunique(sb, ROOT_I);
-       } else
-               fattr.cf_uniqueid = CIFS_I(*inode)->uniqueid;
+       } else {
+               if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) &&
+                   validinum == false && server->ops->get_srv_inum) {
+                       /*
+                        * Pass a NULL tcon to ensure we don't make a round
+                        * trip to the server. This only works for SMB2+.
+                        */
+                       tmprc = server->ops->get_srv_inum(xid,
+                               NULL, cifs_sb, full_path,
+                               &fattr.cf_uniqueid, data);
+                       if (tmprc)
+                               fattr.cf_uniqueid = CIFS_I(*inode)->uniqueid;
+               } else
+                       fattr.cf_uniqueid = CIFS_I(*inode)->uniqueid;
+       }
 
        /* query for SFU type info if supported and needed */
        if (fattr.cf_cifsattrs & ATTR_SYSTEM &&
@@ -856,6 +869,13 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
        } else {
                /* we already have inode, update it */
 
+               /* if uniqueid is different, return error */
+               if (unlikely(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM &&
+                   CIFS_I(*inode)->uniqueid != fattr.cf_uniqueid)) {
+                       rc = -ESTALE;
+                       goto cgii_exit;
+               }
+
                /* if filetype is different, return error */
                if (unlikely(((*inode)->i_mode & S_IFMT) !=
                    (fattr.cf_mode & S_IFMT))) {
index 8442b8b8e0be145e7f68055e0025f1909442d156..813fe13c2ae175869cdc6db06a19392d09d7f05f 100644 (file)
@@ -310,7 +310,7 @@ check_smb_hdr(struct smb_hdr *smb)
 }
 
 int
-checkSMB(char *buf, unsigned int total_read)
+checkSMB(char *buf, unsigned int total_read, struct TCP_Server_Info *server)
 {
        struct smb_hdr *smb = (struct smb_hdr *)buf;
        __u32 rfclen = be32_to_cpu(smb->smb_buf_length);
index 0557c45e9c3308a368f7ff3e73f9423c81eedea4..b30a4a6d98a0f002c235bf3128012aff06ba2bc3 100644 (file)
@@ -847,6 +847,7 @@ int cifs_readdir(struct file *file, struct dir_context *ctx)
                 * if buggy server returns . and .. late do we want to
                 * check for that here?
                 */
+               *tmp_buf = 0;
                rc = cifs_filldir(current_entry, file, ctx,
                                  tmp_buf, max_len);
                if (rc) {
index 1c5907019045517ac302daf4c04e8caea21f1fda..389fb9f8c84e22308ac5fcb44c27f005bee6e27c 100644 (file)
@@ -38,7 +38,7 @@ check_smb2_hdr(struct smb2_hdr *hdr, __u64 mid)
         * Make sure that this really is an SMB, that it is a response,
         * and that the message ids match.
         */
-       if ((*(__le32 *)hdr->ProtocolId == SMB2_PROTO_NUMBER) &&
+       if ((hdr->ProtocolId == SMB2_PROTO_NUMBER) &&
            (mid == wire_mid)) {
                if (hdr->Flags & SMB2_FLAGS_SERVER_TO_REDIR)
                        return 0;
@@ -50,9 +50,9 @@ check_smb2_hdr(struct smb2_hdr *hdr, __u64 mid)
                                cifs_dbg(VFS, "Received Request not response\n");
                }
        } else { /* bad signature or mid */
-               if (*(__le32 *)hdr->ProtocolId != SMB2_PROTO_NUMBER)
+               if (hdr->ProtocolId != SMB2_PROTO_NUMBER)
                        cifs_dbg(VFS, "Bad protocol string signature header %x\n",
-                                *(unsigned int *) hdr->ProtocolId);
+                                le32_to_cpu(hdr->ProtocolId));
                if (mid != wire_mid)
                        cifs_dbg(VFS, "Mids do not match: %llu and %llu\n",
                                 mid, wire_mid);
@@ -93,11 +93,11 @@ static const __le16 smb2_rsp_struct_sizes[NUMBER_OF_SMB2_COMMANDS] = {
 };
 
 int
-smb2_check_message(char *buf, unsigned int length)
+smb2_check_message(char *buf, unsigned int length, struct TCP_Server_Info *srvr)
 {
        struct smb2_hdr *hdr = (struct smb2_hdr *)buf;
        struct smb2_pdu *pdu = (struct smb2_pdu *)hdr;
-       __u64 mid = le64_to_cpu(hdr->MessageId);
+       __u64 mid;
        __u32 len = get_rfc1002_length(buf);
        __u32 clc_len;  /* calculated length */
        int command;
@@ -111,6 +111,30 @@ smb2_check_message(char *buf, unsigned int length)
         * ie Validate the wct via smb2_struct_sizes table above
         */
 
+       if (hdr->ProtocolId == SMB2_TRANSFORM_PROTO_NUM) {
+               struct smb2_transform_hdr *thdr =
+                       (struct smb2_transform_hdr *)buf;
+               struct cifs_ses *ses = NULL;
+               struct list_head *tmp;
+
+               /* decrypt frame now that it is completely read in */
+               spin_lock(&cifs_tcp_ses_lock);
+               list_for_each(tmp, &srvr->smb_ses_list) {
+                       ses = list_entry(tmp, struct cifs_ses, smb_ses_list);
+                       if (ses->Suid == thdr->SessionId)
+                               break;
+
+                       ses = NULL;
+               }
+               spin_unlock(&cifs_tcp_ses_lock);
+               if (ses == NULL) {
+                       cifs_dbg(VFS, "no decryption - session id not found\n");
+                       return 1;
+               }
+       }
+
+
+       mid = le64_to_cpu(hdr->MessageId);
        if (length < sizeof(struct smb2_pdu)) {
                if ((length >= sizeof(struct smb2_hdr)) && (hdr->Status != 0)) {
                        pdu->StructureSize2 = 0;
@@ -322,7 +346,7 @@ smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr)
 
        /* return pointer to beginning of data area, ie offset from SMB start */
        if ((*off != 0) && (*len != 0))
-               return (char *)(&hdr->ProtocolId[0]) + *off;
+               return (char *)(&hdr->ProtocolId) + *off;
        else
                return NULL;
 }
index 53ccdde6ff187c16084e10dd07372d6516d25139..3525ed756173d40009017ca11c9ae04cbd97f34a 100644 (file)
@@ -182,6 +182,11 @@ smb2_find_mid(struct TCP_Server_Info *server, char *buf)
        struct smb2_hdr *hdr = (struct smb2_hdr *)buf;
        __u64 wire_mid = le64_to_cpu(hdr->MessageId);
 
+       if (hdr->ProtocolId == SMB2_TRANSFORM_PROTO_NUM) {
+               cifs_dbg(VFS, "encrypted frame parsing not supported yet");
+               return NULL;
+       }
+
        spin_lock(&GlobalMid_Lock);
        list_for_each_entry(mid, &server->pending_mid_q, qhead) {
                if ((mid->mid == wire_mid) &&
@@ -1692,7 +1697,7 @@ struct smb_version_operations smb30_operations = {
        .get_lease_key = smb2_get_lease_key,
        .set_lease_key = smb2_set_lease_key,
        .new_lease_key = smb2_new_lease_key,
-       .generate_signingkey = generate_smb3signingkey,
+       .generate_signingkey = generate_smb30signingkey,
        .calc_signature = smb3_calc_signature,
        .set_integrity  = smb3_set_integrity,
        .is_read_op = smb21_is_read_op,
@@ -1779,7 +1784,7 @@ struct smb_version_operations smb311_operations = {
        .get_lease_key = smb2_get_lease_key,
        .set_lease_key = smb2_set_lease_key,
        .new_lease_key = smb2_new_lease_key,
-       .generate_signingkey = generate_smb3signingkey,
+       .generate_signingkey = generate_smb311signingkey,
        .calc_signature = smb3_calc_signature,
        .set_integrity  = smb3_set_integrity,
        .is_read_op = smb21_is_read_op,
@@ -1838,7 +1843,7 @@ struct smb_version_values smb21_values = {
 struct smb_version_values smb30_values = {
        .version_string = SMB30_VERSION_STRING,
        .protocol_id = SMB30_PROT_ID,
-       .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES,
+       .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION,
        .large_lock_type = 0,
        .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
        .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
@@ -1858,7 +1863,7 @@ struct smb_version_values smb30_values = {
 struct smb_version_values smb302_values = {
        .version_string = SMB302_VERSION_STRING,
        .protocol_id = SMB302_PROT_ID,
-       .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES,
+       .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION,
        .large_lock_type = 0,
        .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
        .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
index 767555518d40ccd357b6f5540f2758bb8be6e654..10f8d5cf5681e26b843c0ac15821eab69e6eeb22 100644 (file)
@@ -97,10 +97,7 @@ smb2_hdr_assemble(struct smb2_hdr *hdr, __le16 smb2_cmd /* command */ ,
        hdr->smb2_buf_length = cpu_to_be32(parmsize + sizeof(struct smb2_hdr)
                        - 4 /*  RFC 1001 length field itself not counted */);
 
-       hdr->ProtocolId[0] = 0xFE;
-       hdr->ProtocolId[1] = 'S';
-       hdr->ProtocolId[2] = 'M';
-       hdr->ProtocolId[3] = 'B';
+       hdr->ProtocolId = SMB2_PROTO_NUMBER;
        hdr->StructureSize = cpu_to_le16(64);
        hdr->Command = smb2_cmd;
        hdr->CreditRequest = cpu_to_le16(2); /* BB make this dynamic */
@@ -1573,7 +1570,8 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
                goto ioctl_exit;
        }
 
-       memcpy(*out_data, rsp->hdr.ProtocolId + le32_to_cpu(rsp->OutputOffset),
+       memcpy(*out_data,
+              (char *)&rsp->hdr.ProtocolId + le32_to_cpu(rsp->OutputOffset),
               *plen);
 ioctl_exit:
        free_rsp_buf(resp_buftype, rsp);
@@ -2093,7 +2091,7 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms,
        }
 
        if (*buf) {
-               memcpy(*buf, (char *)rsp->hdr.ProtocolId + rsp->DataOffset,
+               memcpy(*buf, (char *)&rsp->hdr.ProtocolId + rsp->DataOffset,
                       *nbytes);
                free_rsp_buf(resp_buftype, iov[0].iov_base);
        } else if (resp_buftype != CIFS_NO_BUFFER) {
index 4af52780ec350323cd41e993389e7214fb50f97d..ff88d9feb01e7475f75a230c004d5f40a80f14f9 100644 (file)
@@ -86,6 +86,7 @@
 #define MAX_SMB2_HDR_SIZE 0x78 /* 4 len + 64 hdr + (2*24 wct) + 2 bct + 2 pad */
 
 #define SMB2_PROTO_NUMBER cpu_to_le32(0x424d53fe)
+#define SMB2_TRANSFORM_PROTO_NUM cpu_to_le32(0x424d53fd)
 
 /*
  * SMB2 Header Definition
@@ -102,7 +103,7 @@ struct smb2_hdr {
        __be32 smb2_buf_length; /* big endian on wire */
                                /* length is only two or three bytes - with
                                 one or two byte type preceding it that MBZ */
-       __u8   ProtocolId[4];   /* 0xFE 'S' 'M' 'B' */
+       __le32 ProtocolId;      /* 0xFE 'S' 'M' 'B' */
        __le16 StructureSize;   /* 64 */
        __le16 CreditCharge;    /* MBZ */
        __le32 Status;          /* Error from server */
@@ -128,11 +129,10 @@ struct smb2_transform_hdr {
                                 one or two byte type preceding it that MBZ */
        __u8   ProtocolId[4];   /* 0xFD 'S' 'M' 'B' */
        __u8   Signature[16];
-       __u8   Nonce[11];
-       __u8   Reserved[5];
+       __u8   Nonce[16];
        __le32 OriginalMessageSize;
        __u16  Reserved1;
-       __le16 EncryptionAlgorithm;
+       __le16 Flags; /* EncryptionAlgorithm */
        __u64  SessionId;
 } __packed;
 
index 79dc650c18b26baf2e9a4ca8ac2c67df5ebdc63e..4f07dc93608db3fc8723f53d86219087b7cc76c8 100644 (file)
@@ -34,7 +34,8 @@ struct smb_rqst;
  *****************************************************************
  */
 extern int map_smb2_to_linux_error(char *buf, bool log_err);
-extern int smb2_check_message(char *buf, unsigned int length);
+extern int smb2_check_message(char *buf, unsigned int length,
+                             struct TCP_Server_Info *server);
 extern unsigned int smb2_calc_size(void *buf);
 extern char *smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr);
 extern __le16 *cifs_convert_path_to_utf16(const char *from,
index d4c5b6f109a7feaa6f2c99f21ca332ff41a2673f..8732a43b10084bf787a8410498e3ec51c69ea93c 100644 (file)
@@ -222,8 +222,8 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
        return rc;
 }
 
-int
-generate_smb3signingkey(struct cifs_ses *ses)
+static int generate_key(struct cifs_ses *ses, struct kvec label,
+                       struct kvec context, __u8 *key, unsigned int key_size)
 {
        unsigned char zero = 0x0;
        __u8 i[4] = {0, 0, 0, 1};
@@ -233,7 +233,7 @@ generate_smb3signingkey(struct cifs_ses *ses)
        unsigned char *hashptr = prfhash;
 
        memset(prfhash, 0x0, SMB2_HMACSHA256_SIZE);
-       memset(ses->smb3signingkey, 0x0, SMB3_SIGNKEY_SIZE);
+       memset(key, 0x0, key_size);
 
        rc = smb3_crypto_shash_allocate(ses->server);
        if (rc) {
@@ -262,7 +262,7 @@ generate_smb3signingkey(struct cifs_ses *ses)
        }
 
        rc = crypto_shash_update(&ses->server->secmech.sdeschmacsha256->shash,
-                               "SMB2AESCMAC", 12);
+                               label.iov_base, label.iov_len);
        if (rc) {
                cifs_dbg(VFS, "%s: Could not update with label\n", __func__);
                goto smb3signkey_ret;
@@ -276,7 +276,7 @@ generate_smb3signingkey(struct cifs_ses *ses)
        }
 
        rc = crypto_shash_update(&ses->server->secmech.sdeschmacsha256->shash,
-                               "SmbSign", 8);
+                               context.iov_base, context.iov_len);
        if (rc) {
                cifs_dbg(VFS, "%s: Could not update with context\n", __func__);
                goto smb3signkey_ret;
@@ -296,12 +296,102 @@ generate_smb3signingkey(struct cifs_ses *ses)
                goto smb3signkey_ret;
        }
 
-       memcpy(ses->smb3signingkey, hashptr, SMB3_SIGNKEY_SIZE);
+       memcpy(key, hashptr, key_size);
 
 smb3signkey_ret:
        return rc;
 }
 
+struct derivation {
+       struct kvec label;
+       struct kvec context;
+};
+
+struct derivation_triplet {
+       struct derivation signing;
+       struct derivation encryption;
+       struct derivation decryption;
+};
+
+static int
+generate_smb3signingkey(struct cifs_ses *ses,
+                       const struct derivation_triplet *ptriplet)
+{
+       int rc;
+
+       rc = generate_key(ses, ptriplet->signing.label,
+                         ptriplet->signing.context, ses->smb3signingkey,
+                         SMB3_SIGN_KEY_SIZE);
+       if (rc)
+               return rc;
+
+       rc = generate_key(ses, ptriplet->encryption.label,
+                         ptriplet->encryption.context, ses->smb3encryptionkey,
+                         SMB3_SIGN_KEY_SIZE);
+       if (rc)
+               return rc;
+
+       return generate_key(ses, ptriplet->decryption.label,
+                           ptriplet->decryption.context,
+                           ses->smb3decryptionkey, SMB3_SIGN_KEY_SIZE);
+}
+
+int
+generate_smb30signingkey(struct cifs_ses *ses)
+
+{
+       struct derivation_triplet triplet;
+       struct derivation *d;
+
+       d = &triplet.signing;
+       d->label.iov_base = "SMB2AESCMAC";
+       d->label.iov_len = 12;
+       d->context.iov_base = "SmbSign";
+       d->context.iov_len = 8;
+
+       d = &triplet.encryption;
+       d->label.iov_base = "SMB2AESCCM";
+       d->label.iov_len = 11;
+       d->context.iov_base = "ServerIn ";
+       d->context.iov_len = 10;
+
+       d = &triplet.decryption;
+       d->label.iov_base = "SMB2AESCCM";
+       d->label.iov_len = 11;
+       d->context.iov_base = "ServerOut";
+       d->context.iov_len = 10;
+
+       return generate_smb3signingkey(ses, &triplet);
+}
+
+int
+generate_smb311signingkey(struct cifs_ses *ses)
+
+{
+       struct derivation_triplet triplet;
+       struct derivation *d;
+
+       d = &triplet.signing;
+       d->label.iov_base = "SMB2AESCMAC";
+       d->label.iov_len = 12;
+       d->context.iov_base = "SmbSign";
+       d->context.iov_len = 8;
+
+       d = &triplet.encryption;
+       d->label.iov_base = "SMB2AESCCM";
+       d->label.iov_len = 11;
+       d->context.iov_base = "ServerIn ";
+       d->context.iov_len = 10;
+
+       d = &triplet.decryption;
+       d->label.iov_base = "SMB2AESCCM";
+       d->label.iov_len = 11;
+       d->context.iov_base = "ServerOut";
+       d->context.iov_len = 10;
+
+       return generate_smb3signingkey(ses, &triplet);
+}
+
 int
 smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
 {
index 2a24c524fb9a90cedd4187460ecb90c0e5dfcf0c..87abe8ed074c31962a969b178d55757e44bdc0d5 100644 (file)
@@ -576,14 +576,16 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst,
        cifs_in_send_dec(server);
        cifs_save_when_sent(mid);
 
-       if (rc < 0)
+       if (rc < 0) {
                server->sequence_number -= 2;
+               cifs_delete_mid(mid);
+       }
+
        mutex_unlock(&server->srv_mutex);
 
        if (rc == 0)
                return 0;
 
-       cifs_delete_mid(mid);
        add_credits_and_wake_if(server, credits, optype);
        return rc;
 }
index f829fe963f5bab9e1de4b81ec9195de90e5da77d..5104d84c4f6425c5ea950a889040a5ca54527c29 100644 (file)
@@ -72,8 +72,7 @@ void coda_sysctl_clean(void);
 } while (0)
 
 
-#define CODA_FREE(ptr,size) \
-    do { if (size < PAGE_SIZE) kfree((ptr)); else vfree((ptr)); } while (0)
+#define CODA_FREE(ptr, size) kvfree((ptr))
 
 /* inode to cnode access functions */
 
index fda9f4311212fe4a6d3b884d43dfdb7b5ff51e7e..42e731b8c80a680602d401b6b28239acf74291e0 100644 (file)
@@ -427,13 +427,13 @@ static int coda_readdir(struct file *coda_file, struct dir_context *ctx)
        if (host_file->f_op->iterate) {
                struct inode *host_inode = file_inode(host_file);
 
-               mutex_lock(&host_inode->i_mutex);
+               inode_lock(host_inode);
                ret = -ENOENT;
                if (!IS_DEADDIR(host_inode)) {
                        ret = host_file->f_op->iterate(host_file, ctx);
                        file_accessed(host_file);
                }
-               mutex_unlock(&host_inode->i_mutex);
+               inode_unlock(host_inode);
                return ret;
        }
        /* Venus: we must read Venus dirents from a file */
index 1da3805f3ddcdb209a6b357e12aacac35809dda9..f47c7483863b5ae55b1160de0f059ec23b3d4cb3 100644 (file)
@@ -71,12 +71,12 @@ coda_file_write_iter(struct kiocb *iocb, struct iov_iter *to)
 
        host_file = cfi->cfi_container;
        file_start_write(host_file);
-       mutex_lock(&coda_inode->i_mutex);
+       inode_lock(coda_inode);
        ret = vfs_iter_write(cfi->cfi_container, to, &iocb->ki_pos);
        coda_inode->i_size = file_inode(host_file)->i_size;
        coda_inode->i_blocks = (coda_inode->i_size + 511) >> 9;
        coda_inode->i_mtime = coda_inode->i_ctime = CURRENT_TIME_SEC;
-       mutex_unlock(&coda_inode->i_mutex);
+       inode_unlock(coda_inode);
        file_end_write(host_file);
        return ret;
 }
@@ -203,7 +203,7 @@ int coda_fsync(struct file *coda_file, loff_t start, loff_t end, int datasync)
        err = filemap_write_and_wait_range(coda_inode->i_mapping, start, end);
        if (err)
                return err;
-       mutex_lock(&coda_inode->i_mutex);
+       inode_lock(coda_inode);
 
        cfi = CODA_FTOC(coda_file);
        BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
@@ -212,7 +212,7 @@ int coda_fsync(struct file *coda_file, loff_t start, loff_t end, int datasync)
        err = vfs_fsync(host_file, datasync);
        if (!err && !datasync)
                err = venus_fsync(coda_inode->i_sb, coda_i2f(coda_inode));
-       mutex_unlock(&coda_inode->i_mutex);
+       inode_unlock(coda_inode);
 
        return err;
 }
index a5b8eb69a8f42526d4fb4d46d1cc26d33e78e6b2..6402eaf8ab958f27cc3371d0773ad45046dc0b8e 100644 (file)
@@ -1261,6 +1261,9 @@ COMPATIBLE_IOCTL(HCIUNBLOCKADDR)
 COMPATIBLE_IOCTL(HCIINQUIRY)
 COMPATIBLE_IOCTL(HCIUARTSETPROTO)
 COMPATIBLE_IOCTL(HCIUARTGETPROTO)
+COMPATIBLE_IOCTL(HCIUARTGETDEVICE)
+COMPATIBLE_IOCTL(HCIUARTSETFLAGS)
+COMPATIBLE_IOCTL(HCIUARTGETFLAGS)
 COMPATIBLE_IOCTL(RFCOMMCREATEDEV)
 COMPATIBLE_IOCTL(RFCOMMRELEASEDEV)
 COMPATIBLE_IOCTL(RFCOMMGETDEVLIST)
index cab612b2ae767bbe2ffeb187a6f630f891cd52f3..f419519ec41fb4f3e932fe18561b34f3313e747b 100644 (file)
@@ -640,13 +640,13 @@ static void detach_groups(struct config_group *group)
 
                child = sd->s_dentry;
 
-               mutex_lock(&d_inode(child)->i_mutex);
+               inode_lock(d_inode(child));
 
                configfs_detach_group(sd->s_element);
                d_inode(child)->i_flags |= S_DEAD;
                dont_mount(child);
 
-               mutex_unlock(&d_inode(child)->i_mutex);
+               inode_unlock(d_inode(child));
 
                d_delete(child);
                dput(child);
@@ -834,11 +834,11 @@ static int configfs_attach_item(struct config_item *parent_item,
                         * the VFS may already have hit and used them. Thus,
                         * we must lock them as rmdir() would.
                         */
-                       mutex_lock(&d_inode(dentry)->i_mutex);
+                       inode_lock(d_inode(dentry));
                        configfs_remove_dir(item);
                        d_inode(dentry)->i_flags |= S_DEAD;
                        dont_mount(dentry);
-                       mutex_unlock(&d_inode(dentry)->i_mutex);
+                       inode_unlock(d_inode(dentry));
                        d_delete(dentry);
                }
        }
@@ -874,7 +874,7 @@ static int configfs_attach_group(struct config_item *parent_item,
                 * We must also lock the inode to remove it safely in case of
                 * error, as rmdir() would.
                 */
-               mutex_lock_nested(&d_inode(dentry)->i_mutex, I_MUTEX_CHILD);
+               inode_lock_nested(d_inode(dentry), I_MUTEX_CHILD);
                configfs_adjust_dir_dirent_depth_before_populate(sd);
                ret = populate_groups(to_config_group(item));
                if (ret) {
@@ -883,7 +883,7 @@ static int configfs_attach_group(struct config_item *parent_item,
                        dont_mount(dentry);
                }
                configfs_adjust_dir_dirent_depth_after_populate(sd);
-               mutex_unlock(&d_inode(dentry)->i_mutex);
+               inode_unlock(d_inode(dentry));
                if (ret)
                        d_delete(dentry);
        }
@@ -1135,7 +1135,7 @@ int configfs_depend_item(struct configfs_subsystem *subsys,
         * subsystem is really registered, and so we need to lock out
         * configfs_[un]register_subsystem().
         */
-       mutex_lock(&d_inode(root)->i_mutex);
+       inode_lock(d_inode(root));
 
        subsys_sd = configfs_find_subsys_dentry(root->d_fsdata, s_item);
        if (!subsys_sd) {
@@ -1147,7 +1147,7 @@ int configfs_depend_item(struct configfs_subsystem *subsys,
        ret = configfs_do_depend_item(subsys_sd->s_dentry, target);
 
 out_unlock_fs:
-       mutex_unlock(&d_inode(root)->i_mutex);
+       inode_unlock(d_inode(root));
 
        /*
         * If we succeeded, the fs is pinned via other methods.  If not,
@@ -1230,7 +1230,7 @@ int configfs_depend_item_unlocked(struct configfs_subsystem *caller_subsys,
                 * additional locking to prevent other subsystem from being
                 * unregistered
                 */
-               mutex_lock(&d_inode(root->cg_item.ci_dentry)->i_mutex);
+               inode_lock(d_inode(root->cg_item.ci_dentry));
 
                /*
                 * As we are trying to depend item from other subsystem
@@ -1254,7 +1254,7 @@ out_root_unlock:
                 * We were called from subsystem other than our target so we
                 * took some locks so now it's time to release them
                 */
-               mutex_unlock(&d_inode(root->cg_item.ci_dentry)->i_mutex);
+               inode_unlock(d_inode(root->cg_item.ci_dentry));
 
        return ret;
 }
@@ -1561,7 +1561,7 @@ int configfs_rename_dir(struct config_item * item, const char *new_name)
        down_write(&configfs_rename_sem);
        parent = item->parent->dentry;
 
-       mutex_lock(&d_inode(parent)->i_mutex);
+       inode_lock(d_inode(parent));
 
        new_dentry = lookup_one_len(new_name, parent, strlen(new_name));
        if (!IS_ERR(new_dentry)) {
@@ -1577,7 +1577,7 @@ int configfs_rename_dir(struct config_item * item, const char *new_name)
                        error = -EEXIST;
                dput(new_dentry);
        }
-       mutex_unlock(&d_inode(parent)->i_mutex);
+       inode_unlock(d_inode(parent));
        up_write(&configfs_rename_sem);
 
        return error;
@@ -1590,7 +1590,7 @@ static int configfs_dir_open(struct inode *inode, struct file *file)
        struct configfs_dirent * parent_sd = dentry->d_fsdata;
        int err;
 
-       mutex_lock(&d_inode(dentry)->i_mutex);
+       inode_lock(d_inode(dentry));
        /*
         * Fake invisibility if dir belongs to a group/default groups hierarchy
         * being attached
@@ -1603,7 +1603,7 @@ static int configfs_dir_open(struct inode *inode, struct file *file)
                else
                        err = 0;
        }
-       mutex_unlock(&d_inode(dentry)->i_mutex);
+       inode_unlock(d_inode(dentry));
 
        return err;
 }
@@ -1613,11 +1613,11 @@ static int configfs_dir_close(struct inode *inode, struct file *file)
        struct dentry * dentry = file->f_path.dentry;
        struct configfs_dirent * cursor = file->private_data;
 
-       mutex_lock(&d_inode(dentry)->i_mutex);
+       inode_lock(d_inode(dentry));
        spin_lock(&configfs_dirent_lock);
        list_del_init(&cursor->s_sibling);
        spin_unlock(&configfs_dirent_lock);
-       mutex_unlock(&d_inode(dentry)->i_mutex);
+       inode_unlock(d_inode(dentry));
 
        release_configfs_dirent(cursor);
 
@@ -1698,7 +1698,7 @@ static loff_t configfs_dir_lseek(struct file *file, loff_t offset, int whence)
 {
        struct dentry * dentry = file->f_path.dentry;
 
-       mutex_lock(&d_inode(dentry)->i_mutex);
+       inode_lock(d_inode(dentry));
        switch (whence) {
                case 1:
                        offset += file->f_pos;
@@ -1706,7 +1706,7 @@ static loff_t configfs_dir_lseek(struct file *file, loff_t offset, int whence)
                        if (offset >= 0)
                                break;
                default:
-                       mutex_unlock(&d_inode(dentry)->i_mutex);
+                       inode_unlock(d_inode(dentry));
                        return -EINVAL;
        }
        if (offset != file->f_pos) {
@@ -1732,7 +1732,7 @@ static loff_t configfs_dir_lseek(struct file *file, loff_t offset, int whence)
                        spin_unlock(&configfs_dirent_lock);
                }
        }
-       mutex_unlock(&d_inode(dentry)->i_mutex);
+       inode_unlock(d_inode(dentry));
        return offset;
 }
 
@@ -1767,14 +1767,14 @@ int configfs_register_group(struct config_group *parent_group,
 
        parent = parent_group->cg_item.ci_dentry;
 
-       mutex_lock_nested(&d_inode(parent)->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(d_inode(parent), I_MUTEX_PARENT);
        ret = create_default_group(parent_group, group);
        if (!ret) {
                spin_lock(&configfs_dirent_lock);
                configfs_dir_set_ready(group->cg_item.ci_dentry->d_fsdata);
                spin_unlock(&configfs_dirent_lock);
        }
-       mutex_unlock(&d_inode(parent)->i_mutex);
+       inode_unlock(d_inode(parent));
        return ret;
 }
 EXPORT_SYMBOL(configfs_register_group);
@@ -1791,7 +1791,7 @@ void configfs_unregister_group(struct config_group *group)
        struct dentry *dentry = group->cg_item.ci_dentry;
        struct dentry *parent = group->cg_item.ci_parent->ci_dentry;
 
-       mutex_lock_nested(&d_inode(parent)->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(d_inode(parent), I_MUTEX_PARENT);
        spin_lock(&configfs_dirent_lock);
        configfs_detach_prep(dentry, NULL);
        spin_unlock(&configfs_dirent_lock);
@@ -1800,7 +1800,7 @@ void configfs_unregister_group(struct config_group *group)
        d_inode(dentry)->i_flags |= S_DEAD;
        dont_mount(dentry);
        d_delete(dentry);
-       mutex_unlock(&d_inode(parent)->i_mutex);
+       inode_unlock(d_inode(parent));
 
        dput(dentry);
 
@@ -1872,7 +1872,7 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
        sd = root->d_fsdata;
        link_group(to_config_group(sd->s_element), group);
 
-       mutex_lock_nested(&d_inode(root)->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(d_inode(root), I_MUTEX_PARENT);
 
        err = -ENOMEM;
        dentry = d_alloc_name(root, group->cg_item.ci_name);
@@ -1892,7 +1892,7 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
                }
        }
 
-       mutex_unlock(&d_inode(root)->i_mutex);
+       inode_unlock(d_inode(root));
 
        if (err) {
                unlink_group(group);
@@ -1913,9 +1913,9 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys)
                return;
        }
 
-       mutex_lock_nested(&d_inode(root)->i_mutex,
+       inode_lock_nested(d_inode(root),
                          I_MUTEX_PARENT);
-       mutex_lock_nested(&d_inode(dentry)->i_mutex, I_MUTEX_CHILD);
+       inode_lock_nested(d_inode(dentry), I_MUTEX_CHILD);
        mutex_lock(&configfs_symlink_mutex);
        spin_lock(&configfs_dirent_lock);
        if (configfs_detach_prep(dentry, NULL)) {
@@ -1926,11 +1926,11 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys)
        configfs_detach_group(&group->cg_item);
        d_inode(dentry)->i_flags |= S_DEAD;
        dont_mount(dentry);
-       mutex_unlock(&d_inode(dentry)->i_mutex);
+       inode_unlock(d_inode(dentry));
 
        d_delete(dentry);
 
-       mutex_unlock(&d_inode(root)->i_mutex);
+       inode_unlock(d_inode(root));
 
        dput(dentry);
 
index 3687187c8ea59e8a6a2795c8e36fa95e897955a7..33b7ee34eda5f135fef480f0bdf55bb4037ab334 100644 (file)
@@ -540,10 +540,10 @@ int configfs_create_file(struct config_item * item, const struct configfs_attrib
        umode_t mode = (attr->ca_mode & S_IALLUGO) | S_IFREG;
        int error = 0;
 
-       mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_NORMAL);
+       inode_lock_nested(d_inode(dir), I_MUTEX_NORMAL);
        error = configfs_make_dirent(parent_sd, NULL, (void *) attr, mode,
                                     CONFIGFS_ITEM_ATTR);
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
 
        return error;
 }
@@ -562,10 +562,10 @@ int configfs_create_bin_file(struct config_item *item,
        umode_t mode = (bin_attr->cb_attr.ca_mode & S_IALLUGO) | S_IFREG;
        int error = 0;
 
-       mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_NORMAL);
+       inode_lock_nested(dir->d_inode, I_MUTEX_NORMAL);
        error = configfs_make_dirent(parent_sd, NULL, (void *) bin_attr, mode,
                                     CONFIGFS_ITEM_BIN_ATTR);
-       mutex_unlock(&dir->d_inode->i_mutex);
+       inode_unlock(dir->d_inode);
 
        return error;
 }
index 0cc810e9dccc15fb764d53a4cb26604b3d16672f..cee087d8f7e02f5b62198a6091a5a7f23cf0109f 100644 (file)
@@ -255,7 +255,7 @@ void configfs_hash_and_remove(struct dentry * dir, const char * name)
                /* no inode means this hasn't been made visible yet */
                return;
 
-       mutex_lock(&d_inode(dir)->i_mutex);
+       inode_lock(d_inode(dir));
        list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
                if (!sd->s_element)
                        continue;
@@ -268,5 +268,5 @@ void configfs_hash_and_remove(struct dentry * dir, const char * name)
                        break;
                }
        }
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
 }
index 7af8797590640e032c74f619b9fd1931fed77e15..fc2e3141138b285321abdbe059e53dfd40162719 100644 (file)
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -24,6 +24,7 @@
 #include <linux/memcontrol.h>
 #include <linux/mm.h>
 #include <linux/mutex.h>
+#include <linux/pagevec.h>
 #include <linux/pmem.h>
 #include <linux/sched.h>
 #include <linux/uio.h>
@@ -57,6 +58,26 @@ static void dax_unmap_atomic(struct block_device *bdev,
        blk_queue_exit(bdev->bd_queue);
 }
 
+struct page *read_dax_sector(struct block_device *bdev, sector_t n)
+{
+       struct page *page = alloc_pages(GFP_KERNEL, 0);
+       struct blk_dax_ctl dax = {
+               .size = PAGE_SIZE,
+               .sector = n & ~((((int) PAGE_SIZE) / 512) - 1),
+       };
+       long rc;
+
+       if (!page)
+               return ERR_PTR(-ENOMEM);
+
+       rc = dax_map_atomic(bdev, &dax);
+       if (rc < 0)
+               return ERR_PTR(rc);
+       memcpy_from_pmem(page_address(page), dax.addr, PAGE_SIZE);
+       dax_unmap_atomic(bdev, &dax);
+       return page;
+}
+
 /*
  * dax_clear_blocks() is called from within transaction context from XFS,
  * and hence this means the stack from this point must follow GFP_NOFS
@@ -245,13 +266,14 @@ ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
        loff_t end = pos + iov_iter_count(iter);
 
        memset(&bh, 0, sizeof(bh));
+       bh.b_bdev = inode->i_sb->s_bdev;
 
        if ((flags & DIO_LOCKING) && iov_iter_rw(iter) == READ) {
                struct address_space *mapping = inode->i_mapping;
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                retval = filemap_write_and_wait_range(mapping, pos, end - 1);
                if (retval) {
-                       mutex_unlock(&inode->i_mutex);
+                       inode_unlock(inode);
                        goto out;
                }
        }
@@ -263,7 +285,7 @@ ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
        retval = dax_io(inode, iter, pos, end, get_block, &bh);
 
        if ((flags & DIO_LOCKING) && iov_iter_rw(iter) == READ)
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
 
        if ((retval > 0) && end_io)
                end_io(iocb, pos, retval, bh.b_private);
@@ -324,6 +346,200 @@ static int copy_user_bh(struct page *to, struct inode *inode,
        return 0;
 }
 
+#define NO_SECTOR -1
+#define DAX_PMD_INDEX(page_index) (page_index & (PMD_MASK >> PAGE_CACHE_SHIFT))
+
+static int dax_radix_entry(struct address_space *mapping, pgoff_t index,
+               sector_t sector, bool pmd_entry, bool dirty)
+{
+       struct radix_tree_root *page_tree = &mapping->page_tree;
+       pgoff_t pmd_index = DAX_PMD_INDEX(index);
+       int type, error = 0;
+       void *entry;
+
+       WARN_ON_ONCE(pmd_entry && !dirty);
+       if (dirty)
+               __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
+
+       spin_lock_irq(&mapping->tree_lock);
+
+       entry = radix_tree_lookup(page_tree, pmd_index);
+       if (entry && RADIX_DAX_TYPE(entry) == RADIX_DAX_PMD) {
+               index = pmd_index;
+               goto dirty;
+       }
+
+       entry = radix_tree_lookup(page_tree, index);
+       if (entry) {
+               type = RADIX_DAX_TYPE(entry);
+               if (WARN_ON_ONCE(type != RADIX_DAX_PTE &&
+                                       type != RADIX_DAX_PMD)) {
+                       error = -EIO;
+                       goto unlock;
+               }
+
+               if (!pmd_entry || type == RADIX_DAX_PMD)
+                       goto dirty;
+
+               /*
+                * We only insert dirty PMD entries into the radix tree.  This
+                * means we don't need to worry about removing a dirty PTE
+                * entry and inserting a clean PMD entry, thus reducing the
+                * range we would flush with a follow-up fsync/msync call.
+                */
+               radix_tree_delete(&mapping->page_tree, index);
+               mapping->nrexceptional--;
+       }
+
+       if (sector == NO_SECTOR) {
+               /*
+                * This can happen during correct operation if our pfn_mkwrite
+                * fault raced against a hole punch operation.  If this
+                * happens the pte that was hole punched will have been
+                * unmapped and the radix tree entry will have been removed by
+                * the time we are called, but the call will still happen.  We
+                * will return all the way up to wp_pfn_shared(), where the
+                * pte_same() check will fail, eventually causing page fault
+                * to be retried by the CPU.
+                */
+               goto unlock;
+       }
+
+       error = radix_tree_insert(page_tree, index,
+                       RADIX_DAX_ENTRY(sector, pmd_entry));
+       if (error)
+               goto unlock;
+
+       mapping->nrexceptional++;
+ dirty:
+       if (dirty)
+               radix_tree_tag_set(page_tree, index, PAGECACHE_TAG_DIRTY);
+ unlock:
+       spin_unlock_irq(&mapping->tree_lock);
+       return error;
+}
+
+static int dax_writeback_one(struct block_device *bdev,
+               struct address_space *mapping, pgoff_t index, void *entry)
+{
+       struct radix_tree_root *page_tree = &mapping->page_tree;
+       int type = RADIX_DAX_TYPE(entry);
+       struct radix_tree_node *node;
+       struct blk_dax_ctl dax;
+       void **slot;
+       int ret = 0;
+
+       spin_lock_irq(&mapping->tree_lock);
+       /*
+        * Regular page slots are stabilized by the page lock even
+        * without the tree itself locked.  These unlocked entries
+        * need verification under the tree lock.
+        */
+       if (!__radix_tree_lookup(page_tree, index, &node, &slot))
+               goto unlock;
+       if (*slot != entry)
+               goto unlock;
+
+       /* another fsync thread may have already written back this entry */
+       if (!radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE))
+               goto unlock;
+
+       if (WARN_ON_ONCE(type != RADIX_DAX_PTE && type != RADIX_DAX_PMD)) {
+               ret = -EIO;
+               goto unlock;
+       }
+
+       dax.sector = RADIX_DAX_SECTOR(entry);
+       dax.size = (type == RADIX_DAX_PMD ? PMD_SIZE : PAGE_SIZE);
+       spin_unlock_irq(&mapping->tree_lock);
+
+       /*
+        * We cannot hold tree_lock while calling dax_map_atomic() because it
+        * eventually calls cond_resched().
+        */
+       ret = dax_map_atomic(bdev, &dax);
+       if (ret < 0)
+               return ret;
+
+       if (WARN_ON_ONCE(ret < dax.size)) {
+               ret = -EIO;
+               goto unmap;
+       }
+
+       wb_cache_pmem(dax.addr, dax.size);
+
+       spin_lock_irq(&mapping->tree_lock);
+       radix_tree_tag_clear(page_tree, index, PAGECACHE_TAG_TOWRITE);
+       spin_unlock_irq(&mapping->tree_lock);
+ unmap:
+       dax_unmap_atomic(bdev, &dax);
+       return ret;
+
+ unlock:
+       spin_unlock_irq(&mapping->tree_lock);
+       return ret;
+}
+
+/*
+ * Flush the mapping to the persistent domain within the byte range of [start,
+ * end]. This is required by data integrity operations to ensure file data is
+ * on persistent storage prior to completion of the operation.
+ */
+int dax_writeback_mapping_range(struct address_space *mapping, loff_t start,
+               loff_t end)
+{
+       struct inode *inode = mapping->host;
+       struct block_device *bdev = inode->i_sb->s_bdev;
+       pgoff_t start_index, end_index, pmd_index;
+       pgoff_t indices[PAGEVEC_SIZE];
+       struct pagevec pvec;
+       bool done = false;
+       int i, ret = 0;
+       void *entry;
+
+       if (WARN_ON_ONCE(inode->i_blkbits != PAGE_SHIFT))
+               return -EIO;
+
+       start_index = start >> PAGE_CACHE_SHIFT;
+       end_index = end >> PAGE_CACHE_SHIFT;
+       pmd_index = DAX_PMD_INDEX(start_index);
+
+       rcu_read_lock();
+       entry = radix_tree_lookup(&mapping->page_tree, pmd_index);
+       rcu_read_unlock();
+
+       /* see if the start of our range is covered by a PMD entry */
+       if (entry && RADIX_DAX_TYPE(entry) == RADIX_DAX_PMD)
+               start_index = pmd_index;
+
+       tag_pages_for_writeback(mapping, start_index, end_index);
+
+       pagevec_init(&pvec, 0);
+       while (!done) {
+               pvec.nr = find_get_entries_tag(mapping, start_index,
+                               PAGECACHE_TAG_TOWRITE, PAGEVEC_SIZE,
+                               pvec.pages, indices);
+
+               if (pvec.nr == 0)
+                       break;
+
+               for (i = 0; i < pvec.nr; i++) {
+                       if (indices[i] > end_index) {
+                               done = true;
+                               break;
+                       }
+
+                       ret = dax_writeback_one(bdev, mapping, indices[i],
+                                       pvec.pages[i]);
+                       if (ret < 0)
+                               return ret;
+               }
+       }
+       wmb_pmem();
+       return 0;
+}
+EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);
+
 static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
                        struct vm_area_struct *vma, struct vm_fault *vmf)
 {
@@ -363,6 +579,11 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
        }
        dax_unmap_atomic(bdev, &dax);
 
+       error = dax_radix_entry(mapping, vmf->pgoff, dax.sector, false,
+                       vmf->flags & FAULT_FLAG_WRITE);
+       if (error)
+               goto out;
+
        error = vm_insert_mixed(vma, vaddr, dax.pfn);
 
  out:
@@ -408,6 +629,7 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 
        memset(&bh, 0, sizeof(bh));
        block = (sector_t)vmf->pgoff << (PAGE_SHIFT - blkbits);
+       bh.b_bdev = inode->i_sb->s_bdev;
        bh.b_size = PAGE_SIZE;
 
  repeat:
@@ -487,6 +709,7 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
                delete_from_page_cache(page);
                unlock_page(page);
                page_cache_release(page);
+               page = NULL;
        }
 
        /*
@@ -590,7 +813,8 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
        struct block_device *bdev;
        pgoff_t size, pgoff;
        sector_t block;
-       int result = 0;
+       int error, result = 0;
+       bool alloc = false;
 
        /* dax pmd mappings require pfn_t_devmap() */
        if (!IS_ENABLED(CONFIG_FS_DAX_PMD))
@@ -624,13 +848,21 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
        }
 
        memset(&bh, 0, sizeof(bh));
+       bh.b_bdev = inode->i_sb->s_bdev;
        block = (sector_t)pgoff << (PAGE_SHIFT - blkbits);
 
        bh.b_size = PMD_SIZE;
-       if (get_block(inode, block, &bh, write) != 0)
+
+       if (get_block(inode, block, &bh, 0) != 0)
                return VM_FAULT_SIGBUS;
+
+       if (!buffer_mapped(&bh) && write) {
+               if (get_block(inode, block, &bh, 1) != 0)
+                       return VM_FAULT_SIGBUS;
+               alloc = true;
+       }
+
        bdev = bh.b_bdev;
-       i_mmap_lock_read(mapping);
 
        /*
         * If the filesystem isn't willing to tell us the length of a hole,
@@ -639,19 +871,22 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
         */
        if (!buffer_size_valid(&bh) || bh.b_size < PMD_SIZE) {
                dax_pmd_dbg(&bh, address, "allocated block too small");
-               goto fallback;
+               return VM_FAULT_FALLBACK;
        }
 
        /*
         * If we allocated new storage, make sure no process has any
         * zero pages covering this hole
         */
-       if (buffer_new(&bh)) {
-               i_mmap_unlock_read(mapping);
-               unmap_mapping_range(mapping, pgoff << PAGE_SHIFT, PMD_SIZE, 0);
-               i_mmap_lock_read(mapping);
+       if (alloc) {
+               loff_t lstart = pgoff << PAGE_SHIFT;
+               loff_t lend = lstart + PMD_SIZE - 1; /* inclusive */
+
+               truncate_pagecache_range(inode, lstart, lend);
        }
 
+       i_mmap_lock_read(mapping);
+
        /*
         * If a truncate happened while we were allocating blocks, we may
         * leave blocks allocated to the file that are beyond EOF.  We can't
@@ -664,7 +899,8 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
                goto out;
        }
        if ((pgoff | PG_PMD_COLOUR) >= size) {
-               dax_pmd_dbg(&bh, address, "pgoff unaligned");
+               dax_pmd_dbg(&bh, address,
+                               "offset + huge page size > file size");
                goto fallback;
        }
 
@@ -732,6 +968,31 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
                }
                dax_unmap_atomic(bdev, &dax);
 
+               /*
+                * For PTE faults we insert a radix tree entry for reads, and
+                * leave it clean.  Then on the first write we dirty the radix
+                * tree entry via the dax_pfn_mkwrite() path.  This sequence
+                * allows the dax_pfn_mkwrite() call to be simpler and avoid a
+                * call into get_block() to translate the pgoff to a sector in
+                * order to be able to create a new radix tree entry.
+                *
+                * The PMD path doesn't have an equivalent to
+                * dax_pfn_mkwrite(), though, so for a read followed by a
+                * write we traverse all the way through __dax_pmd_fault()
+                * twice.  This means we can just skip inserting a radix tree
+                * entry completely on the initial read and just wait until
+                * the write to insert a dirty entry.
+                */
+               if (write) {
+                       error = dax_radix_entry(mapping, pgoff, dax.sector,
+                                       true, true);
+                       if (error) {
+                               dax_pmd_dbg(&bh, address,
+                                               "PMD radix insertion failed");
+                               goto fallback;
+                       }
+               }
+
                dev_dbg(part_to_dev(bdev->bd_part),
                                "%s: %s addr: %lx pfn: %lx sect: %llx\n",
                                __func__, current->comm, address,
@@ -790,15 +1051,20 @@ EXPORT_SYMBOL_GPL(dax_pmd_fault);
  * dax_pfn_mkwrite - handle first write to DAX page
  * @vma: The virtual memory area where the fault occurred
  * @vmf: The description of the fault
- *
  */
 int dax_pfn_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
-       struct super_block *sb = file_inode(vma->vm_file)->i_sb;
+       struct file *file = vma->vm_file;
 
-       sb_start_pagefault(sb);
-       file_update_time(vma->vm_file);
-       sb_end_pagefault(sb);
+       /*
+        * We pass NO_SECTOR to dax_radix_entry() because we expect that a
+        * RADIX_DAX_PTE entry already exists in the radix tree from a
+        * previous call to __dax_fault().  We just want to look up that PTE
+        * entry using vmf->pgoff and make sure the dirty tag is set.  This
+        * saves us from having to make a call to get_block() here to look
+        * up the sector.
+        */
+       dax_radix_entry(file->f_mapping, vmf->pgoff, NO_SECTOR, false, true);
        return VM_FAULT_NOPAGE;
 }
 EXPORT_SYMBOL_GPL(dax_pfn_mkwrite);
@@ -835,6 +1101,7 @@ int dax_zero_page_range(struct inode *inode, loff_t from, unsigned length,
        BUG_ON((offset + length) > PAGE_CACHE_SIZE);
 
        memset(&bh, 0, sizeof(bh));
+       bh.b_bdev = inode->i_sb->s_bdev;
        bh.b_size = PAGE_CACHE_SIZE;
        err = get_block(inode, index, &bh, 0);
        if (err < 0)
index b4539e84e5779a48c470804f9051ca5f3142eb48..92d5140de8516c642b6ea5af81f0ffe89399a416 100644 (file)
@@ -2462,7 +2462,7 @@ EXPORT_SYMBOL(d_rehash);
  */
 void dentry_update_name_case(struct dentry *dentry, struct qstr *name)
 {
-       BUG_ON(!mutex_is_locked(&dentry->d_parent->d_inode->i_mutex));
+       BUG_ON(!inode_is_locked(dentry->d_parent->d_inode));
        BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */
 
        spin_lock(&dentry->d_lock);
@@ -2738,7 +2738,7 @@ static int __d_unalias(struct inode *inode,
        if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex))
                goto out_err;
        m1 = &dentry->d_sb->s_vfs_rename_mutex;
-       if (!mutex_trylock(&alias->d_parent->d_inode->i_mutex))
+       if (!inode_trylock(alias->d_parent->d_inode))
                goto out_err;
        m2 = &alias->d_parent->d_inode->i_mutex;
 out_unalias:
index b7fcc0de0b2f2771aff08a130eb20564adb3cbd4..bece948b363df5b8d331252cd9468d1bfebead68 100644 (file)
@@ -265,7 +265,7 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
        if (!parent)
                parent = debugfs_mount->mnt_root;
 
-       mutex_lock(&d_inode(parent)->i_mutex);
+       inode_lock(d_inode(parent));
        dentry = lookup_one_len(name, parent, strlen(name));
        if (!IS_ERR(dentry) && d_really_is_positive(dentry)) {
                dput(dentry);
@@ -273,7 +273,7 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
        }
 
        if (IS_ERR(dentry)) {
-               mutex_unlock(&d_inode(parent)->i_mutex);
+               inode_unlock(d_inode(parent));
                simple_release_fs(&debugfs_mount, &debugfs_mount_count);
        }
 
@@ -282,7 +282,7 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
 
 static struct dentry *failed_creating(struct dentry *dentry)
 {
-       mutex_unlock(&d_inode(dentry->d_parent)->i_mutex);
+       inode_unlock(d_inode(dentry->d_parent));
        dput(dentry);
        simple_release_fs(&debugfs_mount, &debugfs_mount_count);
        return NULL;
@@ -290,7 +290,7 @@ static struct dentry *failed_creating(struct dentry *dentry)
 
 static struct dentry *end_creating(struct dentry *dentry)
 {
-       mutex_unlock(&d_inode(dentry->d_parent)->i_mutex);
+       inode_unlock(d_inode(dentry->d_parent));
        return dentry;
 }
 
@@ -560,9 +560,9 @@ void debugfs_remove(struct dentry *dentry)
        if (!parent || d_really_is_negative(parent))
                return;
 
-       mutex_lock(&d_inode(parent)->i_mutex);
+       inode_lock(d_inode(parent));
        ret = __debugfs_remove(dentry, parent);
-       mutex_unlock(&d_inode(parent)->i_mutex);
+       inode_unlock(d_inode(parent));
        if (!ret)
                simple_release_fs(&debugfs_mount, &debugfs_mount_count);
 }
@@ -594,7 +594,7 @@ void debugfs_remove_recursive(struct dentry *dentry)
 
        parent = dentry;
  down:
-       mutex_lock(&d_inode(parent)->i_mutex);
+       inode_lock(d_inode(parent));
  loop:
        /*
         * The parent->d_subdirs is protected by the d_lock. Outside that
@@ -609,7 +609,7 @@ void debugfs_remove_recursive(struct dentry *dentry)
                /* perhaps simple_empty(child) makes more sense */
                if (!list_empty(&child->d_subdirs)) {
                        spin_unlock(&parent->d_lock);
-                       mutex_unlock(&d_inode(parent)->i_mutex);
+                       inode_unlock(d_inode(parent));
                        parent = child;
                        goto down;
                }
@@ -630,10 +630,10 @@ void debugfs_remove_recursive(struct dentry *dentry)
        }
        spin_unlock(&parent->d_lock);
 
-       mutex_unlock(&d_inode(parent)->i_mutex);
+       inode_unlock(d_inode(parent));
        child = parent;
        parent = parent->d_parent;
-       mutex_lock(&d_inode(parent)->i_mutex);
+       inode_lock(d_inode(parent));
 
        if (child != dentry)
                /* go up */
@@ -641,7 +641,7 @@ void debugfs_remove_recursive(struct dentry *dentry)
 
        if (!__debugfs_remove(child, parent))
                simple_release_fs(&debugfs_mount, &debugfs_mount_count);
-       mutex_unlock(&d_inode(parent)->i_mutex);
+       inode_unlock(d_inode(parent));
 }
 EXPORT_SYMBOL_GPL(debugfs_remove_recursive);
 
index c35ffdc12bbafd1874dcea43ec5094867daf3e0d..1f107fd513286f0f3e8be601cd715ea0ee5443e8 100644 (file)
@@ -255,7 +255,7 @@ static int mknod_ptmx(struct super_block *sb)
        if (!uid_valid(root_uid) || !gid_valid(root_gid))
                return -EINVAL;
 
-       mutex_lock(&d_inode(root)->i_mutex);
+       inode_lock(d_inode(root));
 
        /* If we have already created ptmx node, return */
        if (fsi->ptmx_dentry) {
@@ -292,7 +292,7 @@ static int mknod_ptmx(struct super_block *sb)
        fsi->ptmx_dentry = dentry;
        rc = 0;
 out:
-       mutex_unlock(&d_inode(root)->i_mutex);
+       inode_unlock(d_inode(root));
        return rc;
 }
 
@@ -615,7 +615,7 @@ struct inode *devpts_pty_new(struct inode *ptmx_inode, dev_t device, int index,
 
        sprintf(s, "%d", index);
 
-       mutex_lock(&d_inode(root)->i_mutex);
+       inode_lock(d_inode(root));
 
        dentry = d_alloc_name(root, s);
        if (dentry) {
@@ -626,7 +626,7 @@ struct inode *devpts_pty_new(struct inode *ptmx_inode, dev_t device, int index,
                inode = ERR_PTR(-ENOMEM);
        }
 
-       mutex_unlock(&d_inode(root)->i_mutex);
+       inode_unlock(d_inode(root));
 
        return inode;
 }
@@ -671,7 +671,7 @@ void devpts_pty_kill(struct inode *inode)
 
        BUG_ON(inode->i_rdev == MKDEV(TTYAUX_MAJOR, PTMX_MINOR));
 
-       mutex_lock(&d_inode(root)->i_mutex);
+       inode_lock(d_inode(root));
 
        dentry = d_find_alias(inode);
 
@@ -680,7 +680,7 @@ void devpts_pty_kill(struct inode *inode)
        dput(dentry);   /* d_alloc_name() in devpts_pty_new() */
        dput(dentry);           /* d_find_alias above */
 
-       mutex_unlock(&d_inode(root)->i_mutex);
+       inode_unlock(d_inode(root));
 }
 
 static int __init init_devpts_fs(void)
index 602e8441bc0fb6b094ec96dfb3273ff73b8b1506..1b2f7ffc8b841fd16cf312874fe8c7d4c0fa0e8e 100644 (file)
@@ -1157,12 +1157,12 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
                                        iocb->ki_filp->f_mapping;
 
                        /* will be released by direct_io_worker */
-                       mutex_lock(&inode->i_mutex);
+                       inode_lock(inode);
 
                        retval = filemap_write_and_wait_range(mapping, offset,
                                                              end - 1);
                        if (retval) {
-                               mutex_unlock(&inode->i_mutex);
+                               inode_unlock(inode);
                                kmem_cache_free(dio_cache, dio);
                                goto out;
                        }
@@ -1173,7 +1173,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
        dio->i_size = i_size_read(inode);
        if (iov_iter_rw(iter) == READ && offset >= dio->i_size) {
                if (dio->flags & DIO_LOCKING)
-                       mutex_unlock(&inode->i_mutex);
+                       inode_unlock(inode);
                kmem_cache_free(dio_cache, dio);
                retval = 0;
                goto out;
@@ -1295,7 +1295,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
         * of protecting us from looking up uninitialized blocks.
         */
        if (iov_iter_rw(iter) == READ && (dio->flags & DIO_LOCKING))
-               mutex_unlock(&dio->inode->i_mutex);
+               inode_unlock(dio->inode);
 
        /*
         * The only time we want to leave bios in flight is when a successful
index 1925d6d222b87f635b9925e05003a3c7c41d7580..58c2f4a21b7f4233b34b70efb0602b297d7f9012 100644 (file)
@@ -516,7 +516,7 @@ static ssize_t device_write(struct file *file, const char __user *buf,
                return -EINVAL;
 
        kbuf = memdup_user_nul(buf, count);
-       if (!IS_ERR(kbuf))
+       if (IS_ERR(kbuf))
                return PTR_ERR(kbuf);
 
        if (check_version(kbuf)) {
index 040aa879d634fe15d38f0ce9bf103c7b514122b4..4e685ac1024dc313e56ed8c8245fa5add9bb7c33 100644 (file)
@@ -41,13 +41,13 @@ static struct dentry *lock_parent(struct dentry *dentry)
        struct dentry *dir;
 
        dir = dget_parent(dentry);
-       mutex_lock_nested(&(d_inode(dir)->i_mutex), I_MUTEX_PARENT);
+       inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
        return dir;
 }
 
 static void unlock_dir(struct dentry *dir)
 {
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
        dput(dir);
 }
 
@@ -397,11 +397,11 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
        int rc = 0;
 
        lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent);
-       mutex_lock(&d_inode(lower_dir_dentry)->i_mutex);
+       inode_lock(d_inode(lower_dir_dentry));
        lower_dentry = lookup_one_len(ecryptfs_dentry->d_name.name,
                                      lower_dir_dentry,
                                      ecryptfs_dentry->d_name.len);
-       mutex_unlock(&d_inode(lower_dir_dentry)->i_mutex);
+       inode_unlock(d_inode(lower_dir_dentry));
        if (IS_ERR(lower_dentry)) {
                rc = PTR_ERR(lower_dentry);
                ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
@@ -426,11 +426,11 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
                       "filename; rc = [%d]\n", __func__, rc);
                goto out;
        }
-       mutex_lock(&d_inode(lower_dir_dentry)->i_mutex);
+       inode_lock(d_inode(lower_dir_dentry));
        lower_dentry = lookup_one_len(encrypted_and_encoded_name,
                                      lower_dir_dentry,
                                      encrypted_and_encoded_name_size);
-       mutex_unlock(&d_inode(lower_dir_dentry)->i_mutex);
+       inode_unlock(d_inode(lower_dir_dentry));
        if (IS_ERR(lower_dentry)) {
                rc = PTR_ERR(lower_dentry);
                ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
@@ -869,9 +869,9 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
        if (!rc && lower_ia.ia_valid & ATTR_SIZE) {
                struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
 
-               mutex_lock(&d_inode(lower_dentry)->i_mutex);
+               inode_lock(d_inode(lower_dentry));
                rc = notify_change(lower_dentry, &lower_ia, NULL);
-               mutex_unlock(&d_inode(lower_dentry)->i_mutex);
+               inode_unlock(d_inode(lower_dentry));
        }
        return rc;
 }
@@ -970,9 +970,9 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
        if (lower_ia.ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
                lower_ia.ia_valid &= ~ATTR_MODE;
 
-       mutex_lock(&d_inode(lower_dentry)->i_mutex);
+       inode_lock(d_inode(lower_dentry));
        rc = notify_change(lower_dentry, &lower_ia, NULL);
-       mutex_unlock(&d_inode(lower_dentry)->i_mutex);
+       inode_unlock(d_inode(lower_dentry));
 out:
        fsstack_copy_attr_all(inode, lower_inode);
        return rc;
@@ -1048,10 +1048,10 @@ ecryptfs_getxattr_lower(struct dentry *lower_dentry, const char *name,
                rc = -EOPNOTSUPP;
                goto out;
        }
-       mutex_lock(&d_inode(lower_dentry)->i_mutex);
+       inode_lock(d_inode(lower_dentry));
        rc = d_inode(lower_dentry)->i_op->getxattr(lower_dentry, name, value,
                                                   size);
-       mutex_unlock(&d_inode(lower_dentry)->i_mutex);
+       inode_unlock(d_inode(lower_dentry));
 out:
        return rc;
 }
@@ -1075,9 +1075,9 @@ ecryptfs_listxattr(struct dentry *dentry, char *list, size_t size)
                rc = -EOPNOTSUPP;
                goto out;
        }
-       mutex_lock(&d_inode(lower_dentry)->i_mutex);
+       inode_lock(d_inode(lower_dentry));
        rc = d_inode(lower_dentry)->i_op->listxattr(lower_dentry, list, size);
-       mutex_unlock(&d_inode(lower_dentry)->i_mutex);
+       inode_unlock(d_inode(lower_dentry));
 out:
        return rc;
 }
@@ -1092,9 +1092,9 @@ static int ecryptfs_removexattr(struct dentry *dentry, const char *name)
                rc = -EOPNOTSUPP;
                goto out;
        }
-       mutex_lock(&d_inode(lower_dentry)->i_mutex);
+       inode_lock(d_inode(lower_dentry));
        rc = d_inode(lower_dentry)->i_op->removexattr(lower_dentry, name);
-       mutex_unlock(&d_inode(lower_dentry)->i_mutex);
+       inode_unlock(d_inode(lower_dentry));
 out:
        return rc;
 }
index caba848ac76335ef0f897cb7cb27c1bbf520c1de..c6ced4cbf0cff7d3b3f754ddbf0727d44bca8809 100644 (file)
@@ -436,7 +436,7 @@ static int ecryptfs_write_inode_size_to_xattr(struct inode *ecryptfs_inode)
                rc = -ENOMEM;
                goto out;
        }
-       mutex_lock(&lower_inode->i_mutex);
+       inode_lock(lower_inode);
        size = lower_inode->i_op->getxattr(lower_dentry, ECRYPTFS_XATTR_NAME,
                                           xattr_virt, PAGE_CACHE_SIZE);
        if (size < 0)
@@ -444,7 +444,7 @@ static int ecryptfs_write_inode_size_to_xattr(struct inode *ecryptfs_inode)
        put_unaligned_be64(i_size_read(ecryptfs_inode), xattr_virt);
        rc = lower_inode->i_op->setxattr(lower_dentry, ECRYPTFS_XATTR_NAME,
                                         xattr_virt, size, 0);
-       mutex_unlock(&lower_inode->i_mutex);
+       inode_unlock(lower_inode);
        if (rc)
                printk(KERN_ERR "Error whilst attempting to write inode size "
                       "to lower file xattr; rc = [%d]\n", rc);
index 90001da9abfdfe98b01a0eacdb5381425234b126..c424e4813ec8019b5f8a62feb8eb51c6d13f7f97 100644 (file)
@@ -50,9 +50,9 @@ static ssize_t efivarfs_file_write(struct file *file,
                d_delete(file->f_path.dentry);
                dput(file->f_path.dentry);
        } else {
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                i_size_write(inode, datasize + sizeof(attributes));
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        }
 
        bytes = count;
index 86a2121828c312e53d64aedee9506319bbadf6c1..b8a564f29107b6d38e90b54d99c86f4aa4b96532 100644 (file)
@@ -160,10 +160,10 @@ static int efivarfs_callback(efi_char16_t *name16, efi_guid_t vendor,
        efivar_entry_size(entry, &size);
        efivar_entry_add(entry, &efivarfs_list);
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        inode->i_private = entry;
        i_size_write(inode, size + sizeof(entry->var.Attributes));
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        d_add(dentry, inode);
 
        return 0;
index ae1dbcf47e979d48b67ee83e1ec413e4d119a48b..cde60741cad2c4cc429f04e80f29e788b0e0d673 100644 (file)
 /* Epoll private bits inside the event mask */
 #define EP_PRIVATE_BITS (EPOLLWAKEUP | EPOLLONESHOT | EPOLLET | EPOLLEXCLUSIVE)
 
+#define EPOLLINOUT_BITS (POLLIN | POLLOUT)
+
+#define EPOLLEXCLUSIVE_OK_BITS (EPOLLINOUT_BITS | POLLERR | POLLHUP | \
+                               EPOLLWAKEUP | EPOLLET | EPOLLEXCLUSIVE)
+
 /* Maximum number of nesting allowed inside epoll sets */
 #define EP_MAX_NESTS 4
 
@@ -1068,7 +1073,22 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
         * wait list.
         */
        if (waitqueue_active(&ep->wq)) {
-               ewake = 1;
+               if ((epi->event.events & EPOLLEXCLUSIVE) &&
+                                       !((unsigned long)key & POLLFREE)) {
+                       switch ((unsigned long)key & EPOLLINOUT_BITS) {
+                       case POLLIN:
+                               if (epi->event.events & POLLIN)
+                                       ewake = 1;
+                               break;
+                       case POLLOUT:
+                               if (epi->event.events & POLLOUT)
+                                       ewake = 1;
+                               break;
+                       case 0:
+                               ewake = 1;
+                               break;
+                       }
+               }
                wake_up_locked(&ep->wq);
        }
        if (waitqueue_active(&ep->poll_wait))
@@ -1875,9 +1895,13 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
         * so EPOLLEXCLUSIVE is not allowed for a EPOLL_CTL_MOD operation.
         * Also, we do not currently supported nested exclusive wakeups.
         */
-       if ((epds.events & EPOLLEXCLUSIVE) && (op == EPOLL_CTL_MOD ||
-               (op == EPOLL_CTL_ADD && is_file_epoll(tf.file))))
-               goto error_tgt_fput;
+       if (epds.events & EPOLLEXCLUSIVE) {
+               if (op == EPOLL_CTL_MOD)
+                       goto error_tgt_fput;
+               if (op == EPOLL_CTL_ADD && (is_file_epoll(tf.file) ||
+                               (epds.events & ~EPOLLEXCLUSIVE_OK_BITS)))
+                       goto error_tgt_fput;
+       }
 
        /*
         * At this point it is safe to assume that the "private_data" contains
@@ -1950,8 +1974,10 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
                break;
        case EPOLL_CTL_MOD:
                if (epi) {
-                       epds.events |= POLLERR | POLLHUP;
-                       error = ep_modify(ep, epi, &epds);
+                       if (!(epi->event.events & EPOLLEXCLUSIVE)) {
+                               epds.events |= POLLERR | POLLHUP;
+                               error = ep_modify(ep, epi, &epds);
+                       }
                } else
                        error = -ENOENT;
                break;
index 828ec5f07de00b7a21d016c022afc68ee0962331..dcd4ac7d3f1e77b45fde8a84585150a4862b84d9 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1307,13 +1307,13 @@ static void bprm_fill_uid(struct linux_binprm *bprm)
                return;
 
        /* Be careful if suid/sgid is set */
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        /* reload atomically mode/uid/gid now that lock held */
        mode = inode->i_mode;
        uid = inode->i_uid;
        gid = inode->i_gid;
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        /* We ignore suid/sgid if there are no mappings for them in the ns */
        if (!kuid_has_mapping(bprm->cred->user_ns, uid) ||
index 906de66e8e7e067e6179aa3408c6e6aa8ea528b7..28645f0640f735b7f168b14258cc22c110bc0f1d 100644 (file)
@@ -52,9 +52,9 @@ static int exofs_file_fsync(struct file *filp, loff_t start, loff_t end,
        if (ret)
                return ret;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        ret = sync_inode_metadata(filp->f_mapping->host, 1);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return ret;
 }
 
index 714cd37a6ba30fd970b8384a2c2b26fc5209351f..c46f1a190b8d9ecedd8157530ecfc3ec9ccf16c4 100644 (file)
@@ -124,10 +124,10 @@ static struct dentry *reconnect_one(struct vfsmount *mnt,
        int err;
 
        parent = ERR_PTR(-EACCES);
-       mutex_lock(&dentry->d_inode->i_mutex);
+       inode_lock(dentry->d_inode);
        if (mnt->mnt_sb->s_export_op->get_parent)
                parent = mnt->mnt_sb->s_export_op->get_parent(dentry);
-       mutex_unlock(&dentry->d_inode->i_mutex);
+       inode_unlock(dentry->d_inode);
 
        if (IS_ERR(parent)) {
                dprintk("%s: get_parent of %ld failed, err %d\n",
@@ -143,9 +143,9 @@ static struct dentry *reconnect_one(struct vfsmount *mnt,
        if (err)
                goto out_err;
        dprintk("%s: found name: %s\n", __func__, nbuf);
-       mutex_lock(&parent->d_inode->i_mutex);
+       inode_lock(parent->d_inode);
        tmp = lookup_one_len(nbuf, parent, strlen(nbuf));
-       mutex_unlock(&parent->d_inode->i_mutex);
+       inode_unlock(parent->d_inode);
        if (IS_ERR(tmp)) {
                dprintk("%s: lookup failed: %d\n", __func__, PTR_ERR(tmp));
                goto out_err;
@@ -503,10 +503,10 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
                 */
                err = exportfs_get_name(mnt, target_dir, nbuf, result);
                if (!err) {
-                       mutex_lock(&target_dir->d_inode->i_mutex);
+                       inode_lock(target_dir->d_inode);
                        nresult = lookup_one_len(nbuf, target_dir,
                                                 strlen(nbuf));
-                       mutex_unlock(&target_dir->d_inode->i_mutex);
+                       inode_unlock(target_dir->d_inode);
                        if (!IS_ERR(nresult)) {
                                if (nresult->d_inode) {
                                        dput(result);
index 11a42c5a09aee553bd85db0154be89dbd1b374cc..2c88d683cd918d673c83390aea187e763b5aa78e 100644 (file)
@@ -102,8 +102,8 @@ static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma,
 {
        struct inode *inode = file_inode(vma->vm_file);
        struct ext2_inode_info *ei = EXT2_I(inode);
-       int ret = VM_FAULT_NOPAGE;
        loff_t size;
+       int ret;
 
        sb_start_pagefault(inode->i_sb);
        file_update_time(vma->vm_file);
@@ -113,6 +113,8 @@ static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma,
        size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
        if (vmf->pgoff >= size)
                ret = VM_FAULT_SIGBUS;
+       else
+               ret = dax_pfn_mkwrite(vma, vmf);
 
        up_read(&ei->dax_sem);
        sb_end_pagefault(inode->i_sb);
index 5d46c09863f09408258696171dfbec2b72eab994..b386af2e45f41fda4f965eafa5751ab10dc48bc3 100644 (file)
@@ -51,10 +51,10 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 
                flags = ext2_mask_flags(inode->i_mode, flags);
 
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                /* Is it quota file? Do not allow user to mess with it */
                if (IS_NOQUOTA(inode)) {
-                       mutex_unlock(&inode->i_mutex);
+                       inode_unlock(inode);
                        ret = -EPERM;
                        goto setflags_out;
                }
@@ -68,7 +68,7 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                 */
                if ((flags ^ oldflags) & (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL)) {
                        if (!capable(CAP_LINUX_IMMUTABLE)) {
-                               mutex_unlock(&inode->i_mutex);
+                               inode_unlock(inode);
                                ret = -EPERM;
                                goto setflags_out;
                        }
@@ -80,7 +80,7 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 
                ext2_set_inode_flags(inode);
                inode->i_ctime = CURRENT_TIME_SEC;
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
 
                mark_inode_dirty(inode);
 setflags_out:
@@ -102,10 +102,10 @@ setflags_out:
                        goto setversion_out;
                }
 
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                inode->i_ctime = CURRENT_TIME_SEC;
                inode->i_generation = generation;
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
 
                mark_inode_dirty(inode);
 setversion_out:
index 1a0835073663ff3f1dad1f376328de5e2b1332ce..c8021208a7eb1a98ba32e16a8536146f570d5c4a 100644 (file)
@@ -384,14 +384,12 @@ int ext4_decrypt(struct page *page)
                                EXT4_DECRYPT, page->index, page, page);
 }
 
-int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex)
+int ext4_encrypted_zeroout(struct inode *inode, ext4_lblk_t lblk,
+                          ext4_fsblk_t pblk, ext4_lblk_t len)
 {
        struct ext4_crypto_ctx  *ctx;
        struct page             *ciphertext_page = NULL;
        struct bio              *bio;
-       ext4_lblk_t             lblk = le32_to_cpu(ex->ee_block);
-       ext4_fsblk_t            pblk = ext4_ext_pblock(ex);
-       unsigned int            len = ext4_ext_get_actual_len(ex);
        int                     ret, err = 0;
 
 #if 0
index c5882b36e5582d0005582d0fe3ac86cab70d9c9c..9a16d1e75a493f9e4663bb4ea05b9da9980ba65d 100644 (file)
@@ -213,9 +213,11 @@ retry:
                res = -ENOKEY;
                goto out;
        }
+       down_read(&keyring_key->sem);
        ukp = user_key_payload(keyring_key);
        if (ukp->datalen != sizeof(struct ext4_encryption_key)) {
                res = -EINVAL;
+               up_read(&keyring_key->sem);
                goto out;
        }
        master_key = (struct ext4_encryption_key *)ukp->data;
@@ -226,10 +228,12 @@ retry:
                            "ext4: key size incorrect: %d\n",
                            master_key->size);
                res = -ENOKEY;
+               up_read(&keyring_key->sem);
                goto out;
        }
        res = ext4_derive_key_aes(ctx.nonce, master_key->raw,
                                  raw_key);
+       up_read(&keyring_key->sem);
        if (res)
                goto out;
 got_key:
index cc7ca4e87144a540332213ce03b63e80e601f40e..0662b285dc8a71982a54e5895d58d797c7bcf6a4 100644 (file)
@@ -378,14 +378,22 @@ struct flex_groups {
 #define EXT4_PROJINHERIT_FL            0x20000000 /* Create with parents projid */
 #define EXT4_RESERVED_FL               0x80000000 /* reserved for ext4 lib */
 
-#define EXT4_FL_USER_VISIBLE           0x004BDFFF /* User visible flags */
-#define EXT4_FL_USER_MODIFIABLE                0x004380FF /* User modifiable flags */
+#define EXT4_FL_USER_VISIBLE           0x304BDFFF /* User visible flags */
+#define EXT4_FL_USER_MODIFIABLE                0x204380FF /* User modifiable flags */
+
+#define EXT4_FL_XFLAG_VISIBLE          (EXT4_SYNC_FL | \
+                                        EXT4_IMMUTABLE_FL | \
+                                        EXT4_APPEND_FL | \
+                                        EXT4_NODUMP_FL | \
+                                        EXT4_NOATIME_FL | \
+                                        EXT4_PROJINHERIT_FL)
 
 /* Flags that should be inherited by new inodes from their parent. */
 #define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
                           EXT4_SYNC_FL | EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
                           EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
-                          EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL)
+                          EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL |\
+                          EXT4_PROJINHERIT_FL)
 
 /* Flags that are appropriate for regular files (all but dir-specific ones). */
 #define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL))
@@ -555,10 +563,12 @@ enum {
 #define EXT4_GET_BLOCKS_NO_NORMALIZE           0x0040
        /* Request will not result in inode size update (user for fallocate) */
 #define EXT4_GET_BLOCKS_KEEP_SIZE              0x0080
-       /* Do not take i_data_sem locking in ext4_map_blocks */
-#define EXT4_GET_BLOCKS_NO_LOCK                        0x0100
        /* Convert written extents to unwritten */
-#define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN      0x0200
+#define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN      0x0100
+       /* Write zeros to newly created written extents */
+#define EXT4_GET_BLOCKS_ZERO                   0x0200
+#define EXT4_GET_BLOCKS_CREATE_ZERO            (EXT4_GET_BLOCKS_CREATE |\
+                                       EXT4_GET_BLOCKS_ZERO)
 
 /*
  * The bit position of these flags must not overlap with any of the
@@ -616,6 +626,46 @@ enum {
 #define EXT4_IOC_GET_ENCRYPTION_PWSALT _IOW('f', 20, __u8[16])
 #define EXT4_IOC_GET_ENCRYPTION_POLICY _IOW('f', 21, struct ext4_encryption_policy)
 
+#ifndef FS_IOC_FSGETXATTR
+/* Until the uapi changes get merged for project quota... */
+
+#define FS_IOC_FSGETXATTR              _IOR('X', 31, struct fsxattr)
+#define FS_IOC_FSSETXATTR              _IOW('X', 32, struct fsxattr)
+
+/*
+ * Structure for FS_IOC_FSGETXATTR and FS_IOC_FSSETXATTR.
+ */
+struct fsxattr {
+       __u32           fsx_xflags;     /* xflags field value (get/set) */
+       __u32           fsx_extsize;    /* extsize field value (get/set)*/
+       __u32           fsx_nextents;   /* nextents field value (get)   */
+       __u32           fsx_projid;     /* project identifier (get/set) */
+       unsigned char   fsx_pad[12];
+};
+
+/*
+ * Flags for the fsx_xflags field
+ */
+#define FS_XFLAG_REALTIME      0x00000001      /* data in realtime volume */
+#define FS_XFLAG_PREALLOC      0x00000002      /* preallocated file extents */
+#define FS_XFLAG_IMMUTABLE     0x00000008      /* file cannot be modified */
+#define FS_XFLAG_APPEND                0x00000010      /* all writes append */
+#define FS_XFLAG_SYNC          0x00000020      /* all writes synchronous */
+#define FS_XFLAG_NOATIME       0x00000040      /* do not update access time */
+#define FS_XFLAG_NODUMP                0x00000080      /* do not include in backups */
+#define FS_XFLAG_RTINHERIT     0x00000100      /* create with rt bit set */
+#define FS_XFLAG_PROJINHERIT   0x00000200      /* create with parents projid */
+#define FS_XFLAG_NOSYMLINKS    0x00000400      /* disallow symlink creation */
+#define FS_XFLAG_EXTSIZE       0x00000800      /* extent size allocator hint */
+#define FS_XFLAG_EXTSZINHERIT  0x00001000      /* inherit inode extent size */
+#define FS_XFLAG_NODEFRAG      0x00002000      /* do not defragment */
+#define FS_XFLAG_FILESTREAM    0x00004000      /* use filestream allocator */
+#define FS_XFLAG_HASATTR       0x80000000      /* no DIFLAG for this */
+#endif /* !defined(FS_IOC_FSGETXATTR) */
+
+#define EXT4_IOC_FSGETXATTR            FS_IOC_FSGETXATTR
+#define EXT4_IOC_FSSETXATTR            FS_IOC_FSSETXATTR
+
 #if defined(__KERNEL__) && defined(CONFIG_COMPAT)
 /*
  * ioctl commands in 32 bit emulation
@@ -910,6 +960,15 @@ struct ext4_inode_info {
         * by other means, so we have i_data_sem.
         */
        struct rw_semaphore i_data_sem;
+       /*
+        * i_mmap_sem is for serializing page faults with truncate / punch hole
+        * operations. We have to make sure that new page cannot be faulted in
+        * a section of the inode that is being punched. We cannot easily use
+        * i_data_sem for this since we need protection for the whole punch
+        * operation and i_data_sem ranks below transaction start so we have
+        * to occasionally drop it.
+        */
+       struct rw_semaphore i_mmap_sem;
        struct inode vfs_inode;
        struct jbd2_inode *jinode;
 
@@ -993,6 +1052,7 @@ struct ext4_inode_info {
        /* Encryption params */
        struct ext4_crypt_info *i_crypt_info;
 #endif
+       kprojid_t i_projid;
 };
 
 /*
@@ -1248,7 +1308,7 @@ struct ext4_super_block {
 #endif
 
 /* Number of quota types we support */
-#define EXT4_MAXQUOTAS 2
+#define EXT4_MAXQUOTAS 3
 
 /*
  * fourth extended-fs super-block data in memory
@@ -1754,7 +1814,8 @@ EXT4_FEATURE_INCOMPAT_FUNCS(encrypt,              ENCRYPT)
                                         EXT4_FEATURE_RO_COMPAT_HUGE_FILE |\
                                         EXT4_FEATURE_RO_COMPAT_BIGALLOC |\
                                         EXT4_FEATURE_RO_COMPAT_METADATA_CSUM|\
-                                        EXT4_FEATURE_RO_COMPAT_QUOTA)
+                                        EXT4_FEATURE_RO_COMPAT_QUOTA |\
+                                        EXT4_FEATURE_RO_COMPAT_PROJECT)
 
 #define EXTN_FEATURE_FUNCS(ver) \
 static inline bool ext4_has_unknown_ext##ver##_compat_features(struct super_block *sb) \
@@ -1796,6 +1857,11 @@ static inline bool ext4_has_incompat_features(struct super_block *sb)
 #define        EXT4_DEF_RESUID         0
 #define        EXT4_DEF_RESGID         0
 
+/*
+ * Default project ID
+ */
+#define        EXT4_DEF_PROJID         0
+
 #define EXT4_DEF_INODE_READAHEAD_BLKS  32
 
 /*
@@ -2234,7 +2300,8 @@ void ext4_restore_control_page(struct page *data_page);
 struct page *ext4_encrypt(struct inode *inode,
                          struct page *plaintext_page);
 int ext4_decrypt(struct page *page);
-int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex);
+int ext4_encrypted_zeroout(struct inode *inode, ext4_lblk_t lblk,
+                          ext4_fsblk_t pblk, ext4_lblk_t len);
 
 #ifdef CONFIG_EXT4_FS_ENCRYPTION
 int ext4_init_crypto(void);
@@ -2440,8 +2507,8 @@ struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int);
 struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int);
 int ext4_get_block_write(struct inode *inode, sector_t iblock,
                         struct buffer_head *bh_result, int create);
-int ext4_get_block_dax(struct inode *inode, sector_t iblock,
-                        struct buffer_head *bh_result, int create);
+int ext4_dax_mmap_get_block(struct inode *inode, sector_t iblock,
+                           struct buffer_head *bh_result, int create);
 int ext4_get_block(struct inode *inode, sector_t iblock,
                                struct buffer_head *bh_result, int create);
 int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
@@ -2484,9 +2551,13 @@ extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
 extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
                             loff_t lstart, loff_t lend);
 extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
+extern int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
 extern qsize_t *ext4_get_reserved_space(struct inode *inode);
+extern int ext4_get_projid(struct inode *inode, kprojid_t *projid);
 extern void ext4_da_update_reserve_space(struct inode *inode,
                                        int used, int quota_claim);
+extern int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk,
+                             ext4_fsblk_t pblk, ext4_lblk_t len);
 
 /* indirect.c */
 extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
@@ -2825,7 +2896,7 @@ do {                                                              \
 static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
 {
        WARN_ON_ONCE(S_ISREG(inode->i_mode) &&
-                    !mutex_is_locked(&inode->i_mutex));
+                    !inode_is_locked(inode));
        down_write(&EXT4_I(inode)->i_data_sem);
        if (newsize > EXT4_I(inode)->i_disksize)
                EXT4_I(inode)->i_disksize = newsize;
@@ -2848,6 +2919,9 @@ static inline int ext4_update_inode_size(struct inode *inode, loff_t newsize)
        return changed;
 }
 
+int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
+                                     loff_t len);
+
 struct ext4_group_info {
        unsigned long   bb_state;
        struct rb_root  bb_free_root;
@@ -2986,8 +3060,7 @@ extern int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos,
                                         struct page *page);
 extern int ext4_try_add_inline_entry(handle_t *handle,
                                     struct ext4_filename *fname,
-                                    struct dentry *dentry,
-                                    struct inode *inode);
+                                    struct inode *dir, struct inode *inode);
 extern int ext4_try_create_inline_dir(handle_t *handle,
                                      struct inode *parent,
                                      struct inode *inode);
index 551353b1b17ab930ead8eff62ee3a699b4cfe10f..0ffabaf90aa5d19914aaf9e3cbeca44922f15f86 100644 (file)
@@ -3119,19 +3119,11 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
 {
        ext4_fsblk_t ee_pblock;
        unsigned int ee_len;
-       int ret;
 
        ee_len    = ext4_ext_get_actual_len(ex);
        ee_pblock = ext4_ext_pblock(ex);
-
-       if (ext4_encrypted_inode(inode))
-               return ext4_encrypted_zeroout(inode, ex);
-
-       ret = sb_issue_zeroout(inode->i_sb, ee_pblock, ee_len, GFP_NOFS);
-       if (ret > 0)
-               ret = 0;
-
-       return ret;
+       return ext4_issue_zeroout(inode, le32_to_cpu(ex->ee_block), ee_pblock,
+                                 ee_len);
 }
 
 /*
@@ -4052,6 +4044,14 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
        }
        /* IO end_io complete, convert the filled extent to written */
        if (flags & EXT4_GET_BLOCKS_CONVERT) {
+               if (flags & EXT4_GET_BLOCKS_ZERO) {
+                       if (allocated > map->m_len)
+                               allocated = map->m_len;
+                       err = ext4_issue_zeroout(inode, map->m_lblk, newblock,
+                                                allocated);
+                       if (err < 0)
+                               goto out2;
+               }
                ret = ext4_convert_unwritten_extents_endio(handle, inode, map,
                                                           ppath);
                if (ret >= 0) {
@@ -4685,10 +4685,6 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
        if (len <= EXT_UNWRITTEN_MAX_LEN)
                flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
 
-       /* Wait all existing dio workers, newcomers will block on i_mutex */
-       ext4_inode_block_unlocked_dio(inode);
-       inode_dio_wait(inode);
-
        /*
         * credits to insert 1 extent into extent tree
         */
@@ -4752,8 +4748,6 @@ retry:
                goto retry;
        }
 
-       ext4_inode_resume_unlocked_dio(inode);
-
        return ret > 0 ? ret2 : ret;
 }
 
@@ -4770,7 +4764,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
        int partial_begin, partial_end;
        loff_t start, end;
        ext4_lblk_t lblk;
-       struct address_space *mapping = inode->i_mapping;
        unsigned int blkbits = inode->i_blkbits;
 
        trace_ext4_zero_range(inode, offset, len, mode);
@@ -4785,17 +4778,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
                        return ret;
        }
 
-       /*
-        * Write out all dirty pages to avoid race conditions
-        * Then release them.
-        */
-       if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
-               ret = filemap_write_and_wait_range(mapping, offset,
-                                                  offset + len - 1);
-               if (ret)
-                       return ret;
-       }
-
        /*
         * Round up offset. This is not fallocate, we neet to zero out
         * blocks, so convert interior block aligned part of the range to
@@ -4817,7 +4799,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
        else
                max_blocks -= lblk;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        /*
         * Indirect files do not support unwritten extnets
@@ -4839,6 +4821,10 @@ static long ext4_zero_range(struct file *file, loff_t offset,
        if (mode & FALLOC_FL_KEEP_SIZE)
                flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
 
+       /* Wait all existing dio workers, newcomers will block on i_mutex */
+       ext4_inode_block_unlocked_dio(inode);
+       inode_dio_wait(inode);
+
        /* Preallocate the range including the unaligned edges */
        if (partial_begin || partial_end) {
                ret = ext4_alloc_file_blocks(file,
@@ -4847,7 +4833,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
                                 round_down(offset, 1 << blkbits)) >> blkbits,
                                new_size, flags, mode);
                if (ret)
-                       goto out_mutex;
+                       goto out_dio;
 
        }
 
@@ -4856,16 +4842,23 @@ static long ext4_zero_range(struct file *file, loff_t offset,
                flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN |
                          EXT4_EX_NOCACHE);
 
-               /* Now release the pages and zero block aligned part of pages*/
+               /*
+                * Prevent page faults from reinstantiating pages we have
+                * released from page cache.
+                */
+               down_write(&EXT4_I(inode)->i_mmap_sem);
+               ret = ext4_update_disksize_before_punch(inode, offset, len);
+               if (ret) {
+                       up_write(&EXT4_I(inode)->i_mmap_sem);
+                       goto out_dio;
+               }
+               /* Now release the pages and zero block aligned part of pages */
                truncate_pagecache_range(inode, start, end - 1);
                inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
 
-               /* Wait all existing dio workers, newcomers will block on i_mutex */
-               ext4_inode_block_unlocked_dio(inode);
-               inode_dio_wait(inode);
-
                ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
                                             flags, mode);
+               up_write(&EXT4_I(inode)->i_mmap_sem);
                if (ret)
                        goto out_dio;
        }
@@ -4909,7 +4902,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 out_dio:
        ext4_inode_resume_unlocked_dio(inode);
 out_mutex:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return ret;
 }
 
@@ -4980,7 +4973,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
        if (mode & FALLOC_FL_KEEP_SIZE)
                flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        /*
         * We only support preallocation for extent-based files only
@@ -4998,8 +4991,13 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
                        goto out;
        }
 
+       /* Wait all existing dio workers, newcomers will block on i_mutex */
+       ext4_inode_block_unlocked_dio(inode);
+       inode_dio_wait(inode);
+
        ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
                                     flags, mode);
+       ext4_inode_resume_unlocked_dio(inode);
        if (ret)
                goto out;
 
@@ -5008,7 +5006,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
                                                EXT4_I(inode)->i_sync_tid);
        }
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
        return ret;
 }
@@ -5494,21 +5492,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
                        return ret;
        }
 
-       /*
-        * Need to round down offset to be aligned with page size boundary
-        * for page size > block size.
-        */
-       ioffset = round_down(offset, PAGE_SIZE);
-
-       /* Write out all dirty pages */
-       ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
-                                          LLONG_MAX);
-       if (ret)
-               return ret;
-
-       /* Take mutex lock */
-       mutex_lock(&inode->i_mutex);
-
+       inode_lock(inode);
        /*
         * There is no need to overlap collapse range with EOF, in which case
         * it is effectively a truncate operation
@@ -5524,17 +5508,43 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
                goto out_mutex;
        }
 
-       truncate_pagecache(inode, ioffset);
-
        /* Wait for existing dio to complete */
        ext4_inode_block_unlocked_dio(inode);
        inode_dio_wait(inode);
 
+       /*
+        * Prevent page faults from reinstantiating pages we have released from
+        * page cache.
+        */
+       down_write(&EXT4_I(inode)->i_mmap_sem);
+       /*
+        * Need to round down offset to be aligned with page size boundary
+        * for page size > block size.
+        */
+       ioffset = round_down(offset, PAGE_SIZE);
+       /*
+        * Write tail of the last page before removed range since it will get
+        * removed from the page cache below.
+        */
+       ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, offset);
+       if (ret)
+               goto out_mmap;
+       /*
+        * Write data that will be shifted to preserve them when discarding
+        * page cache below. We are also protected from pages becoming dirty
+        * by i_mmap_sem.
+        */
+       ret = filemap_write_and_wait_range(inode->i_mapping, offset + len,
+                                          LLONG_MAX);
+       if (ret)
+               goto out_mmap;
+       truncate_pagecache(inode, ioffset);
+
        credits = ext4_writepage_trans_blocks(inode);
        handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
        if (IS_ERR(handle)) {
                ret = PTR_ERR(handle);
-               goto out_dio;
+               goto out_mmap;
        }
 
        down_write(&EXT4_I(inode)->i_data_sem);
@@ -5573,10 +5583,11 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
 
 out_stop:
        ext4_journal_stop(handle);
-out_dio:
+out_mmap:
+       up_write(&EXT4_I(inode)->i_mmap_sem);
        ext4_inode_resume_unlocked_dio(inode);
 out_mutex:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return ret;
 }
 
@@ -5627,21 +5638,7 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
                        return ret;
        }
 
-       /*
-        * Need to round down to align start offset to page size boundary
-        * for page size > block size.
-        */
-       ioffset = round_down(offset, PAGE_SIZE);
-
-       /* Write out all dirty pages */
-       ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
-                       LLONG_MAX);
-       if (ret)
-               return ret;
-
-       /* Take mutex lock */
-       mutex_lock(&inode->i_mutex);
-
+       inode_lock(inode);
        /* Currently just for extent based files */
        if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
                ret = -EOPNOTSUPP;
@@ -5660,17 +5657,32 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
                goto out_mutex;
        }
 
-       truncate_pagecache(inode, ioffset);
-
        /* Wait for existing dio to complete */
        ext4_inode_block_unlocked_dio(inode);
        inode_dio_wait(inode);
 
+       /*
+        * Prevent page faults from reinstantiating pages we have released from
+        * page cache.
+        */
+       down_write(&EXT4_I(inode)->i_mmap_sem);
+       /*
+        * Need to round down to align start offset to page size boundary
+        * for page size > block size.
+        */
+       ioffset = round_down(offset, PAGE_SIZE);
+       /* Write out all dirty pages */
+       ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
+                       LLONG_MAX);
+       if (ret)
+               goto out_mmap;
+       truncate_pagecache(inode, ioffset);
+
        credits = ext4_writepage_trans_blocks(inode);
        handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
        if (IS_ERR(handle)) {
                ret = PTR_ERR(handle);
-               goto out_dio;
+               goto out_mmap;
        }
 
        /* Expand file to avoid data loss if there is error while shifting */
@@ -5741,10 +5753,11 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
 
 out_stop:
        ext4_journal_stop(handle);
-out_dio:
+out_mmap:
+       up_write(&EXT4_I(inode)->i_mmap_sem);
        ext4_inode_resume_unlocked_dio(inode);
 out_mutex:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return ret;
 }
 
@@ -5779,8 +5792,8 @@ ext4_swap_extents(handle_t *handle, struct inode *inode1,
 
        BUG_ON(!rwsem_is_locked(&EXT4_I(inode1)->i_data_sem));
        BUG_ON(!rwsem_is_locked(&EXT4_I(inode2)->i_data_sem));
-       BUG_ON(!mutex_is_locked(&inode1->i_mutex));
-       BUG_ON(!mutex_is_locked(&inode2->i_mutex));
+       BUG_ON(!inode_is_locked(inode1));
+       BUG_ON(!inode_is_locked(inode2));
 
        *erp = ext4_es_remove_extent(inode1, lblk1, count);
        if (unlikely(*erp))
index 113837e7ba98d5cf866ee365a40a89d7fc781a71..1126436dada19519b97240bcdb42995acae46724 100644 (file)
@@ -113,7 +113,7 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
                ext4_unwritten_wait(inode);
        }
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        ret = generic_write_checks(iocb, from);
        if (ret <= 0)
                goto out;
@@ -169,7 +169,7 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
        }
 
        ret = __generic_file_write_iter(iocb, from);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        if (ret > 0) {
                ssize_t err;
@@ -186,50 +186,42 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
        return ret;
 
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        if (aio_mutex)
                mutex_unlock(aio_mutex);
        return ret;
 }
 
 #ifdef CONFIG_FS_DAX
-static void ext4_end_io_unwritten(struct buffer_head *bh, int uptodate)
-{
-       struct inode *inode = bh->b_assoc_map->host;
-       /* XXX: breaks on 32-bit > 16TB. Is that even supported? */
-       loff_t offset = (loff_t)(uintptr_t)bh->b_private << inode->i_blkbits;
-       int err;
-       if (!uptodate)
-               return;
-       WARN_ON(!buffer_unwritten(bh));
-       err = ext4_convert_unwritten_extents(NULL, inode, offset, bh->b_size);
-}
-
 static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
        int result;
        handle_t *handle = NULL;
-       struct super_block *sb = file_inode(vma->vm_file)->i_sb;
+       struct inode *inode = file_inode(vma->vm_file);
+       struct super_block *sb = inode->i_sb;
        bool write = vmf->flags & FAULT_FLAG_WRITE;
 
        if (write) {
                sb_start_pagefault(sb);
                file_update_time(vma->vm_file);
+               down_read(&EXT4_I(inode)->i_mmap_sem);
                handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
                                                EXT4_DATA_TRANS_BLOCKS(sb));
-       }
+       } else
+               down_read(&EXT4_I(inode)->i_mmap_sem);
 
        if (IS_ERR(handle))
                result = VM_FAULT_SIGBUS;
        else
-               result = __dax_fault(vma, vmf, ext4_get_block_dax,
-                                               ext4_end_io_unwritten);
+               result = __dax_fault(vma, vmf, ext4_dax_mmap_get_block, NULL);
 
        if (write) {
                if (!IS_ERR(handle))
                        ext4_journal_stop(handle);
+               up_read(&EXT4_I(inode)->i_mmap_sem);
                sb_end_pagefault(sb);
-       }
+       } else
+               up_read(&EXT4_I(inode)->i_mmap_sem);
 
        return result;
 }
@@ -246,44 +238,88 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
        if (write) {
                sb_start_pagefault(sb);
                file_update_time(vma->vm_file);
+               down_read(&EXT4_I(inode)->i_mmap_sem);
                handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
                                ext4_chunk_trans_blocks(inode,
                                                        PMD_SIZE / PAGE_SIZE));
-       }
+       } else
+               down_read(&EXT4_I(inode)->i_mmap_sem);
 
        if (IS_ERR(handle))
                result = VM_FAULT_SIGBUS;
        else
                result = __dax_pmd_fault(vma, addr, pmd, flags,
-                               ext4_get_block_dax, ext4_end_io_unwritten);
+                               ext4_dax_mmap_get_block, NULL);
 
        if (write) {
                if (!IS_ERR(handle))
                        ext4_journal_stop(handle);
+               up_read(&EXT4_I(inode)->i_mmap_sem);
                sb_end_pagefault(sb);
-       }
+       } else
+               up_read(&EXT4_I(inode)->i_mmap_sem);
 
        return result;
 }
 
 static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
-       return dax_mkwrite(vma, vmf, ext4_get_block_dax,
-                               ext4_end_io_unwritten);
+       int err;
+       struct inode *inode = file_inode(vma->vm_file);
+
+       sb_start_pagefault(inode->i_sb);
+       file_update_time(vma->vm_file);
+       down_read(&EXT4_I(inode)->i_mmap_sem);
+       err = __dax_mkwrite(vma, vmf, ext4_dax_mmap_get_block, NULL);
+       up_read(&EXT4_I(inode)->i_mmap_sem);
+       sb_end_pagefault(inode->i_sb);
+
+       return err;
+}
+
+/*
+ * Handle write fault for VM_MIXEDMAP mappings. Similarly to ext4_dax_mkwrite()
+ * handler we check for races agaist truncate. Note that since we cycle through
+ * i_mmap_sem, we are sure that also any hole punching that began before we
+ * were called is finished by now and so if it included part of the file we
+ * are working on, our pte will get unmapped and the check for pte_same() in
+ * wp_pfn_shared() fails. Thus fault gets retried and things work out as
+ * desired.
+ */
+static int ext4_dax_pfn_mkwrite(struct vm_area_struct *vma,
+                               struct vm_fault *vmf)
+{
+       struct inode *inode = file_inode(vma->vm_file);
+       struct super_block *sb = inode->i_sb;
+       loff_t size;
+       int ret;
+
+       sb_start_pagefault(sb);
+       file_update_time(vma->vm_file);
+       down_read(&EXT4_I(inode)->i_mmap_sem);
+       size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
+       if (vmf->pgoff >= size)
+               ret = VM_FAULT_SIGBUS;
+       else
+               ret = dax_pfn_mkwrite(vma, vmf);
+       up_read(&EXT4_I(inode)->i_mmap_sem);
+       sb_end_pagefault(sb);
+
+       return ret;
 }
 
 static const struct vm_operations_struct ext4_dax_vm_ops = {
        .fault          = ext4_dax_fault,
        .pmd_fault      = ext4_dax_pmd_fault,
        .page_mkwrite   = ext4_dax_mkwrite,
-       .pfn_mkwrite    = dax_pfn_mkwrite,
+       .pfn_mkwrite    = ext4_dax_pfn_mkwrite,
 };
 #else
 #define ext4_dax_vm_ops        ext4_file_vm_ops
 #endif
 
 static const struct vm_operations_struct ext4_file_vm_ops = {
-       .fault          = filemap_fault,
+       .fault          = ext4_filemap_fault,
        .map_pages      = filemap_map_pages,
        .page_mkwrite   = ext4_page_mkwrite,
 };
@@ -527,11 +563,11 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
        int blkbits;
        int ret = 0;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        isize = i_size_read(inode);
        if (offset >= isize) {
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                return -ENXIO;
        }
 
@@ -579,7 +615,7 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
                dataoff = (loff_t)last << blkbits;
        } while (last <= end);
 
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        if (dataoff > isize)
                return -ENXIO;
@@ -600,11 +636,11 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
        int blkbits;
        int ret = 0;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        isize = i_size_read(inode);
        if (offset >= isize) {
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                return -ENXIO;
        }
 
@@ -655,7 +691,7 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
                break;
        } while (last <= end);
 
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        if (holeoff > isize)
                holeoff = isize;
index 1b8024d26f654c5458c7e84db5a2c439adc6500e..3fcfd50a2e8a0c05315d823fa1219bea2c87f6c8 100644 (file)
@@ -799,6 +799,13 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
                inode->i_gid = dir->i_gid;
        } else
                inode_init_owner(inode, dir, mode);
+
+       if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_PROJECT) &&
+           ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT))
+               ei->i_projid = EXT4_I(dir)->i_projid;
+       else
+               ei->i_projid = make_kprojid(&init_user_ns, EXT4_DEF_PROJID);
+
        err = dquot_initialize(inode);
        if (err)
                goto out;
index d884989cc83dd99238a710f8131ab38b1139c7ca..dfe3b9bafc0d2b9cb4957c189290f8da68fb4dc4 100644 (file)
@@ -995,12 +995,11 @@ void ext4_show_inline_dir(struct inode *dir, struct buffer_head *bh,
  */
 static int ext4_add_dirent_to_inline(handle_t *handle,
                                     struct ext4_filename *fname,
-                                    struct dentry *dentry,
+                                    struct inode *dir,
                                     struct inode *inode,
                                     struct ext4_iloc *iloc,
                                     void *inline_start, int inline_size)
 {
-       struct inode    *dir = d_inode(dentry->d_parent);
        int             err;
        struct ext4_dir_entry_2 *de;
 
@@ -1245,12 +1244,11 @@ out:
  * the new created block.
  */
 int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
-                             struct dentry *dentry, struct inode *inode)
+                             struct inode *dir, struct inode *inode)
 {
        int ret, inline_size;
        void *inline_start;
        struct ext4_iloc iloc;
-       struct inode *dir = d_inode(dentry->d_parent);
 
        ret = ext4_get_inode_loc(dir, &iloc);
        if (ret)
@@ -1264,7 +1262,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
                                                 EXT4_INLINE_DOTDOT_SIZE;
        inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE;
 
-       ret = ext4_add_dirent_to_inline(handle, fname, dentry, inode, &iloc,
+       ret = ext4_add_dirent_to_inline(handle, fname, dir, inode, &iloc,
                                        inline_start, inline_size);
        if (ret != -ENOSPC)
                goto out;
@@ -1285,7 +1283,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
        if (inline_size) {
                inline_start = ext4_get_inline_xattr_pos(dir, &iloc);
 
-               ret = ext4_add_dirent_to_inline(handle, fname, dentry,
+               ret = ext4_add_dirent_to_inline(handle, fname, dir,
                                                inode, &iloc, inline_start,
                                                inline_size);
 
index b3bd912df6bfaf475f9304eba7fb1b9ce6368e87..83bc8bfb3bea8eeefed38ca46ae8260779222405 100644 (file)
@@ -383,6 +383,21 @@ static int __check_block_validity(struct inode *inode, const char *func,
        return 0;
 }
 
+int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk, ext4_fsblk_t pblk,
+                      ext4_lblk_t len)
+{
+       int ret;
+
+       if (ext4_encrypted_inode(inode))
+               return ext4_encrypted_zeroout(inode, lblk, pblk, len);
+
+       ret = sb_issue_zeroout(inode->i_sb, pblk, len, GFP_NOFS);
+       if (ret > 0)
+               ret = 0;
+
+       return ret;
+}
+
 #define check_block_validity(inode, map)       \
        __check_block_validity((inode), __func__, __LINE__, (map))
 
@@ -403,8 +418,7 @@ static void ext4_map_blocks_es_recheck(handle_t *handle,
         * out taking i_data_sem.  So at the time the unwritten extent
         * could be converted.
         */
-       if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
-               down_read(&EXT4_I(inode)->i_data_sem);
+       down_read(&EXT4_I(inode)->i_data_sem);
        if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
                retval = ext4_ext_map_blocks(handle, inode, map, flags &
                                             EXT4_GET_BLOCKS_KEEP_SIZE);
@@ -412,8 +426,7 @@ static void ext4_map_blocks_es_recheck(handle_t *handle,
                retval = ext4_ind_map_blocks(handle, inode, map, flags &
                                             EXT4_GET_BLOCKS_KEEP_SIZE);
        }
-       if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
-               up_read((&EXT4_I(inode)->i_data_sem));
+       up_read((&EXT4_I(inode)->i_data_sem));
 
        /*
         * We don't check m_len because extent will be collpased in status
@@ -509,8 +522,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
         * Try to see if we can get the block without requesting a new
         * file system block.
         */
-       if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
-               down_read(&EXT4_I(inode)->i_data_sem);
+       down_read(&EXT4_I(inode)->i_data_sem);
        if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
                retval = ext4_ext_map_blocks(handle, inode, map, flags &
                                             EXT4_GET_BLOCKS_KEEP_SIZE);
@@ -541,8 +553,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
                if (ret < 0)
                        retval = ret;
        }
-       if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
-               up_read((&EXT4_I(inode)->i_data_sem));
+       up_read((&EXT4_I(inode)->i_data_sem));
 
 found:
        if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
@@ -625,6 +636,22 @@ found:
                        WARN_ON(1);
                }
 
+               /*
+                * We have to zeroout blocks before inserting them into extent
+                * status tree. Otherwise someone could look them up there and
+                * use them before they are really zeroed.
+                */
+               if (flags & EXT4_GET_BLOCKS_ZERO &&
+                   map->m_flags & EXT4_MAP_MAPPED &&
+                   map->m_flags & EXT4_MAP_NEW) {
+                       ret = ext4_issue_zeroout(inode, map->m_lblk,
+                                                map->m_pblk, map->m_len);
+                       if (ret) {
+                               retval = ret;
+                               goto out_sem;
+                       }
+               }
+
                /*
                 * If the extent has been zeroed out, we don't need to update
                 * extent status tree.
@@ -632,7 +659,7 @@ found:
                if ((flags & EXT4_GET_BLOCKS_PRE_IO) &&
                    ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
                        if (ext4_es_is_written(&es))
-                               goto has_zeroout;
+                               goto out_sem;
                }
                status = map->m_flags & EXT4_MAP_UNWRITTEN ?
                                EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
@@ -643,11 +670,13 @@ found:
                        status |= EXTENT_STATUS_DELAYED;
                ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
                                            map->m_pblk, status);
-               if (ret < 0)
+               if (ret < 0) {
                        retval = ret;
+                       goto out_sem;
+               }
        }
 
-has_zeroout:
+out_sem:
        up_write((&EXT4_I(inode)->i_data_sem));
        if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
                ret = check_block_validity(inode, map);
@@ -674,7 +703,7 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock,
        map.m_lblk = iblock;
        map.m_len = bh->b_size >> inode->i_blkbits;
 
-       if (flags && !(flags & EXT4_GET_BLOCKS_NO_LOCK) && !handle) {
+       if (flags && !handle) {
                /* Direct IO write... */
                if (map.m_len > DIO_MAX_BLOCKS)
                        map.m_len = DIO_MAX_BLOCKS;
@@ -694,16 +723,6 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock,
 
                map_bh(bh, inode->i_sb, map.m_pblk);
                bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
-               if (IS_DAX(inode) && buffer_unwritten(bh)) {
-                       /*
-                        * dgc: I suspect unwritten conversion on ext4+DAX is
-                        * fundamentally broken here when there are concurrent
-                        * read/write in progress on this inode.
-                        */
-                       WARN_ON_ONCE(io_end);
-                       bh->b_assoc_map = inode->i_mapping;
-                       bh->b_private = (void *)(unsigned long)iblock;
-               }
                if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN)
                        set_buffer_defer_completion(bh);
                bh->b_size = inode->i_sb->s_blocksize * map.m_len;
@@ -879,9 +898,6 @@ int do_journal_get_write_access(handle_t *handle,
        return ret;
 }
 
-static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock,
-                  struct buffer_head *bh_result, int create);
-
 #ifdef CONFIG_EXT4_FS_ENCRYPTION
 static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
                                  get_block_t *get_block)
@@ -3054,25 +3070,96 @@ int ext4_get_block_write(struct inode *inode, sector_t iblock,
                               EXT4_GET_BLOCKS_IO_CREATE_EXT);
 }
 
-static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock,
+static int ext4_get_block_overwrite(struct inode *inode, sector_t iblock,
                   struct buffer_head *bh_result, int create)
 {
-       ext4_debug("ext4_get_block_write_nolock: inode %lu, create flag %d\n",
+       int ret;
+
+       ext4_debug("ext4_get_block_overwrite: inode %lu, create flag %d\n",
                   inode->i_ino, create);
-       return _ext4_get_block(inode, iblock, bh_result,
-                              EXT4_GET_BLOCKS_NO_LOCK);
+       ret = _ext4_get_block(inode, iblock, bh_result, 0);
+       /*
+        * Blocks should have been preallocated! ext4_file_write_iter() checks
+        * that.
+        */
+       WARN_ON_ONCE(!buffer_mapped(bh_result));
+
+       return ret;
 }
 
-int ext4_get_block_dax(struct inode *inode, sector_t iblock,
-                  struct buffer_head *bh_result, int create)
+#ifdef CONFIG_FS_DAX
+int ext4_dax_mmap_get_block(struct inode *inode, sector_t iblock,
+                           struct buffer_head *bh_result, int create)
 {
-       int flags = EXT4_GET_BLOCKS_PRE_IO | EXT4_GET_BLOCKS_UNWRIT_EXT;
-       if (create)
-               flags |= EXT4_GET_BLOCKS_CREATE;
-       ext4_debug("ext4_get_block_dax: inode %lu, create flag %d\n",
+       int ret, err;
+       int credits;
+       struct ext4_map_blocks map;
+       handle_t *handle = NULL;
+       int flags = 0;
+
+       ext4_debug("ext4_dax_mmap_get_block: inode %lu, create flag %d\n",
                   inode->i_ino, create);
-       return _ext4_get_block(inode, iblock, bh_result, flags);
+       map.m_lblk = iblock;
+       map.m_len = bh_result->b_size >> inode->i_blkbits;
+       credits = ext4_chunk_trans_blocks(inode, map.m_len);
+       if (create) {
+               flags |= EXT4_GET_BLOCKS_PRE_IO | EXT4_GET_BLOCKS_CREATE_ZERO;
+               handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, credits);
+               if (IS_ERR(handle)) {
+                       ret = PTR_ERR(handle);
+                       return ret;
+               }
+       }
+
+       ret = ext4_map_blocks(handle, inode, &map, flags);
+       if (create) {
+               err = ext4_journal_stop(handle);
+               if (ret >= 0 && err < 0)
+                       ret = err;
+       }
+       if (ret <= 0)
+               goto out;
+       if (map.m_flags & EXT4_MAP_UNWRITTEN) {
+               int err2;
+
+               /*
+                * We are protected by i_mmap_sem so we know block cannot go
+                * away from under us even though we dropped i_data_sem.
+                * Convert extent to written and write zeros there.
+                *
+                * Note: We may get here even when create == 0.
+                */
+               handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, credits);
+               if (IS_ERR(handle)) {
+                       ret = PTR_ERR(handle);
+                       goto out;
+               }
+
+               err = ext4_map_blocks(handle, inode, &map,
+                     EXT4_GET_BLOCKS_CONVERT | EXT4_GET_BLOCKS_CREATE_ZERO);
+               if (err < 0)
+                       ret = err;
+               err2 = ext4_journal_stop(handle);
+               if (err2 < 0 && ret > 0)
+                       ret = err2;
+       }
+out:
+       WARN_ON_ONCE(ret == 0 && create);
+       if (ret > 0) {
+               map_bh(bh_result, inode->i_sb, map.m_pblk);
+               bh_result->b_state = (bh_result->b_state & ~EXT4_MAP_FLAGS) |
+                                       map.m_flags;
+               /*
+                * At least for now we have to clear BH_New so that DAX code
+                * doesn't attempt to zero blocks again in a racy way.
+                */
+               bh_result->b_state &= ~(1 << BH_New);
+               bh_result->b_size = map.m_len << inode->i_blkbits;
+               ret = 0;
+       }
+       return ret;
 }
+#endif
 
 static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
                            ssize_t size, void *private)
@@ -3143,10 +3230,8 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
        /* If we do a overwrite dio, i_mutex locking can be released */
        overwrite = *((int *)iocb->private);
 
-       if (overwrite) {
-               down_read(&EXT4_I(inode)->i_data_sem);
-               mutex_unlock(&inode->i_mutex);
-       }
+       if (overwrite)
+               inode_unlock(inode);
 
        /*
         * We could direct write to holes and fallocate.
@@ -3189,7 +3274,7 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
        }
 
        if (overwrite) {
-               get_block_func = ext4_get_block_write_nolock;
+               get_block_func = ext4_get_block_overwrite;
        } else {
                get_block_func = ext4_get_block_write;
                dio_flags = DIO_LOCKING;
@@ -3245,10 +3330,8 @@ retake_lock:
        if (iov_iter_rw(iter) == WRITE)
                inode_dio_end(inode);
        /* take i_mutex locking again if we do a ovewrite dio */
-       if (overwrite) {
-               up_read(&EXT4_I(inode)->i_data_sem);
-               mutex_lock(&inode->i_mutex);
-       }
+       if (overwrite)
+               inode_lock(inode);
 
        return ret;
 }
@@ -3558,6 +3641,35 @@ int ext4_can_truncate(struct inode *inode)
        return 0;
 }
 
+/*
+ * We have to make sure i_disksize gets properly updated before we truncate
+ * page cache due to hole punching or zero range. Otherwise i_disksize update
+ * can get lost as it may have been postponed to submission of writeback but
+ * that will never happen after we truncate page cache.
+ */
+int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
+                                     loff_t len)
+{
+       handle_t *handle;
+       loff_t size = i_size_read(inode);
+
+       WARN_ON(!inode_is_locked(inode));
+       if (offset > size || offset + len < size)
+               return 0;
+
+       if (EXT4_I(inode)->i_disksize >= size)
+               return 0;
+
+       handle = ext4_journal_start(inode, EXT4_HT_MISC, 1);
+       if (IS_ERR(handle))
+               return PTR_ERR(handle);
+       ext4_update_i_disksize(inode, size);
+       ext4_mark_inode_dirty(handle, inode);
+       ext4_journal_stop(handle);
+
+       return 0;
+}
+
 /*
  * ext4_punch_hole: punches a hole in a file by releaseing the blocks
  * associated with the given offset and length
@@ -3595,7 +3707,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
                        return ret;
        }
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        /* No need to punch hole beyond i_size */
        if (offset >= inode->i_size)
@@ -3623,17 +3735,26 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
 
        }
 
+       /* Wait all existing dio workers, newcomers will block on i_mutex */
+       ext4_inode_block_unlocked_dio(inode);
+       inode_dio_wait(inode);
+
+       /*
+        * Prevent page faults from reinstantiating pages we have released from
+        * page cache.
+        */
+       down_write(&EXT4_I(inode)->i_mmap_sem);
        first_block_offset = round_up(offset, sb->s_blocksize);
        last_block_offset = round_down((offset + length), sb->s_blocksize) - 1;
 
        /* Now release the pages and zero block aligned part of pages*/
-       if (last_block_offset > first_block_offset)
+       if (last_block_offset > first_block_offset) {
+               ret = ext4_update_disksize_before_punch(inode, offset, length);
+               if (ret)
+                       goto out_dio;
                truncate_pagecache_range(inode, first_block_offset,
                                         last_block_offset);
-
-       /* Wait all existing dio workers, newcomers will block on i_mutex */
-       ext4_inode_block_unlocked_dio(inode);
-       inode_dio_wait(inode);
+       }
 
        if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
                credits = ext4_writepage_trans_blocks(inode);
@@ -3680,19 +3801,15 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
        if (IS_SYNC(inode))
                ext4_handle_sync(handle);
 
-       /* Now release the pages again to reduce race window */
-       if (last_block_offset > first_block_offset)
-               truncate_pagecache_range(inode, first_block_offset,
-                                        last_block_offset);
-
        inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
        ext4_mark_inode_dirty(handle, inode);
 out_stop:
        ext4_journal_stop(handle);
 out_dio:
+       up_write(&EXT4_I(inode)->i_mmap_sem);
        ext4_inode_resume_unlocked_dio(inode);
 out_mutex:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return ret;
 }
 
@@ -3762,7 +3879,7 @@ void ext4_truncate(struct inode *inode)
         * have i_mutex locked because it's not necessary.
         */
        if (!(inode->i_state & (I_NEW|I_FREEING)))
-               WARN_ON(!mutex_is_locked(&inode->i_mutex));
+               WARN_ON(!inode_is_locked(inode));
        trace_ext4_truncate_enter(inode);
 
        if (!ext4_can_truncate(inode))
@@ -4076,6 +4193,14 @@ static inline void ext4_iget_extra_inode(struct inode *inode,
                EXT4_I(inode)->i_inline_off = 0;
 }
 
+int ext4_get_projid(struct inode *inode, kprojid_t *projid)
+{
+       if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, EXT4_FEATURE_RO_COMPAT_PROJECT))
+               return -EOPNOTSUPP;
+       *projid = EXT4_I(inode)->i_projid;
+       return 0;
+}
+
 struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
 {
        struct ext4_iloc iloc;
@@ -4087,6 +4212,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
        int block;
        uid_t i_uid;
        gid_t i_gid;
+       projid_t i_projid;
 
        inode = iget_locked(sb, ino);
        if (!inode)
@@ -4136,12 +4262,20 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
        inode->i_mode = le16_to_cpu(raw_inode->i_mode);
        i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
        i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
+       if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_PROJECT) &&
+           EXT4_INODE_SIZE(sb) > EXT4_GOOD_OLD_INODE_SIZE &&
+           EXT4_FITS_IN_INODE(raw_inode, ei, i_projid))
+               i_projid = (projid_t)le32_to_cpu(raw_inode->i_projid);
+       else
+               i_projid = EXT4_DEF_PROJID;
+
        if (!(test_opt(inode->i_sb, NO_UID32))) {
                i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
                i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
        }
        i_uid_write(inode, i_uid);
        i_gid_write(inode, i_gid);
+       ei->i_projid = make_kprojid(&init_user_ns, i_projid);
        set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
 
        ext4_clear_state_flags(ei);     /* Only relevant on 32-bit archs */
@@ -4440,6 +4574,7 @@ static int ext4_do_update_inode(handle_t *handle,
        int need_datasync = 0, set_large_file = 0;
        uid_t i_uid;
        gid_t i_gid;
+       projid_t i_projid;
 
        spin_lock(&ei->i_raw_lock);
 
@@ -4452,6 +4587,7 @@ static int ext4_do_update_inode(handle_t *handle,
        raw_inode->i_mode = cpu_to_le16(inode->i_mode);
        i_uid = i_uid_read(inode);
        i_gid = i_gid_read(inode);
+       i_projid = from_kprojid(&init_user_ns, ei->i_projid);
        if (!(test_opt(inode->i_sb, NO_UID32))) {
                raw_inode->i_uid_low = cpu_to_le16(low_16_bits(i_uid));
                raw_inode->i_gid_low = cpu_to_le16(low_16_bits(i_gid));
@@ -4529,6 +4665,15 @@ static int ext4_do_update_inode(handle_t *handle,
                                cpu_to_le16(ei->i_extra_isize);
                }
        }
+
+       BUG_ON(!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
+                       EXT4_FEATURE_RO_COMPAT_PROJECT) &&
+              i_projid != EXT4_DEF_PROJID);
+
+       if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
+           EXT4_FITS_IN_INODE(raw_inode, ei, i_projid))
+               raw_inode->i_projid = cpu_to_le32(i_projid);
+
        ext4_inode_csum_set(inode, raw_inode, ei);
        spin_unlock(&ei->i_raw_lock);
        if (inode->i_sb->s_flags & MS_LAZYTIME)
@@ -4824,6 +4969,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
                        } else
                                ext4_wait_for_tail_page_commit(inode);
                }
+               down_write(&EXT4_I(inode)->i_mmap_sem);
                /*
                 * Truncate pagecache after we've waited for commit
                 * in data=journal mode to make pages freeable.
@@ -4831,6 +4977,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
                truncate_pagecache(inode, inode->i_size);
                if (shrink)
                        ext4_truncate(inode);
+               up_write(&EXT4_I(inode)->i_mmap_sem);
        }
 
        if (!rc) {
@@ -5279,6 +5426,8 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 
        sb_start_pagefault(inode->i_sb);
        file_update_time(vma->vm_file);
+
+       down_read(&EXT4_I(inode)->i_mmap_sem);
        /* Delalloc case is easy... */
        if (test_opt(inode->i_sb, DELALLOC) &&
            !ext4_should_journal_data(inode) &&
@@ -5348,6 +5497,19 @@ retry_alloc:
 out_ret:
        ret = block_page_mkwrite_return(ret);
 out:
+       up_read(&EXT4_I(inode)->i_mmap_sem);
        sb_end_pagefault(inode->i_sb);
        return ret;
 }
+
+int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+       struct inode *inode = file_inode(vma->vm_file);
+       int err;
+
+       down_read(&EXT4_I(inode)->i_mmap_sem);
+       err = filemap_fault(vma, vmf);
+       up_read(&EXT4_I(inode)->i_mmap_sem);
+
+       return err;
+}
index 5e872fd40e5e38655435fbcf91802244c9ce8959..0f6c36922c2466116d610e8092bd17b64285c29c 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/mount.h>
 #include <linux/file.h>
 #include <linux/random.h>
+#include <linux/quotaops.h>
 #include <asm/uaccess.h>
 #include "ext4_jbd2.h"
 #include "ext4.h"
@@ -202,6 +203,238 @@ static int uuid_is_zero(__u8 u[16])
        return 1;
 }
 
+static int ext4_ioctl_setflags(struct inode *inode,
+                              unsigned int flags)
+{
+       struct ext4_inode_info *ei = EXT4_I(inode);
+       handle_t *handle = NULL;
+       int err = EPERM, migrate = 0;
+       struct ext4_iloc iloc;
+       unsigned int oldflags, mask, i;
+       unsigned int jflag;
+
+       /* Is it quota file? Do not allow user to mess with it */
+       if (IS_NOQUOTA(inode))
+               goto flags_out;
+
+       oldflags = ei->i_flags;
+
+       /* The JOURNAL_DATA flag is modifiable only by root */
+       jflag = flags & EXT4_JOURNAL_DATA_FL;
+
+       /*
+        * The IMMUTABLE and APPEND_ONLY flags can only be changed by
+        * the relevant capability.
+        *
+        * This test looks nicer. Thanks to Pauline Middelink
+        */
+       if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) {
+               if (!capable(CAP_LINUX_IMMUTABLE))
+                       goto flags_out;
+       }
+
+       /*
+        * The JOURNAL_DATA flag can only be changed by
+        * the relevant capability.
+        */
+       if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
+               if (!capable(CAP_SYS_RESOURCE))
+                       goto flags_out;
+       }
+       if ((flags ^ oldflags) & EXT4_EXTENTS_FL)
+               migrate = 1;
+
+       if (flags & EXT4_EOFBLOCKS_FL) {
+               /* we don't support adding EOFBLOCKS flag */
+               if (!(oldflags & EXT4_EOFBLOCKS_FL)) {
+                       err = -EOPNOTSUPP;
+                       goto flags_out;
+               }
+       } else if (oldflags & EXT4_EOFBLOCKS_FL)
+               ext4_truncate(inode);
+
+       handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
+       if (IS_ERR(handle)) {
+               err = PTR_ERR(handle);
+               goto flags_out;
+       }
+       if (IS_SYNC(inode))
+               ext4_handle_sync(handle);
+       err = ext4_reserve_inode_write(handle, inode, &iloc);
+       if (err)
+               goto flags_err;
+
+       for (i = 0, mask = 1; i < 32; i++, mask <<= 1) {
+               if (!(mask & EXT4_FL_USER_MODIFIABLE))
+                       continue;
+               if (mask & flags)
+                       ext4_set_inode_flag(inode, i);
+               else
+                       ext4_clear_inode_flag(inode, i);
+       }
+
+       ext4_set_inode_flags(inode);
+       inode->i_ctime = ext4_current_time(inode);
+
+       err = ext4_mark_iloc_dirty(handle, inode, &iloc);
+flags_err:
+       ext4_journal_stop(handle);
+       if (err)
+               goto flags_out;
+
+       if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL))
+               err = ext4_change_inode_journal_flag(inode, jflag);
+       if (err)
+               goto flags_out;
+       if (migrate) {
+               if (flags & EXT4_EXTENTS_FL)
+                       err = ext4_ext_migrate(inode);
+               else
+                       err = ext4_ind_migrate(inode);
+       }
+
+flags_out:
+       return err;
+}
+
+#ifdef CONFIG_QUOTA
+static int ext4_ioctl_setproject(struct file *filp, __u32 projid)
+{
+       struct inode *inode = file_inode(filp);
+       struct super_block *sb = inode->i_sb;
+       struct ext4_inode_info *ei = EXT4_I(inode);
+       int err, rc;
+       handle_t *handle;
+       kprojid_t kprojid;
+       struct ext4_iloc iloc;
+       struct ext4_inode *raw_inode;
+
+       if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
+                       EXT4_FEATURE_RO_COMPAT_PROJECT)) {
+               if (projid != EXT4_DEF_PROJID)
+                       return -EOPNOTSUPP;
+               else
+                       return 0;
+       }
+
+       if (EXT4_INODE_SIZE(sb) <= EXT4_GOOD_OLD_INODE_SIZE)
+               return -EOPNOTSUPP;
+
+       kprojid = make_kprojid(&init_user_ns, (projid_t)projid);
+
+       if (projid_eq(kprojid, EXT4_I(inode)->i_projid))
+               return 0;
+
+       err = mnt_want_write_file(filp);
+       if (err)
+               return err;
+
+       err = -EPERM;
+       inode_lock(inode);
+       /* Is it quota file? Do not allow user to mess with it */
+       if (IS_NOQUOTA(inode))
+               goto out_unlock;
+
+       err = ext4_get_inode_loc(inode, &iloc);
+       if (err)
+               goto out_unlock;
+
+       raw_inode = ext4_raw_inode(&iloc);
+       if (!EXT4_FITS_IN_INODE(raw_inode, ei, i_projid)) {
+               err = -EOVERFLOW;
+               brelse(iloc.bh);
+               goto out_unlock;
+       }
+       brelse(iloc.bh);
+
+       dquot_initialize(inode);
+
+       handle = ext4_journal_start(inode, EXT4_HT_QUOTA,
+               EXT4_QUOTA_INIT_BLOCKS(sb) +
+               EXT4_QUOTA_DEL_BLOCKS(sb) + 3);
+       if (IS_ERR(handle)) {
+               err = PTR_ERR(handle);
+               goto out_unlock;
+       }
+
+       err = ext4_reserve_inode_write(handle, inode, &iloc);
+       if (err)
+               goto out_stop;
+
+       if (sb_has_quota_limits_enabled(sb, PRJQUOTA)) {
+               struct dquot *transfer_to[MAXQUOTAS] = { };
+
+               transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid));
+               if (transfer_to[PRJQUOTA]) {
+                       err = __dquot_transfer(inode, transfer_to);
+                       dqput(transfer_to[PRJQUOTA]);
+                       if (err)
+                               goto out_dirty;
+               }
+       }
+       EXT4_I(inode)->i_projid = kprojid;
+       inode->i_ctime = ext4_current_time(inode);
+out_dirty:
+       rc = ext4_mark_iloc_dirty(handle, inode, &iloc);
+       if (!err)
+               err = rc;
+out_stop:
+       ext4_journal_stop(handle);
+out_unlock:
+       inode_unlock(inode);
+       mnt_drop_write_file(filp);
+       return err;
+}
+#else
+static int ext4_ioctl_setproject(struct file *filp, __u32 projid)
+{
+       if (projid != EXT4_DEF_PROJID)
+               return -EOPNOTSUPP;
+       return 0;
+}
+#endif
+
+/* Transfer internal flags to xflags */
+static inline __u32 ext4_iflags_to_xflags(unsigned long iflags)
+{
+       __u32 xflags = 0;
+
+       if (iflags & EXT4_SYNC_FL)
+               xflags |= FS_XFLAG_SYNC;
+       if (iflags & EXT4_IMMUTABLE_FL)
+               xflags |= FS_XFLAG_IMMUTABLE;
+       if (iflags & EXT4_APPEND_FL)
+               xflags |= FS_XFLAG_APPEND;
+       if (iflags & EXT4_NODUMP_FL)
+               xflags |= FS_XFLAG_NODUMP;
+       if (iflags & EXT4_NOATIME_FL)
+               xflags |= FS_XFLAG_NOATIME;
+       if (iflags & EXT4_PROJINHERIT_FL)
+               xflags |= FS_XFLAG_PROJINHERIT;
+       return xflags;
+}
+
+/* Transfer xflags flags to internal */
+static inline unsigned long ext4_xflags_to_iflags(__u32 xflags)
+{
+       unsigned long iflags = 0;
+
+       if (xflags & FS_XFLAG_SYNC)
+               iflags |= EXT4_SYNC_FL;
+       if (xflags & FS_XFLAG_IMMUTABLE)
+               iflags |= EXT4_IMMUTABLE_FL;
+       if (xflags & FS_XFLAG_APPEND)
+               iflags |= EXT4_APPEND_FL;
+       if (xflags & FS_XFLAG_NODUMP)
+               iflags |= EXT4_NODUMP_FL;
+       if (xflags & FS_XFLAG_NOATIME)
+               iflags |= EXT4_NOATIME_FL;
+       if (xflags & FS_XFLAG_PROJINHERIT)
+               iflags |= EXT4_PROJINHERIT_FL;
+
+       return iflags;
+}
+
 long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
        struct inode *inode = file_inode(filp);
@@ -217,11 +450,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                flags = ei->i_flags & EXT4_FL_USER_VISIBLE;
                return put_user(flags, (int __user *) arg);
        case EXT4_IOC_SETFLAGS: {
-               handle_t *handle = NULL;
-               int err, migrate = 0;
-               struct ext4_iloc iloc;
-               unsigned int oldflags, mask, i;
-               unsigned int jflag;
+               int err;
 
                if (!inode_owner_or_capable(inode))
                        return -EACCES;
@@ -235,90 +464,9 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 
                flags = ext4_mask_flags(inode->i_mode, flags);
 
-               err = -EPERM;
-               mutex_lock(&inode->i_mutex);
-               /* Is it quota file? Do not allow user to mess with it */
-               if (IS_NOQUOTA(inode))
-                       goto flags_out;
-
-               oldflags = ei->i_flags;
-
-               /* The JOURNAL_DATA flag is modifiable only by root */
-               jflag = flags & EXT4_JOURNAL_DATA_FL;
-
-               /*
-                * The IMMUTABLE and APPEND_ONLY flags can only be changed by
-                * the relevant capability.
-                *
-                * This test looks nicer. Thanks to Pauline Middelink
-                */
-               if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) {
-                       if (!capable(CAP_LINUX_IMMUTABLE))
-                               goto flags_out;
-               }
-
-               /*
-                * The JOURNAL_DATA flag can only be changed by
-                * the relevant capability.
-                */
-               if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
-                       if (!capable(CAP_SYS_RESOURCE))
-                               goto flags_out;
-               }
-               if ((flags ^ oldflags) & EXT4_EXTENTS_FL)
-                       migrate = 1;
-
-               if (flags & EXT4_EOFBLOCKS_FL) {
-                       /* we don't support adding EOFBLOCKS flag */
-                       if (!(oldflags & EXT4_EOFBLOCKS_FL)) {
-                               err = -EOPNOTSUPP;
-                               goto flags_out;
-                       }
-               } else if (oldflags & EXT4_EOFBLOCKS_FL)
-                       ext4_truncate(inode);
-
-               handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
-               if (IS_ERR(handle)) {
-                       err = PTR_ERR(handle);
-                       goto flags_out;
-               }
-               if (IS_SYNC(inode))
-                       ext4_handle_sync(handle);
-               err = ext4_reserve_inode_write(handle, inode, &iloc);
-               if (err)
-                       goto flags_err;
-
-               for (i = 0, mask = 1; i < 32; i++, mask <<= 1) {
-                       if (!(mask & EXT4_FL_USER_MODIFIABLE))
-                               continue;
-                       if (mask & flags)
-                               ext4_set_inode_flag(inode, i);
-                       else
-                               ext4_clear_inode_flag(inode, i);
-               }
-
-               ext4_set_inode_flags(inode);
-               inode->i_ctime = ext4_current_time(inode);
-
-               err = ext4_mark_iloc_dirty(handle, inode, &iloc);
-flags_err:
-               ext4_journal_stop(handle);
-               if (err)
-                       goto flags_out;
-
-               if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL))
-                       err = ext4_change_inode_journal_flag(inode, jflag);
-               if (err)
-                       goto flags_out;
-               if (migrate) {
-                       if (flags & EXT4_EXTENTS_FL)
-                               err = ext4_ext_migrate(inode);
-                       else
-                               err = ext4_ind_migrate(inode);
-               }
-
-flags_out:
-               mutex_unlock(&inode->i_mutex);
+               inode_lock(inode);
+               err = ext4_ioctl_setflags(inode, flags);
+               inode_unlock(inode);
                mnt_drop_write_file(filp);
                return err;
        }
@@ -349,7 +497,7 @@ flags_out:
                        goto setversion_out;
                }
 
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
                if (IS_ERR(handle)) {
                        err = PTR_ERR(handle);
@@ -364,7 +512,7 @@ flags_out:
                ext4_journal_stop(handle);
 
 unlock_out:
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
 setversion_out:
                mnt_drop_write_file(filp);
                return err;
@@ -510,9 +658,9 @@ group_add_out:
                 * ext4_ext_swap_inode_data before we switch the
                 * inode format to prevent read.
                 */
-               mutex_lock(&(inode->i_mutex));
+               inode_lock((inode));
                err = ext4_ext_migrate(inode);
-               mutex_unlock(&(inode->i_mutex));
+               inode_unlock((inode));
                mnt_drop_write_file(filp);
                return err;
        }
@@ -689,6 +837,60 @@ encryption_policy_out:
                return -EOPNOTSUPP;
 #endif
        }
+       case EXT4_IOC_FSGETXATTR:
+       {
+               struct fsxattr fa;
+
+               memset(&fa, 0, sizeof(struct fsxattr));
+               ext4_get_inode_flags(ei);
+               fa.fsx_xflags = ext4_iflags_to_xflags(ei->i_flags & EXT4_FL_USER_VISIBLE);
+
+               if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
+                               EXT4_FEATURE_RO_COMPAT_PROJECT)) {
+                       fa.fsx_projid = (__u32)from_kprojid(&init_user_ns,
+                               EXT4_I(inode)->i_projid);
+               }
+
+               if (copy_to_user((struct fsxattr __user *)arg,
+                                &fa, sizeof(fa)))
+                       return -EFAULT;
+               return 0;
+       }
+       case EXT4_IOC_FSSETXATTR:
+       {
+               struct fsxattr fa;
+               int err;
+
+               if (copy_from_user(&fa, (struct fsxattr __user *)arg,
+                                  sizeof(fa)))
+                       return -EFAULT;
+
+               /* Make sure caller has proper permission */
+               if (!inode_owner_or_capable(inode))
+                       return -EACCES;
+
+               err = mnt_want_write_file(filp);
+               if (err)
+                       return err;
+
+               flags = ext4_xflags_to_iflags(fa.fsx_xflags);
+               flags = ext4_mask_flags(inode->i_mode, flags);
+
+               inode_lock(inode);
+               flags = (ei->i_flags & ~EXT4_FL_XFLAG_VISIBLE) |
+                        (flags & EXT4_FL_XFLAG_VISIBLE);
+               err = ext4_ioctl_setflags(inode, flags);
+               inode_unlock(inode);
+               mnt_drop_write_file(filp);
+               if (err)
+                       return err;
+
+               err = ext4_ioctl_setproject(filp, fa.fsx_projid);
+               if (err)
+                       return err;
+
+               return 0;
+       }
        default:
                return -ENOTTY;
        }
index f27e0c2598c59edb5685c27310c2766d0860838d..06574dd77614a3b1c4396d5bb3c017c681209d00 100644 (file)
@@ -273,7 +273,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
                struct ext4_filename *fname,
                struct ext4_dir_entry_2 **res_dir);
 static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
-                            struct dentry *dentry, struct inode *inode);
+                            struct inode *dir, struct inode *inode);
 
 /* checksumming functions */
 void initialize_dirent_tail(struct ext4_dir_entry_tail *t,
@@ -1928,10 +1928,9 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname,
  * directory, and adds the dentry to the indexed directory.
  */
 static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
-                           struct dentry *dentry,
+                           struct inode *dir,
                            struct inode *inode, struct buffer_head *bh)
 {
-       struct inode    *dir = d_inode(dentry->d_parent);
        struct buffer_head *bh2;
        struct dx_root  *root;
        struct dx_frame frames[2], *frame;
@@ -2086,8 +2085,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
                return retval;
 
        if (ext4_has_inline_data(dir)) {
-               retval = ext4_try_add_inline_entry(handle, &fname,
-                                                  dentry, inode);
+               retval = ext4_try_add_inline_entry(handle, &fname, dir, inode);
                if (retval < 0)
                        goto out;
                if (retval == 1) {
@@ -2097,7 +2095,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
        }
 
        if (is_dx(dir)) {
-               retval = ext4_dx_add_entry(handle, &fname, dentry, inode);
+               retval = ext4_dx_add_entry(handle, &fname, dir, inode);
                if (!retval || (retval != ERR_BAD_DX_DIR))
                        goto out;
                ext4_clear_inode_flag(dir, EXT4_INODE_INDEX);
@@ -2119,7 +2117,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
 
                if (blocks == 1 && !dx_fallback &&
                    ext4_has_feature_dir_index(sb)) {
-                       retval = make_indexed_dir(handle, &fname, dentry,
+                       retval = make_indexed_dir(handle, &fname, dir,
                                                  inode, bh);
                        bh = NULL; /* make_indexed_dir releases bh */
                        goto out;
@@ -2154,12 +2152,11 @@ out:
  * Returns 0 for success, or a negative error value
  */
 static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
-                            struct dentry *dentry, struct inode *inode)
+                            struct inode *dir, struct inode *inode)
 {
        struct dx_frame frames[2], *frame;
        struct dx_entry *entries, *at;
        struct buffer_head *bh;
-       struct inode *dir = d_inode(dentry->d_parent);
        struct super_block *sb = dir->i_sb;
        struct ext4_dir_entry_2 *de;
        int err;
@@ -2756,7 +2753,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
                return 0;
 
        WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) &&
-                    !mutex_is_locked(&inode->i_mutex));
+                    !inode_is_locked(inode));
        /*
         * Exit early if inode already is on orphan list. This is a big speedup
         * since we don't have to contend on the global s_orphan_lock.
@@ -2838,7 +2835,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
                return 0;
 
        WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) &&
-                    !mutex_is_locked(&inode->i_mutex));
+                    !inode_is_locked(inode));
        /* Do this quick check before taking global s_orphan_lock. */
        if (list_empty(&ei->i_orphan))
                return 0;
@@ -3212,6 +3209,12 @@ static int ext4_link(struct dentry *old_dentry,
        if (ext4_encrypted_inode(dir) &&
            !ext4_is_child_context_consistent_with_parent(dir, inode))
                return -EPERM;
+
+       if ((ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT)) &&
+          (!projid_eq(EXT4_I(dir)->i_projid,
+                      EXT4_I(old_dentry->d_inode)->i_projid)))
+               return -EXDEV;
+
        err = dquot_initialize(dir);
        if (err)
                return err;
@@ -3492,6 +3495,11 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
        int credits;
        u8 old_file_type;
 
+       if ((ext4_test_inode_flag(new_dir, EXT4_INODE_PROJINHERIT)) &&
+           (!projid_eq(EXT4_I(new_dir)->i_projid,
+                       EXT4_I(old_dentry->d_inode)->i_projid)))
+               return -EXDEV;
+
        retval = dquot_initialize(old.dir);
        if (retval)
                return retval;
@@ -3701,6 +3709,14 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
                                                           new.inode)))
                return -EPERM;
 
+       if ((ext4_test_inode_flag(new_dir, EXT4_INODE_PROJINHERIT) &&
+            !projid_eq(EXT4_I(new_dir)->i_projid,
+                       EXT4_I(old_dentry->d_inode)->i_projid)) ||
+           (ext4_test_inode_flag(old_dir, EXT4_INODE_PROJINHERIT) &&
+            !projid_eq(EXT4_I(old_dir)->i_projid,
+                       EXT4_I(new_dentry->d_inode)->i_projid)))
+               return -EXDEV;
+
        retval = dquot_initialize(old.dir);
        if (retval)
                return retval;
index f1b56ff0120894e3e4cc79886928ea7852ac5e25..3ed01ec011d75067579a4b894b4b0623af265a2d 100644 (file)
@@ -80,6 +80,36 @@ static void ext4_destroy_lazyinit_thread(void);
 static void ext4_unregister_li_request(struct super_block *sb);
 static void ext4_clear_request_list(void);
 
+/*
+ * Lock ordering
+ *
+ * Note the difference between i_mmap_sem (EXT4_I(inode)->i_mmap_sem) and
+ * i_mmap_rwsem (inode->i_mmap_rwsem)!
+ *
+ * page fault path:
+ * mmap_sem -> sb_start_pagefault -> i_mmap_sem (r) -> transaction start ->
+ *   page lock -> i_data_sem (rw)
+ *
+ * buffered write path:
+ * sb_start_write -> i_mutex -> mmap_sem
+ * sb_start_write -> i_mutex -> transaction start -> page lock ->
+ *   i_data_sem (rw)
+ *
+ * truncate:
+ * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (w) -> i_mmap_sem (w) ->
+ *   i_mmap_rwsem (w) -> page lock
+ * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (w) -> i_mmap_sem (w) ->
+ *   transaction start -> i_data_sem (rw)
+ *
+ * direct IO:
+ * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (r) -> mmap_sem
+ * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (r) ->
+ *   transaction start -> i_data_sem (rw)
+ *
+ * writepages:
+ * transaction start -> page lock(s) -> i_data_sem (rw)
+ */
+
 #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2)
 static struct file_system_type ext2_fs_type = {
        .owner          = THIS_MODULE,
@@ -958,6 +988,7 @@ static void init_once(void *foo)
        INIT_LIST_HEAD(&ei->i_orphan);
        init_rwsem(&ei->xattr_sem);
        init_rwsem(&ei->i_data_sem);
+       init_rwsem(&ei->i_mmap_sem);
        inode_init_once(&ei->vfs_inode);
 }
 
@@ -1066,8 +1097,8 @@ static int bdev_try_to_free_page(struct super_block *sb, struct page *page,
 }
 
 #ifdef CONFIG_QUOTA
-#define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group")
-#define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
+static char *quotatypes[] = INITQFNAMES;
+#define QTYPE2NAME(t) (quotatypes[t])
 
 static int ext4_write_dquot(struct dquot *dquot);
 static int ext4_acquire_dquot(struct dquot *dquot);
@@ -1100,6 +1131,7 @@ static const struct dquot_operations ext4_quota_operations = {
        .write_info     = ext4_write_info,
        .alloc_dquot    = dquot_alloc,
        .destroy_dquot  = dquot_destroy,
+       .get_projid     = ext4_get_projid,
 };
 
 static const struct quotactl_ops ext4_qctl_operations = {
@@ -2254,10 +2286,10 @@ static void ext4_orphan_cleanup(struct super_block *sb,
                                        __func__, inode->i_ino, inode->i_size);
                        jbd_debug(2, "truncating inode %lu to %lld bytes\n",
                                  inode->i_ino, inode->i_size);
-                       mutex_lock(&inode->i_mutex);
+                       inode_lock(inode);
                        truncate_inode_pages(inode->i_mapping, inode->i_size);
                        ext4_truncate(inode);
-                       mutex_unlock(&inode->i_mutex);
+                       inode_unlock(inode);
                        nr_truncates++;
                } else {
                        if (test_opt(sb, DEBUG))
@@ -2526,6 +2558,12 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly)
                         "without CONFIG_QUOTA");
                return 0;
        }
+       if (ext4_has_feature_project(sb) && !readonly) {
+               ext4_msg(sb, KERN_ERR,
+                        "Filesystem with project quota feature cannot be mounted RDWR "
+                        "without CONFIG_QUOTA");
+               return 0;
+       }
 #endif  /* CONFIG_QUOTA */
        return 1;
 }
@@ -3654,7 +3692,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                sb->s_qcop = &dquot_quotactl_sysfile_ops;
        else
                sb->s_qcop = &ext4_qctl_operations;
-       sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP;
+       sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
 #endif
        memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
 
@@ -4790,6 +4828,48 @@ restore_opts:
        return err;
 }
 
+#ifdef CONFIG_QUOTA
+static int ext4_statfs_project(struct super_block *sb,
+                              kprojid_t projid, struct kstatfs *buf)
+{
+       struct kqid qid;
+       struct dquot *dquot;
+       u64 limit;
+       u64 curblock;
+
+       qid = make_kqid_projid(projid);
+       dquot = dqget(sb, qid);
+       if (IS_ERR(dquot))
+               return PTR_ERR(dquot);
+       spin_lock(&dq_data_lock);
+
+       limit = (dquot->dq_dqb.dqb_bsoftlimit ?
+                dquot->dq_dqb.dqb_bsoftlimit :
+                dquot->dq_dqb.dqb_bhardlimit) >> sb->s_blocksize_bits;
+       if (limit && buf->f_blocks > limit) {
+               curblock = dquot->dq_dqb.dqb_curspace >> sb->s_blocksize_bits;
+               buf->f_blocks = limit;
+               buf->f_bfree = buf->f_bavail =
+                       (buf->f_blocks > curblock) ?
+                        (buf->f_blocks - curblock) : 0;
+       }
+
+       limit = dquot->dq_dqb.dqb_isoftlimit ?
+               dquot->dq_dqb.dqb_isoftlimit :
+               dquot->dq_dqb.dqb_ihardlimit;
+       if (limit && buf->f_files > limit) {
+               buf->f_files = limit;
+               buf->f_ffree =
+                       (buf->f_files > dquot->dq_dqb.dqb_curinodes) ?
+                        (buf->f_files - dquot->dq_dqb.dqb_curinodes) : 0;
+       }
+
+       spin_unlock(&dq_data_lock);
+       dqput(dquot);
+       return 0;
+}
+#endif
+
 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
        struct super_block *sb = dentry->d_sb;
@@ -4822,6 +4902,11 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
        buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
        buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
 
+#ifdef CONFIG_QUOTA
+       if (ext4_test_inode_flag(dentry->d_inode, EXT4_INODE_PROJINHERIT) &&
+           sb_has_quota_limits_enabled(sb, PRJQUOTA))
+               ext4_statfs_project(sb, EXT4_I(dentry->d_inode)->i_projid, buf);
+#endif
        return 0;
 }
 
@@ -4986,7 +5071,8 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
        struct inode *qf_inode;
        unsigned long qf_inums[EXT4_MAXQUOTAS] = {
                le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
-               le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum)
+               le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
+               le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
        };
 
        BUG_ON(!ext4_has_feature_quota(sb));
@@ -5014,7 +5100,8 @@ static int ext4_enable_quotas(struct super_block *sb)
        int type, err = 0;
        unsigned long qf_inums[EXT4_MAXQUOTAS] = {
                le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
-               le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum)
+               le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
+               le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
        };
 
        sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
index 011ba6670d990285f6f61b56ea3fb8b421b70e59..c70d06a383e28819cc556f1027ea118272e5f738 100644 (file)
  */
 static inline void ext4_truncate_failed_write(struct inode *inode)
 {
+       down_write(&EXT4_I(inode)->i_mmap_sem);
        truncate_inode_pages(inode->i_mapping, inode->i_size);
        ext4_truncate(inode);
+       up_write(&EXT4_I(inode)->i_mmap_sem);
 }
 
 /*
index ac9e7c6aac74df601ddf3043c7ce364ad0cf2294..5c06db17e41fa267f5b270061d2959b2a36803e4 100644 (file)
@@ -794,7 +794,7 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                        return ret;
        }
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        isize = i_size_read(inode);
        if (start >= isize)
@@ -860,7 +860,7 @@ out:
        if (ret == 1)
                ret = 0;
 
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return ret;
 }
 
index 18ddb1e5182a16e016224d449c6eb281c582b00e..ea272be62677004d29c8018691ea442c2c992bb1 100644 (file)
@@ -333,7 +333,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
        loff_t isize;
        int err = 0;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        isize = i_size_read(inode);
        if (offset >= isize)
@@ -388,10 +388,10 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
 found:
        if (whence == SEEK_HOLE && data_ofs > isize)
                data_ofs = isize;
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return vfs_setpos(file, data_ofs, maxbytes);
 fail:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return -ENXIO;
 }
 
@@ -1219,7 +1219,7 @@ static long f2fs_fallocate(struct file *file, int mode,
                        FALLOC_FL_INSERT_RANGE))
                return -EOPNOTSUPP;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        if (mode & FALLOC_FL_PUNCH_HOLE) {
                if (offset >= inode->i_size)
@@ -1243,7 +1243,7 @@ static long f2fs_fallocate(struct file *file, int mode,
        }
 
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        trace_f2fs_fallocate(inode, mode, offset, len, ret);
        return ret;
@@ -1307,13 +1307,13 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg)
 
        flags = f2fs_mask_flags(inode->i_mode, flags);
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        oldflags = fi->i_flags;
 
        if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
                if (!capable(CAP_LINUX_IMMUTABLE)) {
-                       mutex_unlock(&inode->i_mutex);
+                       inode_unlock(inode);
                        ret = -EPERM;
                        goto out;
                }
@@ -1322,7 +1322,7 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg)
        flags = flags & FS_FL_USER_MODIFIABLE;
        flags |= oldflags & ~FS_FL_USER_MODIFIABLE;
        fi->i_flags = flags;
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        f2fs_set_inode_flags(inode);
        inode->i_ctime = CURRENT_TIME;
@@ -1667,7 +1667,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
 
        f2fs_balance_fs(sbi, true);
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        /* writeback all dirty pages in the range */
        err = filemap_write_and_wait_range(inode->i_mapping, range->start,
@@ -1778,7 +1778,7 @@ do_map:
 clear_out:
        clear_inode_flag(F2FS_I(inode), FI_DO_DEFRAG);
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        if (!err)
                range->len = (u64)total << PAGE_CACHE_SHIFT;
        return err;
index 7def96caec5f7fc4abd82e51746bce4dce1f30ee..d0b95c95079bbdc085a9746226427ceb3805bc45 100644 (file)
@@ -769,7 +769,7 @@ static int fat_ioctl_readdir(struct inode *inode, struct file *file,
 
        buf.dirent = dirent;
        buf.result = 0;
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        buf.ctx.pos = file->f_pos;
        ret = -ENOENT;
        if (!IS_DEADDIR(inode)) {
@@ -777,7 +777,7 @@ static int fat_ioctl_readdir(struct inode *inode, struct file *file,
                                    short_only, both ? &buf : NULL);
                file->f_pos = buf.ctx.pos;
        }
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        if (ret >= 0)
                ret = buf.result;
        return ret;
index 43d3475da83a79c8857e0861bf298df38a9d7ba7..f7018566883241bc65a981cf0d6e533e4d331241 100644 (file)
@@ -24,9 +24,9 @@ static int fat_ioctl_get_attributes(struct inode *inode, u32 __user *user_attr)
 {
        u32 attr;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        attr = fat_make_attrs(inode);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        return put_user(attr, user_attr);
 }
@@ -47,7 +47,7 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr)
        err = mnt_want_write_file(file);
        if (err)
                goto out;
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        /*
         * ATTR_VOLUME and ATTR_DIR cannot be changed; this also
@@ -109,7 +109,7 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr)
        fat_save_attrs(inode, attr);
        mark_inode_dirty(inode);
 out_unlock_inode:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        mnt_drop_write_file(file);
 out:
        return err;
@@ -246,7 +246,7 @@ static long fat_fallocate(struct file *file, int mode,
        if (!S_ISREG(inode->i_mode))
                return -EOPNOTSUPP;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        if (mode & FALLOC_FL_KEEP_SIZE) {
                ondisksize = inode->i_blocks << 9;
                if ((offset + len) <= ondisksize)
@@ -272,7 +272,7 @@ static long fat_fallocate(struct file *file, int mode,
        }
 
 error:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return err;
 }
 
index 5797d45a78cb29fddcff617a8fc03139ed609369..c5618db110be454781d6c7b8a70f8284fa00a477 100644 (file)
@@ -46,9 +46,9 @@ void put_filesystem(struct file_system_type *fs)
 static struct file_system_type **find_filesystem(const char *name, unsigned len)
 {
        struct file_system_type **p;
-       for (p=&file_systems; *p; p=&(*p)->next)
-               if (strlen((*p)->name) == len &&
-                   strncmp((*p)->name, name, len) == 0)
+       for (p = &file_systems; *p; p = &(*p)->next)
+               if (strncmp((*p)->name, name, len) == 0 &&
+                   !(*p)->name[len])
                        break;
        return p;
 }
index 712601f299b8a1436d69f0a31362ab57032f24b7..4b855b65d4577a2fd15389e22ddb40b2993f2e7a 100644 (file)
@@ -944,7 +944,7 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
        if (!parent)
                return -ENOENT;
 
-       mutex_lock(&parent->i_mutex);
+       inode_lock(parent);
        if (!S_ISDIR(parent->i_mode))
                goto unlock;
 
@@ -962,7 +962,7 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
        fuse_invalidate_entry(entry);
 
        if (child_nodeid != 0 && d_really_is_positive(entry)) {
-               mutex_lock(&d_inode(entry)->i_mutex);
+               inode_lock(d_inode(entry));
                if (get_node_id(d_inode(entry)) != child_nodeid) {
                        err = -ENOENT;
                        goto badentry;
@@ -983,7 +983,7 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
                clear_nlink(d_inode(entry));
                err = 0;
  badentry:
-               mutex_unlock(&d_inode(entry)->i_mutex);
+               inode_unlock(d_inode(entry));
                if (!err)
                        d_delete(entry);
        } else {
@@ -992,7 +992,7 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
        dput(entry);
 
  unlock:
-       mutex_unlock(&parent->i_mutex);
+       inode_unlock(parent);
        iput(parent);
        return err;
 }
@@ -1504,7 +1504,7 @@ void fuse_set_nowrite(struct inode *inode)
        struct fuse_conn *fc = get_fuse_conn(inode);
        struct fuse_inode *fi = get_fuse_inode(inode);
 
-       BUG_ON(!mutex_is_locked(&inode->i_mutex));
+       BUG_ON(!inode_is_locked(inode));
 
        spin_lock(&fc->lock);
        BUG_ON(fi->writectr < 0);
index aa03aab6a24f085b210e39b9d093c84c76235c28..b03d253ece159c4340765e46993127a53354b21b 100644 (file)
@@ -207,7 +207,7 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
                return err;
 
        if (lock_inode)
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
 
        err = fuse_do_open(fc, get_node_id(inode), file, isdir);
 
@@ -215,7 +215,7 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
                fuse_finish_open(inode, file);
 
        if (lock_inode)
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
 
        return err;
 }
@@ -413,9 +413,9 @@ static int fuse_flush(struct file *file, fl_owner_t id)
        if (err)
                return err;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        fuse_sync_writes(inode);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        req = fuse_get_req_nofail_nopages(fc, file);
        memset(&inarg, 0, sizeof(inarg));
@@ -450,7 +450,7 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
        if (is_bad_inode(inode))
                return -EIO;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        /*
         * Start writeback against all dirty pages of the inode, then
@@ -486,7 +486,7 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
                err = 0;
        }
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return err;
 }
 
@@ -1160,7 +1160,7 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
                return generic_file_write_iter(iocb, from);
        }
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        /* We can write back this queue in page reclaim */
        current->backing_dev_info = inode_to_bdi(inode);
@@ -1210,7 +1210,7 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
        }
 out:
        current->backing_dev_info = NULL;
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        return written ? written : err;
 }
@@ -1322,10 +1322,10 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
 
        if (!cuse && fuse_range_is_writeback(inode, idx_from, idx_to)) {
                if (!write)
-                       mutex_lock(&inode->i_mutex);
+                       inode_lock(inode);
                fuse_sync_writes(inode);
                if (!write)
-                       mutex_unlock(&inode->i_mutex);
+                       inode_unlock(inode);
        }
 
        while (count) {
@@ -1413,14 +1413,14 @@ static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from)
                return -EIO;
 
        /* Don't allow parallel writes to the same file */
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        res = generic_write_checks(iocb, from);
        if (res > 0)
                res = fuse_direct_io(&io, from, &iocb->ki_pos, FUSE_DIO_WRITE);
        fuse_invalidate_attr(inode);
        if (res > 0)
                fuse_write_update_size(inode, iocb->ki_pos);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        return res;
 }
@@ -2287,17 +2287,17 @@ static loff_t fuse_file_llseek(struct file *file, loff_t offset, int whence)
                retval = generic_file_llseek(file, offset, whence);
                break;
        case SEEK_END:
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                retval = fuse_update_attributes(inode, NULL, file, NULL);
                if (!retval)
                        retval = generic_file_llseek(file, offset, whence);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                break;
        case SEEK_HOLE:
        case SEEK_DATA:
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                retval = fuse_lseek(file, offset, whence);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                break;
        default:
                retval = -EINVAL;
@@ -2944,7 +2944,7 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
                return -EOPNOTSUPP;
 
        if (lock_inode) {
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                if (mode & FALLOC_FL_PUNCH_HOLE) {
                        loff_t endbyte = offset + length - 1;
                        err = filemap_write_and_wait_range(inode->i_mapping,
@@ -2990,7 +2990,7 @@ out:
                clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
 
        if (lock_inode)
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
 
        return err;
 }
index 7412863cda1e52736552ef0deea6aa84ff8c844f..c9384f932975efb8075d2fd2013116e8be9f055a 100644 (file)
@@ -914,7 +914,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t le
        if ((mode & ~FALLOC_FL_KEEP_SIZE) || gfs2_is_jdata(ip))
                return -EOPNOTSUPP;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
        ret = gfs2_glock_nq(&gh);
@@ -946,7 +946,7 @@ out_unlock:
        gfs2_glock_dq(&gh);
 out_uninit:
        gfs2_holder_uninit(&gh);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return ret;
 }
 
index 3e94400d587c4e1efdea6afee130b7b35c0a16f5..352f958769e15b150575c54b09d82b51d34f258e 100644 (file)
@@ -2067,7 +2067,7 @@ static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
        if (ret)
                return ret;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
        if (ret)
@@ -2094,7 +2094,7 @@ static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 
        gfs2_glock_dq_uninit(&gh);
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return ret;
 }
 
index be6d9c450b22d212cfcdfa8d42e07f76705347e2..a398913442591892f6b7e24e14d2b1a8e30b7394 100644 (file)
@@ -888,7 +888,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
                return -ENOMEM;
 
        sort(qda, num_qd, sizeof(struct gfs2_quota_data *), sort_qd, NULL);
-       mutex_lock(&ip->i_inode.i_mutex);
+       inode_lock(&ip->i_inode);
        for (qx = 0; qx < num_qd; qx++) {
                error = gfs2_glock_nq_init(qda[qx]->qd_gl, LM_ST_EXCLUSIVE,
                                           GL_NOCACHE, &ghs[qx]);
@@ -953,7 +953,7 @@ out_alloc:
 out:
        while (qx--)
                gfs2_glock_dq_uninit(&ghs[qx]);
-       mutex_unlock(&ip->i_inode.i_mutex);
+       inode_unlock(&ip->i_inode);
        kfree(ghs);
        gfs2_log_flush(ip->i_gl->gl_name.ln_sbd, ip->i_gl, NORMAL_FLUSH);
        return error;
@@ -1674,7 +1674,7 @@ static int gfs2_set_dqblk(struct super_block *sb, struct kqid qid,
        if (error)
                goto out_put;
 
-       mutex_lock(&ip->i_inode.i_mutex);
+       inode_lock(&ip->i_inode);
        error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_EXCLUSIVE, 0, &q_gh);
        if (error)
                goto out_unlockput;
@@ -1739,7 +1739,7 @@ out_i:
 out_q:
        gfs2_glock_dq_uninit(&q_gh);
 out_unlockput:
-       mutex_unlock(&ip->i_inode.i_mutex);
+       inode_unlock(&ip->i_inode);
 out_put:
        qd_put(qd);
        return error;
index 70788e03820ae3e1c34689c95493798c20781928..e9f2b855f83160d3798e4ef62f6d86a7f0e69547 100644 (file)
@@ -173,9 +173,9 @@ static int hfs_dir_release(struct inode *inode, struct file *file)
 {
        struct hfs_readdir_data *rd = file->private_data;
        if (rd) {
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                list_del(&rd->list);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                kfree(rd);
        }
        return 0;
index b99ebddb10cb5b5e2d422c0f2151fc926455d26b..6686bf39a5b5a0b4bb83de60527cd873b612fc35 100644 (file)
@@ -570,13 +570,13 @@ static int hfs_file_release(struct inode *inode, struct file *file)
        if (HFS_IS_RSRC(inode))
                inode = HFS_I(inode)->rsrc_inode;
        if (atomic_dec_and_test(&HFS_I(inode)->opencnt)) {
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                hfs_file_truncate(inode);
                //if (inode->i_flags & S_DEAD) {
                //      hfs_delete_cat(inode->i_ino, HFSPLUS_SB(sb).hidden_dir, NULL);
                //      hfs_delete_inode(inode);
                //}
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        }
        return 0;
 }
@@ -656,7 +656,7 @@ static int hfs_file_fsync(struct file *filp, loff_t start, loff_t end,
        ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
        if (ret)
                return ret;
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        /* sync the inode to buffers */
        ret = write_inode_now(inode, 0);
@@ -668,7 +668,7 @@ static int hfs_file_fsync(struct file *filp, loff_t start, loff_t end,
        err = sync_blockdev(sb->s_bdev);
        if (!ret)
                ret = err;
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return ret;
 }
 
index d0f39dcbb58e86057c91c2e32a9d538178da39c5..a4e867e089478ddd3edaf24ce9be7d485d66ad34 100644 (file)
@@ -284,9 +284,9 @@ static int hfsplus_dir_release(struct inode *inode, struct file *file)
 {
        struct hfsplus_readdir_data *rd = file->private_data;
        if (rd) {
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                list_del(&rd->list);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                kfree(rd);
        }
        return 0;
index 19b33f8151f1ab2cb0957b7e5772ff1206659aad..1a6394cdb54ef59520dbcd7f7577aee5d93d2561 100644 (file)
@@ -229,14 +229,14 @@ static int hfsplus_file_release(struct inode *inode, struct file *file)
        if (HFSPLUS_IS_RSRC(inode))
                inode = HFSPLUS_I(inode)->rsrc_inode;
        if (atomic_dec_and_test(&HFSPLUS_I(inode)->opencnt)) {
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                hfsplus_file_truncate(inode);
                if (inode->i_flags & S_DEAD) {
                        hfsplus_delete_cat(inode->i_ino,
                                           HFSPLUS_SB(sb)->hidden_dir, NULL);
                        hfsplus_delete_inode(inode);
                }
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        }
        return 0;
 }
@@ -286,7 +286,7 @@ int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end,
        error = filemap_write_and_wait_range(inode->i_mapping, start, end);
        if (error)
                return error;
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        /*
         * Sync inode metadata into the catalog and extent trees.
@@ -327,7 +327,7 @@ int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end,
        if (!test_bit(HFSPLUS_SB_NOBARRIER, &sbi->flags))
                blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
 
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        return error;
 }
index 0624ce4e07022e3f047683a70cbbf1cdad6ff174..32a49e292b6a6b5d94421eaa8eb59541d15b12b5 100644 (file)
@@ -93,7 +93,7 @@ static int hfsplus_ioctl_setflags(struct file *file, int __user *user_flags)
                goto out_drop_write;
        }
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        if ((flags & (FS_IMMUTABLE_FL|FS_APPEND_FL)) ||
            inode->i_flags & (S_IMMUTABLE|S_APPEND)) {
@@ -126,7 +126,7 @@ static int hfsplus_ioctl_setflags(struct file *file, int __user *user_flags)
        mark_inode_dirty(inode);
 
 out_unlock_inode:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 out_drop_write:
        mnt_drop_write_file(file);
 out:
index cfaa18c7a33796e0ee330d7b67e4b98bb6a95072..d1abbee281d19d8f51f1417bdb56f2adebc178f0 100644 (file)
@@ -378,9 +378,9 @@ static int hostfs_fsync(struct file *file, loff_t start, loff_t end,
        if (ret)
                return ret;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        ret = fsync_file(HOSTFS_I(inode)->fd, datasync);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        return ret;
 }
index dc540bfcee1d8e7842b6b0f0ca402214f9bf5609..e57a53c13d864a74431ee4a98653df083009a317 100644 (file)
@@ -33,7 +33,7 @@ static loff_t hpfs_dir_lseek(struct file *filp, loff_t off, int whence)
        if (whence == SEEK_DATA || whence == SEEK_HOLE)
                return -EINVAL;
 
-       mutex_lock(&i->i_mutex);
+       inode_lock(i);
        hpfs_lock(s);
 
        /*pr_info("dir lseek\n");*/
@@ -48,12 +48,12 @@ static loff_t hpfs_dir_lseek(struct file *filp, loff_t off, int whence)
 ok:
        filp->f_pos = new_off;
        hpfs_unlock(s);
-       mutex_unlock(&i->i_mutex);
+       inode_unlock(i);
        return new_off;
 fail:
        /*pr_warn("illegal lseek: %016llx\n", new_off);*/
        hpfs_unlock(s);
-       mutex_unlock(&i->i_mutex);
+       inode_unlock(i);
        return -ESPIPE;
 }
 
index 8bbf7f3e2a27e0669e7f7fd5624a2cb5b50ce0fe..e1f465a389d5be1b27f8fd98451312bb94b454b8 100644 (file)
@@ -141,7 +141,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 
        vma_len = (loff_t)(vma->vm_end - vma->vm_start);
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        file_accessed(file);
 
        ret = -ENOMEM;
@@ -157,7 +157,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
        if (vma->vm_flags & VM_WRITE && inode->i_size < len)
                inode->i_size = len;
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        return ret;
 }
@@ -530,7 +530,7 @@ static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
        if (hole_end > hole_start) {
                struct address_space *mapping = inode->i_mapping;
 
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                i_mmap_lock_write(mapping);
                if (!RB_EMPTY_ROOT(&mapping->i_mmap))
                        hugetlb_vmdelete_list(&mapping->i_mmap,
@@ -538,7 +538,7 @@ static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
                                                hole_end  >> PAGE_SHIFT);
                i_mmap_unlock_write(mapping);
                remove_inode_hugepages(inode, hole_start, hole_end);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        }
 
        return 0;
@@ -572,7 +572,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
        start = offset >> hpage_shift;
        end = (offset + len + hpage_size - 1) >> hpage_shift;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        /* We need to check rlimit even when FALLOC_FL_KEEP_SIZE */
        error = inode_newsize_ok(inode, offset + len);
@@ -659,7 +659,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
                i_size_write(inode, offset + len);
        inode->i_ctime = CURRENT_TIME;
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return error;
 }
 
index e491e54d243025999e42fdec674b082a765188fd..9f62db3bcc3efcc9cc678ed0d9bc88152f692ff8 100644 (file)
@@ -495,7 +495,7 @@ void clear_inode(struct inode *inode)
         */
        spin_lock_irq(&inode->i_data.tree_lock);
        BUG_ON(inode->i_data.nrpages);
-       BUG_ON(inode->i_data.nrshadows);
+       BUG_ON(inode->i_data.nrexceptional);
        spin_unlock_irq(&inode->i_data.tree_lock);
        BUG_ON(!list_empty(&inode->i_data.private_list));
        BUG_ON(!(inode->i_state & I_FREEING));
@@ -966,9 +966,9 @@ void lock_two_nondirectories(struct inode *inode1, struct inode *inode2)
                swap(inode1, inode2);
 
        if (inode1 && !S_ISDIR(inode1->i_mode))
-               mutex_lock(&inode1->i_mutex);
+               inode_lock(inode1);
        if (inode2 && !S_ISDIR(inode2->i_mode) && inode2 != inode1)
-               mutex_lock_nested(&inode2->i_mutex, I_MUTEX_NONDIR2);
+               inode_lock_nested(inode2, I_MUTEX_NONDIR2);
 }
 EXPORT_SYMBOL(lock_two_nondirectories);
 
@@ -980,9 +980,9 @@ EXPORT_SYMBOL(lock_two_nondirectories);
 void unlock_two_nondirectories(struct inode *inode1, struct inode *inode2)
 {
        if (inode1 && !S_ISDIR(inode1->i_mode))
-               mutex_unlock(&inode1->i_mutex);
+               inode_unlock(inode1);
        if (inode2 && !S_ISDIR(inode2->i_mode) && inode2 != inode1)
-               mutex_unlock(&inode2->i_mutex);
+               inode_unlock(inode2);
 }
 EXPORT_SYMBOL(unlock_two_nondirectories);
 
index 29466c380958ac411f4dcd46706837f2c55b03cd..116a333e9c773c10b76a8030c268284869c2533e 100644 (file)
@@ -434,9 +434,9 @@ int generic_block_fiemap(struct inode *inode,
                         u64 len, get_block_t *get_block)
 {
        int ret;
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        ret = __generic_block_fiemap(inode, fieinfo, start, len, get_block);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return ret;
 }
 EXPORT_SYMBOL(generic_block_fiemap);
index a3750f902adcbae8f8cabbad066f9d629b805155..0ae91ad6df2dc916f077495876ec7fe1e47902ed 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/mtd/mtd.h>
+#include <linux/mm.h> /* kvfree() */
 #include "nodelist.h"
 
 static void jffs2_build_remove_unlinked_inode(struct jffs2_sb_info *,
@@ -383,12 +384,7 @@ int jffs2_do_mount_fs(struct jffs2_sb_info *c)
        return 0;
 
  out_free:
-#ifndef __ECOS
-       if (jffs2_blocks_use_vmalloc(c))
-               vfree(c->blocks);
-       else
-#endif
-               kfree(c->blocks);
+       kvfree(c->blocks);
 
        return ret;
 }
index f509f62e12f6ef85e95b5c159353cba757ef4af1..c5ac5944bc1bd8ef1362e5911656f3bfb68ea70b 100644 (file)
@@ -39,10 +39,10 @@ int jffs2_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
        if (ret)
                return ret;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        /* Trigger GC to flush any pending writes for this inode */
        jffs2_flush_wbuf_gc(c, inode->i_ino);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        return 0;
 }
index 2caf1682036dc1914aaa887565b0a54edbe0ed35..bead25ae8fe4a563d257995102da2cbe7336eeb3 100644 (file)
@@ -596,10 +596,7 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent)
 out_root:
        jffs2_free_ino_caches(c);
        jffs2_free_raw_node_refs(c);
-       if (jffs2_blocks_use_vmalloc(c))
-               vfree(c->blocks);
-       else
-               kfree(c->blocks);
+       kvfree(c->blocks);
  out_inohash:
        jffs2_clear_xattr_subsystem(c);
        kfree(c->inocache_list);
index bb080c272149312ea382ee578ffdaa903620a43a..0a9a114bb9d1138b3060365986304cdb2df1428f 100644 (file)
@@ -331,10 +331,7 @@ static void jffs2_put_super (struct super_block *sb)
 
        jffs2_free_ino_caches(c);
        jffs2_free_raw_node_refs(c);
-       if (jffs2_blocks_use_vmalloc(c))
-               vfree(c->blocks);
-       else
-               kfree(c->blocks);
+       kvfree(c->blocks);
        jffs2_flash_cleanup(c);
        kfree(c->inocache_list);
        jffs2_clear_xattr_subsystem(c);
index 0e026a7bdcd4d9512b8074016b9f56a31de3b8f9..4ce7735dd042267bd086fd420066592e3b6855d3 100644 (file)
@@ -38,17 +38,17 @@ int jfs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
        if (rc)
                return rc;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        if (!(inode->i_state & I_DIRTY_ALL) ||
            (datasync && !(inode->i_state & I_DIRTY_DATASYNC))) {
                /* Make sure committed changes hit the disk */
                jfs_flush_journal(JFS_SBI(inode->i_sb)->log, 1);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                return rc;
        }
 
        rc |= jfs_commit_inode(inode, 1);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        return rc ? -EIO : 0;
 }
index 8db8b7d61e4048cf97ac289f263bc78c8685294d..8653cac7e12eed46e0bc360741004dfd3f1ce54d 100644 (file)
@@ -96,7 +96,7 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                }
 
                /* Lock against other parallel changes of flags */
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
 
                jfs_get_inode_flags(jfs_inode);
                oldflags = jfs_inode->mode2;
@@ -109,7 +109,7 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                        ((flags ^ oldflags) &
                        (JFS_APPEND_FL | JFS_IMMUTABLE_FL))) {
                        if (!capable(CAP_LINUX_IMMUTABLE)) {
-                               mutex_unlock(&inode->i_mutex);
+                               inode_unlock(inode);
                                err = -EPERM;
                                goto setflags_out;
                        }
@@ -120,7 +120,7 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                jfs_inode->mode2 = flags;
 
                jfs_set_inode_flags(inode);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                inode->i_ctime = CURRENT_TIME_SEC;
                mark_inode_dirty(inode);
 setflags_out:
index 900925b5eb8c225b151b58cf8133ee6a6e11e1b1..4f5d85ba8e237e91d3f314b330e53e26ad2cc22b 100644 (file)
@@ -792,7 +792,7 @@ static ssize_t jfs_quota_write(struct super_block *sb, int type,
        struct buffer_head tmp_bh;
        struct buffer_head *bh;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        while (towrite > 0) {
                tocopy = sb->s_blocksize - offset < towrite ?
                                sb->s_blocksize - offset : towrite;
@@ -824,7 +824,7 @@ static ssize_t jfs_quota_write(struct super_block *sb, int type,
        }
 out:
        if (len == towrite) {
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                return err;
        }
        if (inode->i_size < off+len-towrite)
@@ -832,7 +832,7 @@ out:
        inode->i_version++;
        inode->i_mtime = inode->i_ctime = CURRENT_TIME;
        mark_inode_dirty(inode);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return len - towrite;
 }
 
index 821973853340a106874da673618bf57bfbcc1178..996b7742c90b2eaf1e8b154ae3cc580293d6f94b 100644 (file)
@@ -1511,9 +1511,9 @@ static loff_t kernfs_dir_fop_llseek(struct file *file, loff_t offset,
        struct inode *inode = file_inode(file);
        loff_t ret;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        ret = generic_file_llseek(file, offset, whence);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        return ret;
 }
index 01491299f348c965adc27cbcd70340ab2d980946..0ca80b2af42015c309718b3328315471808cfa4c 100644 (file)
@@ -89,7 +89,7 @@ EXPORT_SYMBOL(dcache_dir_close);
 loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
 {
        struct dentry *dentry = file->f_path.dentry;
-       mutex_lock(&d_inode(dentry)->i_mutex);
+       inode_lock(d_inode(dentry));
        switch (whence) {
                case 1:
                        offset += file->f_pos;
@@ -97,7 +97,7 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
                        if (offset >= 0)
                                break;
                default:
-                       mutex_unlock(&d_inode(dentry)->i_mutex);
+                       inode_unlock(d_inode(dentry));
                        return -EINVAL;
        }
        if (offset != file->f_pos) {
@@ -124,7 +124,7 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
                        spin_unlock(&dentry->d_lock);
                }
        }
-       mutex_unlock(&d_inode(dentry)->i_mutex);
+       inode_unlock(d_inode(dentry));
        return offset;
 }
 EXPORT_SYMBOL(dcache_dir_lseek);
@@ -941,7 +941,7 @@ int __generic_file_fsync(struct file *file, loff_t start, loff_t end,
        if (err)
                return err;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        ret = sync_mapping_buffers(inode->i_mapping);
        if (!(inode->i_state & I_DIRTY_ALL))
                goto out;
@@ -953,7 +953,7 @@ int __generic_file_fsync(struct file *file, loff_t start, loff_t end,
                ret = err;
 
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return ret;
 }
 EXPORT_SYMBOL(__generic_file_fsync);
index af1ed74a657fbc93f64386bcad54d9826bc34684..7c5f91be9b65c4ddabe441d00bbda616ca0c2f26 100644 (file)
@@ -1650,12 +1650,12 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
         * bother, maybe that's a sign this just isn't a good file to
         * hand out a delegation on.
         */
-       if (is_deleg && !mutex_trylock(&inode->i_mutex))
+       if (is_deleg && !inode_trylock(inode))
                return -EAGAIN;
 
        if (is_deleg && arg == F_WRLCK) {
                /* Write delegations are not currently supported: */
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                WARN_ON_ONCE(1);
                return -EINVAL;
        }
@@ -1732,7 +1732,7 @@ out:
        spin_unlock(&ctx->flc_lock);
        locks_dispose_list(&dispose);
        if (is_deleg)
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        if (!error && !my_fl)
                *flp = NULL;
        return error;
index 1a6f0167b16a7a4b657138ed7c366e8fe3151dfd..61eaeb1b6cac10e664539b2132607180d8fe24d0 100644 (file)
@@ -204,12 +204,12 @@ long logfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
                if (err)
                        return err;
 
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                oldflags = li->li_flags;
                flags &= LOGFS_FL_USER_MODIFIABLE;
                flags |= oldflags & ~LOGFS_FL_USER_MODIFIABLE;
                li->li_flags = flags;
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
 
                inode->i_ctime = CURRENT_TIME;
                mark_inode_dirty_sync(inode);
@@ -230,11 +230,11 @@ int logfs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
        if (ret)
                return ret;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        logfs_get_wblocks(sb, NULL, WF_LOCK);
        logfs_write_anchor(sb);
        logfs_put_wblocks(sb, NULL, WF_LOCK);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        return 0;
 }
index bceefd5588a2bf908626a0a6282a5f810ecfc5c1..f624d132e01e6f5488fd0d9af13007b015e1e178 100644 (file)
@@ -1629,9 +1629,9 @@ static int lookup_slow(struct nameidata *nd, struct path *path)
        parent = nd->path.dentry;
        BUG_ON(nd->inode != parent->d_inode);
 
-       mutex_lock(&parent->d_inode->i_mutex);
+       inode_lock(parent->d_inode);
        dentry = __lookup_hash(&nd->last, parent, nd->flags);
-       mutex_unlock(&parent->d_inode->i_mutex);
+       inode_unlock(parent->d_inode);
        if (IS_ERR(dentry))
                return PTR_ERR(dentry);
        path->mnt = nd->path.mnt;
@@ -2229,10 +2229,10 @@ struct dentry *kern_path_locked(const char *name, struct path *path)
                putname(filename);
                return ERR_PTR(-EINVAL);
        }
-       mutex_lock_nested(&path->dentry->d_inode->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT);
        d = __lookup_hash(&last, path->dentry, 0);
        if (IS_ERR(d)) {
-               mutex_unlock(&path->dentry->d_inode->i_mutex);
+               inode_unlock(path->dentry->d_inode);
                path_put(path);
        }
        putname(filename);
@@ -2282,7 +2282,7 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
        unsigned int c;
        int err;
 
-       WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex));
+       WARN_ON_ONCE(!inode_is_locked(base->d_inode));
 
        this.name = name;
        this.len = len;
@@ -2380,9 +2380,9 @@ struct dentry *lookup_one_len_unlocked(const char *name,
        if (ret)
                return ret;
 
-       mutex_lock(&base->d_inode->i_mutex);
+       inode_lock(base->d_inode);
        ret =  __lookup_hash(&this, base, 0);
-       mutex_unlock(&base->d_inode->i_mutex);
+       inode_unlock(base->d_inode);
        return ret;
 }
 EXPORT_SYMBOL(lookup_one_len_unlocked);
@@ -2463,7 +2463,7 @@ mountpoint_last(struct nameidata *nd, struct path *path)
                goto done;
        }
 
-       mutex_lock(&dir->d_inode->i_mutex);
+       inode_lock(dir->d_inode);
        dentry = d_lookup(dir, &nd->last);
        if (!dentry) {
                /*
@@ -2473,16 +2473,16 @@ mountpoint_last(struct nameidata *nd, struct path *path)
                 */
                dentry = d_alloc(dir, &nd->last);
                if (!dentry) {
-                       mutex_unlock(&dir->d_inode->i_mutex);
+                       inode_unlock(dir->d_inode);
                        return -ENOMEM;
                }
                dentry = lookup_real(dir->d_inode, dentry, nd->flags);
                if (IS_ERR(dentry)) {
-                       mutex_unlock(&dir->d_inode->i_mutex);
+                       inode_unlock(dir->d_inode);
                        return PTR_ERR(dentry);
                }
        }
-       mutex_unlock(&dir->d_inode->i_mutex);
+       inode_unlock(dir->d_inode);
 
 done:
        if (d_is_negative(dentry)) {
@@ -2672,7 +2672,7 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
        struct dentry *p;
 
        if (p1 == p2) {
-               mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT);
+               inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
                return NULL;
        }
 
@@ -2680,29 +2680,29 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
 
        p = d_ancestor(p2, p1);
        if (p) {
-               mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_PARENT);
-               mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_CHILD);
+               inode_lock_nested(p2->d_inode, I_MUTEX_PARENT);
+               inode_lock_nested(p1->d_inode, I_MUTEX_CHILD);
                return p;
        }
 
        p = d_ancestor(p1, p2);
        if (p) {
-               mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT);
-               mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD);
+               inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
+               inode_lock_nested(p2->d_inode, I_MUTEX_CHILD);
                return p;
        }
 
-       mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT);
-       mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_PARENT2);
+       inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
+       inode_lock_nested(p2->d_inode, I_MUTEX_PARENT2);
        return NULL;
 }
 EXPORT_SYMBOL(lock_rename);
 
 void unlock_rename(struct dentry *p1, struct dentry *p2)
 {
-       mutex_unlock(&p1->d_inode->i_mutex);
+       inode_unlock(p1->d_inode);
        if (p1 != p2) {
-               mutex_unlock(&p2->d_inode->i_mutex);
+               inode_unlock(p2->d_inode);
                mutex_unlock(&p1->d_inode->i_sb->s_vfs_rename_mutex);
        }
 }
@@ -3141,9 +3141,9 @@ retry_lookup:
                 * dropping this one anyway.
                 */
        }
-       mutex_lock(&dir->d_inode->i_mutex);
+       inode_lock(dir->d_inode);
        error = lookup_open(nd, &path, file, op, got_write, opened);
-       mutex_unlock(&dir->d_inode->i_mutex);
+       inode_unlock(dir->d_inode);
 
        if (error <= 0) {
                if (error)
@@ -3489,7 +3489,7 @@ static struct dentry *filename_create(int dfd, struct filename *name,
         * Do the final lookup.
         */
        lookup_flags |= LOOKUP_CREATE | LOOKUP_EXCL;
-       mutex_lock_nested(&path->dentry->d_inode->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT);
        dentry = __lookup_hash(&last, path->dentry, lookup_flags);
        if (IS_ERR(dentry))
                goto unlock;
@@ -3518,7 +3518,7 @@ fail:
        dput(dentry);
        dentry = ERR_PTR(error);
 unlock:
-       mutex_unlock(&path->dentry->d_inode->i_mutex);
+       inode_unlock(path->dentry->d_inode);
        if (!err2)
                mnt_drop_write(path->mnt);
 out:
@@ -3538,7 +3538,7 @@ EXPORT_SYMBOL(kern_path_create);
 void done_path_create(struct path *path, struct dentry *dentry)
 {
        dput(dentry);
-       mutex_unlock(&path->dentry->d_inode->i_mutex);
+       inode_unlock(path->dentry->d_inode);
        mnt_drop_write(path->mnt);
        path_put(path);
 }
@@ -3735,7 +3735,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
                return -EPERM;
 
        dget(dentry);
-       mutex_lock(&dentry->d_inode->i_mutex);
+       inode_lock(dentry->d_inode);
 
        error = -EBUSY;
        if (is_local_mountpoint(dentry))
@@ -3755,7 +3755,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
        detach_mounts(dentry);
 
 out:
-       mutex_unlock(&dentry->d_inode->i_mutex);
+       inode_unlock(dentry->d_inode);
        dput(dentry);
        if (!error)
                d_delete(dentry);
@@ -3794,7 +3794,7 @@ retry:
        if (error)
                goto exit1;
 
-       mutex_lock_nested(&path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(path.dentry->d_inode, I_MUTEX_PARENT);
        dentry = __lookup_hash(&last, path.dentry, lookup_flags);
        error = PTR_ERR(dentry);
        if (IS_ERR(dentry))
@@ -3810,7 +3810,7 @@ retry:
 exit3:
        dput(dentry);
 exit2:
-       mutex_unlock(&path.dentry->d_inode->i_mutex);
+       inode_unlock(path.dentry->d_inode);
        mnt_drop_write(path.mnt);
 exit1:
        path_put(&path);
@@ -3856,7 +3856,7 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegate
        if (!dir->i_op->unlink)
                return -EPERM;
 
-       mutex_lock(&target->i_mutex);
+       inode_lock(target);
        if (is_local_mountpoint(dentry))
                error = -EBUSY;
        else {
@@ -3873,7 +3873,7 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegate
                }
        }
 out:
-       mutex_unlock(&target->i_mutex);
+       inode_unlock(target);
 
        /* We don't d_delete() NFS sillyrenamed files--they still exist. */
        if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) {
@@ -3916,7 +3916,7 @@ retry:
        if (error)
                goto exit1;
 retry_deleg:
-       mutex_lock_nested(&path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(path.dentry->d_inode, I_MUTEX_PARENT);
        dentry = __lookup_hash(&last, path.dentry, lookup_flags);
        error = PTR_ERR(dentry);
        if (!IS_ERR(dentry)) {
@@ -3934,7 +3934,7 @@ retry_deleg:
 exit2:
                dput(dentry);
        }
-       mutex_unlock(&path.dentry->d_inode->i_mutex);
+       inode_unlock(path.dentry->d_inode);
        if (inode)
                iput(inode);    /* truncate the inode here */
        inode = NULL;
@@ -4086,7 +4086,7 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
        if (error)
                return error;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        /* Make sure we don't allow creating hardlink to an unlinked file */
        if (inode->i_nlink == 0 && !(inode->i_state & I_LINKABLE))
                error =  -ENOENT;
@@ -4103,7 +4103,7 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
                inode->i_state &= ~I_LINKABLE;
                spin_unlock(&inode->i_lock);
        }
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        if (!error)
                fsnotify_link(dir, inode, new_dentry);
        return error;
@@ -4303,7 +4303,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
        if (!is_dir || (flags & RENAME_EXCHANGE))
                lock_two_nondirectories(source, target);
        else if (target)
-               mutex_lock(&target->i_mutex);
+               inode_lock(target);
 
        error = -EBUSY;
        if (is_local_mountpoint(old_dentry) || is_local_mountpoint(new_dentry))
@@ -4356,7 +4356,7 @@ out:
        if (!is_dir || (flags & RENAME_EXCHANGE))
                unlock_two_nondirectories(source, target);
        else if (target)
-               mutex_unlock(&target->i_mutex);
+               inode_unlock(target);
        dput(new_dentry);
        if (!error) {
                fsnotify_move(old_dir, new_dir, old_name, is_dir,
index a830e1463704b590b72947d313cd50b454555668..4fb1691b435552c044134b56aa2032da35684b5b 100644 (file)
@@ -1961,9 +1961,9 @@ static struct mountpoint *lock_mount(struct path *path)
        struct vfsmount *mnt;
        struct dentry *dentry = path->dentry;
 retry:
-       mutex_lock(&dentry->d_inode->i_mutex);
+       inode_lock(dentry->d_inode);
        if (unlikely(cant_mount(dentry))) {
-               mutex_unlock(&dentry->d_inode->i_mutex);
+               inode_unlock(dentry->d_inode);
                return ERR_PTR(-ENOENT);
        }
        namespace_lock();
@@ -1974,13 +1974,13 @@ retry:
                        mp = new_mountpoint(dentry);
                if (IS_ERR(mp)) {
                        namespace_unlock();
-                       mutex_unlock(&dentry->d_inode->i_mutex);
+                       inode_unlock(dentry->d_inode);
                        return mp;
                }
                return mp;
        }
        namespace_unlock();
-       mutex_unlock(&path->dentry->d_inode->i_mutex);
+       inode_unlock(path->dentry->d_inode);
        path_put(path);
        path->mnt = mnt;
        dentry = path->dentry = dget(mnt->mnt_root);
@@ -1992,7 +1992,7 @@ static void unlock_mount(struct mountpoint *where)
        struct dentry *dentry = where->m_dentry;
        put_mountpoint(where);
        namespace_unlock();
-       mutex_unlock(&dentry->d_inode->i_mutex);
+       inode_unlock(dentry->d_inode);
 }
 
 static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)
index f0e3e9e747dd72aaa756611cb0c881d19b902d5a..26c2de2de13fd0ce7bb55287c58e268cf4bf37e6 100644 (file)
@@ -369,7 +369,7 @@ ncp_lookup_validate(struct dentry *dentry, unsigned int flags)
        if (!res) {
                struct inode *inode = d_inode(dentry);
 
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                if (finfo.i.dirEntNum == NCP_FINFO(inode)->dirEntNum) {
                        ncp_new_dentry(dentry);
                        val=1;
@@ -377,7 +377,7 @@ ncp_lookup_validate(struct dentry *dentry, unsigned int flags)
                        ncp_dbg(2, "found, but dirEntNum changed\n");
 
                ncp_update_inode2(inode, &finfo);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        }
 
 finished:
@@ -639,9 +639,9 @@ ncp_fill_cache(struct file *file, struct dir_context *ctx,
        } else {
                struct inode *inode = d_inode(newdent);
 
-               mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
+               inode_lock_nested(inode, I_MUTEX_CHILD);
                ncp_update_inode2(inode, entry);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        }
 
        if (ctl.idx >= NCP_DIRCACHE_SIZE) {
index 011324ce9df21181d6983f1fe81c659e1120a814..dd38ca1f2ecb9a181668496cc056653e1e470be2 100644 (file)
@@ -224,10 +224,10 @@ ncp_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
        iocb->ki_pos = pos;
 
        if (pos > i_size_read(inode)) {
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                if (pos > i_size_read(inode))
                        i_size_write(inode, pos);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        }
        ncp_dbg(1, "exit %pD2\n", file);
 outrel:
index c82a21228a342a8f4e6c42ea0d0366b225cb0c74..9cce67043f92a589193625ca3005d0b1e0f4e36b 100644 (file)
@@ -940,7 +940,7 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence)
        dfprintk(FILE, "NFS: llseek dir(%pD2, %lld, %d)\n",
                        filp, offset, whence);
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        switch (whence) {
                case 1:
                        offset += filp->f_pos;
@@ -957,7 +957,7 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence)
                dir_ctx->duped = 0;
        }
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return offset;
 }
 
@@ -972,9 +972,9 @@ static int nfs_fsync_dir(struct file *filp, loff_t start, loff_t end,
 
        dfprintk(FILE, "NFS: fsync dir(%pD2) datasync %d\n", filp, datasync);
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return 0;
 }
 
index 7ab7ec9f4eed8a0212d2079d7c2c79cb331d7a20..7a0cfd3266e561620577bef3cf449171f10d9f92 100644 (file)
@@ -580,7 +580,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
        if (!count)
                goto out;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        result = nfs_sync_mapping(mapping);
        if (result)
                goto out_unlock;
@@ -608,7 +608,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
        NFS_I(inode)->read_io += count;
        result = nfs_direct_read_schedule_iovec(dreq, iter, pos);
 
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        if (!result) {
                result = nfs_direct_wait(dreq);
@@ -622,7 +622,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
 out_release:
        nfs_direct_req_release(dreq);
 out_unlock:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 out:
        return result;
 }
@@ -1005,7 +1005,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
        pos = iocb->ki_pos;
        end = (pos + iov_iter_count(iter) - 1) >> PAGE_CACHE_SHIFT;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        result = nfs_sync_mapping(mapping);
        if (result)
@@ -1045,7 +1045,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
                                              pos >> PAGE_CACHE_SHIFT, end);
        }
 
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        if (!result) {
                result = nfs_direct_wait(dreq);
@@ -1066,7 +1066,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
 out_release:
        nfs_direct_req_release(dreq);
 out_unlock:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return result;
 }
 
index 4ef8f5addcadf8a7d8c67dbb4bd2a9583d26a487..748bb813b8ecd63095f2f50b8e733fe960e188ec 100644 (file)
@@ -278,9 +278,9 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
                ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
                if (ret != 0)
                        break;
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                ret = nfs_file_fsync_commit(file, start, end, datasync);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                /*
                 * If nfs_file_fsync_commit detected a server reboot, then
                 * resend all dirty pages that might have been covered by
index bb1f4e7a3270e0ff641ebea34c51f3080e01354c..3384dc8e66836c1fe4bc4e0a507a7e9250c5e719 100644 (file)
@@ -971,7 +971,7 @@ filelayout_mark_request_commit(struct nfs_page *req,
        u32 i, j;
 
        if (fl->commit_through_mds) {
-               nfs_request_add_commit_list(req, &cinfo->mds->list, cinfo);
+               nfs_request_add_commit_list(req, cinfo);
        } else {
                /* Note that we are calling nfs4_fl_calc_j_index on each page
                 * that ends up being committed to a data server.  An attractive
index 6594e9f903a0315dde75ac3fbccecf64521d22e9..0cb1abd535e38469201478ee597e74045e593bc5 100644 (file)
@@ -1215,7 +1215,7 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
                                        hdr->pgio_mirror_idx + 1,
                                        &hdr->pgio_mirror_idx))
                        goto out_eagain;
-               set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
+               set_bit(NFS_LAYOUT_RETURN_REQUESTED,
                        &hdr->lseg->pls_layout->plh_flags);
                pnfs_read_resend_pnfs(hdr);
                return task->tk_status;
@@ -1948,11 +1948,9 @@ ff_layout_encode_layoutreturn(struct pnfs_layout_hdr *lo,
        start = xdr_reserve_space(xdr, 4);
        BUG_ON(!start);
 
-       if (ff_layout_encode_ioerr(flo, xdr, args))
-               goto out;
-
+       ff_layout_encode_ioerr(flo, xdr, args);
        ff_layout_encode_iostats(flo, xdr, args);
-out:
+
        *start = cpu_to_be32((xdr->p - start - 1) * 4);
        dprintk("%s: Return\n", __func__);
 }
index bd0327541366c2066b7720933373d0c6758ad4e0..eb370460ce203c11c8461e88115cc0fab9c417fa 100644 (file)
@@ -218,63 +218,55 @@ static void extend_ds_error(struct nfs4_ff_layout_ds_err *err,
        err->length = end - err->offset;
 }
 
-static bool ds_error_can_merge(struct nfs4_ff_layout_ds_err *err,  u64 offset,
-                              u64 length, int status, enum nfs_opnum4 opnum,
-                              nfs4_stateid *stateid,
-                              struct nfs4_deviceid *deviceid)
+static int
+ff_ds_error_match(const struct nfs4_ff_layout_ds_err *e1,
+               const struct nfs4_ff_layout_ds_err *e2)
 {
-       return err->status == status && err->opnum == opnum &&
-              nfs4_stateid_match(&err->stateid, stateid) &&
-              !memcmp(&err->deviceid, deviceid, sizeof(*deviceid)) &&
-              end_offset(err->offset, err->length) >= offset &&
-              err->offset <= end_offset(offset, length);
-}
-
-static bool merge_ds_error(struct nfs4_ff_layout_ds_err *old,
-                          struct nfs4_ff_layout_ds_err *new)
-{
-       if (!ds_error_can_merge(old, new->offset, new->length, new->status,
-                               new->opnum, &new->stateid, &new->deviceid))
-               return false;
-
-       extend_ds_error(old, new->offset, new->length);
-       return true;
+       int ret;
+
+       if (e1->opnum != e2->opnum)
+               return e1->opnum < e2->opnum ? -1 : 1;
+       if (e1->status != e2->status)
+               return e1->status < e2->status ? -1 : 1;
+       ret = memcmp(&e1->stateid, &e2->stateid, sizeof(e1->stateid));
+       if (ret != 0)
+               return ret;
+       ret = memcmp(&e1->deviceid, &e2->deviceid, sizeof(e1->deviceid));
+       if (ret != 0)
+               return ret;
+       if (end_offset(e1->offset, e1->length) < e2->offset)
+               return -1;
+       if (e1->offset > end_offset(e2->offset, e2->length))
+               return 1;
+       /* If ranges overlap or are contiguous, they are the same */
+       return 0;
 }
 
-static bool
+static void
 ff_layout_add_ds_error_locked(struct nfs4_flexfile_layout *flo,
                              struct nfs4_ff_layout_ds_err *dserr)
 {
-       struct nfs4_ff_layout_ds_err *err;
-
-       list_for_each_entry(err, &flo->error_list, list) {
-               if (merge_ds_error(err, dserr)) {
-                       return true;
-               }
-       }
-
-       list_add(&dserr->list, &flo->error_list);
-       return false;
-}
-
-static bool
-ff_layout_update_ds_error(struct nfs4_flexfile_layout *flo, u64 offset,
-                         u64 length, int status, enum nfs_opnum4 opnum,
-                         nfs4_stateid *stateid, struct nfs4_deviceid *deviceid)
-{
-       bool found = false;
-       struct nfs4_ff_layout_ds_err *err;
-
-       list_for_each_entry(err, &flo->error_list, list) {
-               if (ds_error_can_merge(err, offset, length, status, opnum,
-                                      stateid, deviceid)) {
-                       found = true;
-                       extend_ds_error(err, offset, length);
+       struct nfs4_ff_layout_ds_err *err, *tmp;
+       struct list_head *head = &flo->error_list;
+       int match;
+
+       /* Do insertion sort w/ merges */
+       list_for_each_entry_safe(err, tmp, &flo->error_list, list) {
+               match = ff_ds_error_match(err, dserr);
+               if (match < 0)
+                       continue;
+               if (match > 0) {
+                       /* Add entry "dserr" _before_ entry "err" */
+                       head = &err->list;
                        break;
                }
+               /* Entries match, so merge "err" into "dserr" */
+               extend_ds_error(dserr, err->offset, err->length);
+               list_del(&err->list);
+               kfree(err);
        }
 
-       return found;
+       list_add_tail(&dserr->list, head);
 }
 
 int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo,
@@ -283,7 +275,6 @@ int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo,
                             gfp_t gfp_flags)
 {
        struct nfs4_ff_layout_ds_err *dserr;
-       bool needfree;
 
        if (status == 0)
                return 0;
@@ -291,14 +282,6 @@ int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo,
        if (mirror->mirror_ds == NULL)
                return -EINVAL;
 
-       spin_lock(&flo->generic_hdr.plh_inode->i_lock);
-       if (ff_layout_update_ds_error(flo, offset, length, status, opnum,
-                                     &mirror->stateid,
-                                     &mirror->mirror_ds->id_node.deviceid)) {
-               spin_unlock(&flo->generic_hdr.plh_inode->i_lock);
-               return 0;
-       }
-       spin_unlock(&flo->generic_hdr.plh_inode->i_lock);
        dserr = kmalloc(sizeof(*dserr), gfp_flags);
        if (!dserr)
                return -ENOMEM;
@@ -313,10 +296,8 @@ int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo,
               NFS4_DEVICEID4_SIZE);
 
        spin_lock(&flo->generic_hdr.plh_inode->i_lock);
-       needfree = ff_layout_add_ds_error_locked(flo, dserr);
+       ff_layout_add_ds_error_locked(flo, dserr);
        spin_unlock(&flo->generic_hdr.plh_inode->i_lock);
-       if (needfree)
-               kfree(dserr);
 
        return 0;
 }
@@ -431,7 +412,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
                                         OP_ILLEGAL, GFP_NOIO);
                if (!fail_return) {
                        if (ff_layout_has_available_ds(lseg))
-                               set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
+                               set_bit(NFS_LAYOUT_RETURN_REQUESTED,
                                        &lseg->pls_layout->plh_flags);
                        else
                                pnfs_error_mark_layout_for_return(ino, lseg);
index 8e24d886d2c5991c044965c3fa3093f3142632b8..86faecf8f328f2639bab1c5f7391df6d659610f2 100644 (file)
@@ -661,9 +661,9 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
        trace_nfs_getattr_enter(inode);
        /* Flush out writes to the server in order to update c/mtime.  */
        if (S_ISREG(inode->i_mode)) {
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                err = nfs_sync_inode(inode);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                if (err)
                        goto out;
        }
@@ -1178,9 +1178,9 @@ static int __nfs_revalidate_mapping(struct inode *inode,
        spin_unlock(&inode->i_lock);
        trace_nfs_invalidate_mapping_enter(inode);
        if (may_lock) {
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                ret = nfs_invalidate_mapping(inode, mapping);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        } else
                ret = nfs_invalidate_mapping(inode, mapping);
        trace_nfs_invalidate_mapping_exit(inode, ret);
index 4e8cc942336c83dcbf9b5b615eb21dfa14be758d..9a547aa3ec8e873160da5a36906eb3813650034d 100644 (file)
@@ -484,7 +484,7 @@ void nfs_retry_commit(struct list_head *page_list,
                      struct nfs_commit_info *cinfo,
                      u32 ds_commit_idx);
 void nfs_commitdata_release(struct nfs_commit_data *data);
-void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst,
+void nfs_request_add_commit_list(struct nfs_page *req,
                                 struct nfs_commit_info *cinfo);
 void nfs_request_add_commit_list_locked(struct nfs_page *req,
                struct list_head *dst,
index 6e8174930a48eb705979c93fca0d674fba72c0e7..bd25dc7077f72f5a79dfbf81e27f15a75a85dd92 100644 (file)
@@ -101,13 +101,13 @@ int nfs42_proc_allocate(struct file *filep, loff_t offset, loff_t len)
        if (!nfs_server_capable(inode, NFS_CAP_ALLOCATE))
                return -EOPNOTSUPP;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        err = nfs42_proc_fallocate(&msg, filep, offset, len);
        if (err == -EOPNOTSUPP)
                NFS_SERVER(inode)->caps &= ~NFS_CAP_ALLOCATE;
 
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return err;
 }
 
@@ -123,7 +123,7 @@ int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len)
                return -EOPNOTSUPP;
 
        nfs_wb_all(inode);
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        err = nfs42_proc_fallocate(&msg, filep, offset, len);
        if (err == 0)
@@ -131,7 +131,7 @@ int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len)
        if (err == -EOPNOTSUPP)
                NFS_SERVER(inode)->caps &= ~NFS_CAP_DEALLOCATE;
 
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return err;
 }
 
index 26f9a23e2b254998d6b0c053e2e0892d2cd16050..57ca1c8039c1e00bfcb6e9bb4d8fe7e40f4578d5 100644 (file)
@@ -141,11 +141,11 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
                ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
                if (ret != 0)
                        break;
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                ret = nfs_file_fsync_commit(file, start, end, datasync);
                if (!ret)
                        ret = pnfs_sync_inode(inode, !!datasync);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                /*
                 * If nfs_file_fsync_commit detected a server reboot, then
                 * resend all dirty pages that might have been covered by
@@ -219,13 +219,13 @@ static int nfs42_clone_file_range(struct file *src_file, loff_t src_off,
 
        /* XXX: do we lock at all? what if server needs CB_RECALL_LAYOUT? */
        if (same_inode) {
-               mutex_lock(&src_inode->i_mutex);
+               inode_lock(src_inode);
        } else if (dst_inode < src_inode) {
-               mutex_lock_nested(&dst_inode->i_mutex, I_MUTEX_PARENT);
-               mutex_lock_nested(&src_inode->i_mutex, I_MUTEX_CHILD);
+               inode_lock_nested(dst_inode, I_MUTEX_PARENT);
+               inode_lock_nested(src_inode, I_MUTEX_CHILD);
        } else {
-               mutex_lock_nested(&src_inode->i_mutex, I_MUTEX_PARENT);
-               mutex_lock_nested(&dst_inode->i_mutex, I_MUTEX_CHILD);
+               inode_lock_nested(src_inode, I_MUTEX_PARENT);
+               inode_lock_nested(dst_inode, I_MUTEX_CHILD);
        }
 
        /* flush all pending writes on both src and dst so that server
@@ -246,13 +246,13 @@ static int nfs42_clone_file_range(struct file *src_file, loff_t src_off,
 
 out_unlock:
        if (same_inode) {
-               mutex_unlock(&src_inode->i_mutex);
+               inode_unlock(src_inode);
        } else if (dst_inode < src_inode) {
-               mutex_unlock(&src_inode->i_mutex);
-               mutex_unlock(&dst_inode->i_mutex);
+               inode_unlock(src_inode);
+               inode_unlock(dst_inode);
        } else {
-               mutex_unlock(&dst_inode->i_mutex);
-               mutex_unlock(&src_inode->i_mutex);
+               inode_unlock(dst_inode);
+               inode_unlock(src_inode);
        }
 out:
        return ret;
index a3592cc34a20b76341c9c9b9af3378bdddebd477..482b6e94bb37cd1f3abaca235e94d961d9c15cb4 100644 (file)
@@ -52,9 +52,7 @@ static DEFINE_SPINLOCK(pnfs_spinlock);
  */
 static LIST_HEAD(pnfs_modules_tbl);
 
-static int
-pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid,
-                      enum pnfs_iomode iomode, bool sync);
+static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo);
 
 /* Return the registered pnfs layout driver module matching given id */
 static struct pnfs_layoutdriver_type *
@@ -243,6 +241,8 @@ pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo)
 {
        struct inode *inode = lo->plh_inode;
 
+       pnfs_layoutreturn_before_put_layout_hdr(lo);
+
        if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) {
                if (!list_empty(&lo->plh_segs))
                        WARN_ONCE(1, "NFS: BUG unfreed layout segments.\n");
@@ -345,58 +345,6 @@ pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo,
        rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq);
 }
 
-/* Return true if layoutreturn is needed */
-static bool
-pnfs_layout_need_return(struct pnfs_layout_hdr *lo,
-                       struct pnfs_layout_segment *lseg)
-{
-       struct pnfs_layout_segment *s;
-
-       if (!test_and_clear_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
-               return false;
-
-       list_for_each_entry(s, &lo->plh_segs, pls_list)
-               if (s != lseg && test_bit(NFS_LSEG_LAYOUTRETURN, &s->pls_flags))
-                       return false;
-
-       return true;
-}
-
-static bool
-pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo)
-{
-       if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
-               return false;
-       lo->plh_return_iomode = 0;
-       pnfs_get_layout_hdr(lo);
-       clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, &lo->plh_flags);
-       return true;
-}
-
-static void pnfs_layoutreturn_before_put_lseg(struct pnfs_layout_segment *lseg,
-               struct pnfs_layout_hdr *lo, struct inode *inode)
-{
-       lo = lseg->pls_layout;
-       inode = lo->plh_inode;
-
-       spin_lock(&inode->i_lock);
-       if (pnfs_layout_need_return(lo, lseg)) {
-               nfs4_stateid stateid;
-               enum pnfs_iomode iomode;
-               bool send;
-
-               nfs4_stateid_copy(&stateid, &lo->plh_stateid);
-               iomode = lo->plh_return_iomode;
-               send = pnfs_prepare_layoutreturn(lo);
-               spin_unlock(&inode->i_lock);
-               if (send) {
-                       /* Send an async layoutreturn so we dont deadlock */
-                       pnfs_send_layoutreturn(lo, &stateid, iomode, false);
-               }
-       } else
-               spin_unlock(&inode->i_lock);
-}
-
 void
 pnfs_put_lseg(struct pnfs_layout_segment *lseg)
 {
@@ -410,15 +358,8 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg)
                atomic_read(&lseg->pls_refcount),
                test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
 
-       /* Handle the case where refcount != 1 */
-       if (atomic_add_unless(&lseg->pls_refcount, -1, 1))
-               return;
-
        lo = lseg->pls_layout;
        inode = lo->plh_inode;
-       /* Do we need a layoutreturn? */
-       if (test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
-               pnfs_layoutreturn_before_put_lseg(lseg, lo, inode);
 
        if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) {
                if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags)) {
@@ -937,6 +878,17 @@ void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo)
        rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq);
 }
 
+static bool
+pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo)
+{
+       if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
+               return false;
+       lo->plh_return_iomode = 0;
+       pnfs_get_layout_hdr(lo);
+       clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags);
+       return true;
+}
+
 static int
 pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid,
                       enum pnfs_iomode iomode, bool sync)
@@ -971,6 +923,48 @@ out:
        return status;
 }
 
+/* Return true if layoutreturn is needed */
+static bool
+pnfs_layout_need_return(struct pnfs_layout_hdr *lo)
+{
+       struct pnfs_layout_segment *s;
+
+       if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
+               return false;
+
+       /* Defer layoutreturn until all lsegs are done */
+       list_for_each_entry(s, &lo->plh_segs, pls_list) {
+               if (test_bit(NFS_LSEG_LAYOUTRETURN, &s->pls_flags))
+                       return false;
+       }
+
+       return true;
+}
+
+static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo)
+{
+       struct inode *inode= lo->plh_inode;
+
+       if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
+               return;
+       spin_lock(&inode->i_lock);
+       if (pnfs_layout_need_return(lo)) {
+               nfs4_stateid stateid;
+               enum pnfs_iomode iomode;
+               bool send;
+
+               nfs4_stateid_copy(&stateid, &lo->plh_stateid);
+               iomode = lo->plh_return_iomode;
+               send = pnfs_prepare_layoutreturn(lo);
+               spin_unlock(&inode->i_lock);
+               if (send) {
+                       /* Send an async layoutreturn so we dont deadlock */
+                       pnfs_send_layoutreturn(lo, &stateid, iomode, false);
+               }
+       } else
+               spin_unlock(&inode->i_lock);
+}
+
 /*
  * Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr
  * when the layout segment list is empty.
@@ -1091,7 +1085,7 @@ bool pnfs_roc(struct inode *ino)
 
        nfs4_stateid_copy(&stateid, &lo->plh_stateid);
        /* always send layoutreturn if being marked so */
-       if (test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
+       if (test_and_clear_bit(NFS_LAYOUT_RETURN_REQUESTED,
                                   &lo->plh_flags))
                layoutreturn = pnfs_prepare_layoutreturn(lo);
 
@@ -1772,7 +1766,7 @@ pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
                        pnfs_set_plh_return_iomode(lo, return_range->iomode);
                        if (!mark_lseg_invalid(lseg, tmp_list))
                                remaining++;
-                       set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
+                       set_bit(NFS_LAYOUT_RETURN_REQUESTED,
                                        &lo->plh_flags);
                }
        return remaining;
index 9f4e2a47f4aa4ff41a590178dde2ddf160ae244f..1ac1db5f6dadb6508cf8658a53385add279eb9fa 100644 (file)
@@ -94,8 +94,8 @@ enum {
        NFS_LAYOUT_RO_FAILED = 0,       /* get ro layout failed stop trying */
        NFS_LAYOUT_RW_FAILED,           /* get rw layout failed stop trying */
        NFS_LAYOUT_BULK_RECALL,         /* bulk recall affecting layout */
-       NFS_LAYOUT_RETURN,              /* Return this layout ASAP */
-       NFS_LAYOUT_RETURN_BEFORE_CLOSE, /* Return this layout before close */
+       NFS_LAYOUT_RETURN,              /* layoutreturn in progress */
+       NFS_LAYOUT_RETURN_REQUESTED,    /* Return this layout ASAP */
        NFS_LAYOUT_INVALID_STID,        /* layout stateid id is invalid */
        NFS_LAYOUT_FIRST_LAYOUTGET,     /* Serialize first layoutget */
 };
index ce43cd6d88c634773ae86a2922d3700d6f97ad33..5754835a288608bd7dc3b58b8f92a2a6ee8a20bc 100644 (file)
@@ -830,11 +830,10 @@ EXPORT_SYMBOL_GPL(nfs_request_add_commit_list_locked);
  * holding the nfs_page lock.
  */
 void
-nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst,
-                           struct nfs_commit_info *cinfo)
+nfs_request_add_commit_list(struct nfs_page *req, struct nfs_commit_info *cinfo)
 {
        spin_lock(cinfo->lock);
-       nfs_request_add_commit_list_locked(req, dst, cinfo);
+       nfs_request_add_commit_list_locked(req, &cinfo->mds->list, cinfo);
        spin_unlock(cinfo->lock);
        nfs_mark_page_unstable(req->wb_page, cinfo);
 }
@@ -892,7 +891,7 @@ nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
 {
        if (pnfs_mark_request_commit(req, lseg, cinfo, ds_commit_idx))
                return;
-       nfs_request_add_commit_list(req, &cinfo->mds->list, cinfo);
+       nfs_request_add_commit_list(req, cinfo);
 }
 
 static void
index 819ad812c71b903200784f7c65622ff07ce59d72..4cba7865f4966b825e3d47c7d423fdb8ca11a262 100644 (file)
@@ -55,10 +55,10 @@ nfsd4_security_inode_setsecctx(struct svc_fh *resfh, struct xdr_netobj *label, u
        struct inode *inode = d_inode(resfh->fh_dentry);
        int status;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        status = security_inode_setsecctx(resfh->fh_dentry,
                label->data, label->len);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        if (status)
                /*
index 79f0307a5ec8331426ef331c5c843d59966dd5ff..dc8ebecf561866a2d6fcee729813528852be6313 100644 (file)
@@ -192,7 +192,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
 
        dir = nn->rec_file->f_path.dentry;
        /* lock the parent */
-       mutex_lock(&d_inode(dir)->i_mutex);
+       inode_lock(d_inode(dir));
 
        dentry = lookup_one_len(dname, dir, HEXDIR_LEN-1);
        if (IS_ERR(dentry)) {
@@ -213,7 +213,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
 out_put:
        dput(dentry);
 out_unlock:
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
        if (status == 0) {
                if (nn->in_grace) {
                        crp = nfs4_client_to_reclaim(dname, nn);
@@ -286,7 +286,7 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn)
        }
 
        status = iterate_dir(nn->rec_file, &ctx.ctx);
-       mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
 
        list_for_each_entry_safe(entry, tmp, &ctx.names, list) {
                if (!status) {
@@ -302,7 +302,7 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn)
                list_del(&entry->list);
                kfree(entry);
        }
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
        nfs4_reset_creds(original_cred);
 
        list_for_each_entry_safe(entry, tmp, &ctx.names, list) {
@@ -322,7 +322,7 @@ nfsd4_unlink_clid_dir(char *name, int namlen, struct nfsd_net *nn)
        dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name);
 
        dir = nn->rec_file->f_path.dentry;
-       mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
        dentry = lookup_one_len(name, dir, namlen);
        if (IS_ERR(dentry)) {
                status = PTR_ERR(dentry);
@@ -335,7 +335,7 @@ nfsd4_unlink_clid_dir(char *name, int namlen, struct nfsd_net *nn)
 out:
        dput(dentry);
 out_unlock:
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
        return status;
 }
 
index 0770bcb543c81d843106825a2c9c0da6be3d15a4..f84fe6bf9aee144cd7227711540f0027e24ef6c2 100644 (file)
@@ -288,7 +288,7 @@ fh_lock_nested(struct svc_fh *fhp, unsigned int subclass)
        }
 
        inode = d_inode(dentry);
-       mutex_lock_nested(&inode->i_mutex, subclass);
+       inode_lock_nested(inode, subclass);
        fill_pre_wcc(fhp);
        fhp->fh_locked = true;
 }
@@ -307,7 +307,7 @@ fh_unlock(struct svc_fh *fhp)
 {
        if (fhp->fh_locked) {
                fill_post_wcc(fhp);
-               mutex_unlock(&d_inode(fhp->fh_dentry)->i_mutex);
+               inode_unlock(d_inode(fhp->fh_dentry));
                fhp->fh_locked = false;
        }
 }
index 6739077f17fe7827beee7b7469b2c06450862362..5d2a57e4c03ad3494e0b08fc3dfbc0e13fb192df 100644 (file)
@@ -493,9 +493,9 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp,
 
        dentry = fhp->fh_dentry;
 
-       mutex_lock(&d_inode(dentry)->i_mutex);
+       inode_lock(d_inode(dentry));
        host_error = security_inode_setsecctx(dentry, label->data, label->len);
-       mutex_unlock(&d_inode(dentry)->i_mutex);
+       inode_unlock(d_inode(dentry));
        return nfserrno(host_error);
 }
 #else
index 10b22527a617dcdefc36d36ce4fc3d3658e20330..21a1e2e0d92fe22696ebac0bd702238fb95ec02f 100644 (file)
@@ -1003,7 +1003,7 @@ int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
        if (ret)
                return ret;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        isize = i_size_read(inode);
 
@@ -1113,6 +1113,6 @@ int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
        if (ret == 1)
                ret = 0;
 
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return ret;
 }
index aba43811d6ef1221c68b275afda0174f072343bf..e8fe24882b5baa417e3002ada770882f4771534b 100644 (file)
@@ -158,7 +158,7 @@ static int nilfs_ioctl_setflags(struct inode *inode, struct file *filp,
 
        flags = nilfs_mask_flags(inode->i_mode, flags);
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        oldflags = NILFS_I(inode)->i_flags;
 
@@ -186,7 +186,7 @@ static int nilfs_ioctl_setflags(struct inode *inode, struct file *filp,
        nilfs_mark_inode_dirty(inode);
        ret = nilfs_transaction_commit(inode->i_sb);
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        mnt_drop_write_file(filp);
        return ret;
 }
index 9e38dafa3bc78ec71acc7acec733d9cd52c4f40a..b2eff5816adc3e915f03665c4b283f116a7b1eab 100644 (file)
@@ -1509,7 +1509,7 @@ static int ntfs_dir_fsync(struct file *filp, loff_t start, loff_t end,
        err = filemap_write_and_wait_range(vi->i_mapping, start, end);
        if (err)
                return err;
-       mutex_lock(&vi->i_mutex);
+       inode_lock(vi);
 
        BUG_ON(!S_ISDIR(vi->i_mode));
        /* If the bitmap attribute inode is in memory sync it, too. */
@@ -1532,7 +1532,7 @@ static int ntfs_dir_fsync(struct file *filp, loff_t start, loff_t end,
        else
                ntfs_warning(vi->i_sb, "Failed to f%ssync inode 0x%lx.  Error "
                                "%u.", datasync ? "data" : "", vi->i_ino, -ret);
-       mutex_unlock(&vi->i_mutex);
+       inode_unlock(vi);
        return ret;
 }
 
index 9d383e5eff0ea5519bffb34528b5aeccbe6f3831..bed4d427dfaee110c7da84c70f32c673fbdc80aa 100644 (file)
@@ -1944,14 +1944,14 @@ static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
        ssize_t written = 0;
        ssize_t err;
 
-       mutex_lock(&vi->i_mutex);
+       inode_lock(vi);
        /* We can write back this queue in page reclaim. */
        current->backing_dev_info = inode_to_bdi(vi);
        err = ntfs_prepare_file_for_write(iocb, from);
        if (iov_iter_count(from) && !err)
                written = ntfs_perform_write(file, from, iocb->ki_pos);
        current->backing_dev_info = NULL;
-       mutex_unlock(&vi->i_mutex);
+       inode_unlock(vi);
        if (likely(written > 0)) {
                err = generic_write_sync(file, iocb->ki_pos, written);
                if (err < 0)
@@ -1996,7 +1996,7 @@ static int ntfs_file_fsync(struct file *filp, loff_t start, loff_t end,
        err = filemap_write_and_wait_range(vi->i_mapping, start, end);
        if (err)
                return err;
-       mutex_lock(&vi->i_mutex);
+       inode_lock(vi);
 
        BUG_ON(S_ISDIR(vi->i_mode));
        if (!datasync || !NInoNonResident(NTFS_I(vi)))
@@ -2015,7 +2015,7 @@ static int ntfs_file_fsync(struct file *filp, loff_t start, loff_t end,
        else
                ntfs_warning(vi->i_sb, "Failed to f%ssync inode 0x%lx.  Error "
                                "%u.", datasync ? "data" : "", vi->i_ino, -ret);
-       mutex_unlock(&vi->i_mutex);
+       inode_unlock(vi);
        return ret;
 }
 
index d80e3315cab0ec73cfdd9b668594d588f559273e..9793e68ba1ddf47091d0dc80a324dfe27a39dbc1 100644 (file)
@@ -48,7 +48,7 @@ bool ntfs_mark_quotas_out_of_date(ntfs_volume *vol)
                ntfs_error(vol->sb, "Quota inodes are not open.");
                return false;
        }
-       mutex_lock(&vol->quota_q_ino->i_mutex);
+       inode_lock(vol->quota_q_ino);
        ictx = ntfs_index_ctx_get(NTFS_I(vol->quota_q_ino));
        if (!ictx) {
                ntfs_error(vol->sb, "Failed to get index context.");
@@ -98,7 +98,7 @@ bool ntfs_mark_quotas_out_of_date(ntfs_volume *vol)
        ntfs_index_entry_mark_dirty(ictx);
 set_done:
        ntfs_index_ctx_put(ictx);
-       mutex_unlock(&vol->quota_q_ino->i_mutex);
+       inode_unlock(vol->quota_q_ino);
        /*
         * We set the flag so we do not try to mark the quotas out of date
         * again on remount.
@@ -110,7 +110,7 @@ done:
 err_out:
        if (ictx)
                ntfs_index_ctx_put(ictx);
-       mutex_unlock(&vol->quota_q_ino->i_mutex);
+       inode_unlock(vol->quota_q_ino);
        return false;
 }
 
index 2f77f8dfb861415c9e5196c129d6d4902e042ce8..1b38abdaa3ed3da7522f461ae49a77ccf5ee5982 100644 (file)
@@ -1284,10 +1284,10 @@ static int check_windows_hibernation_status(ntfs_volume *vol)
         * Find the inode number for the hibernation file by looking up the
         * filename hiberfil.sys in the root directory.
         */
-       mutex_lock(&vol->root_ino->i_mutex);
+       inode_lock(vol->root_ino);
        mref = ntfs_lookup_inode_by_name(NTFS_I(vol->root_ino), hiberfil, 12,
                        &name);
-       mutex_unlock(&vol->root_ino->i_mutex);
+       inode_unlock(vol->root_ino);
        if (IS_ERR_MREF(mref)) {
                ret = MREF_ERR(mref);
                /* If the file does not exist, Windows is not hibernated. */
@@ -1377,10 +1377,10 @@ static bool load_and_init_quota(ntfs_volume *vol)
         * Find the inode number for the quota file by looking up the filename
         * $Quota in the extended system files directory $Extend.
         */
-       mutex_lock(&vol->extend_ino->i_mutex);
+       inode_lock(vol->extend_ino);
        mref = ntfs_lookup_inode_by_name(NTFS_I(vol->extend_ino), Quota, 6,
                        &name);
-       mutex_unlock(&vol->extend_ino->i_mutex);
+       inode_unlock(vol->extend_ino);
        if (IS_ERR_MREF(mref)) {
                /*
                 * If the file does not exist, quotas are disabled and have
@@ -1460,10 +1460,10 @@ static bool load_and_init_usnjrnl(ntfs_volume *vol)
         * Find the inode number for the transaction log file by looking up the
         * filename $UsnJrnl in the extended system files directory $Extend.
         */
-       mutex_lock(&vol->extend_ino->i_mutex);
+       inode_lock(vol->extend_ino);
        mref = ntfs_lookup_inode_by_name(NTFS_I(vol->extend_ino), UsnJrnl, 8,
                        &name);
-       mutex_unlock(&vol->extend_ino->i_mutex);
+       inode_unlock(vol->extend_ino);
        if (IS_ERR_MREF(mref)) {
                /*
                 * If the file does not exist, transaction logging is disabled,
index a3ded88718c93eec4bf9767018826c960285a07d..d002579c6f2b03a62209110a1ce84e7605d2651a 100644 (file)
@@ -5719,7 +5719,7 @@ int ocfs2_remove_btree_range(struct inode *inode,
                goto bail;
        }
 
-       mutex_lock(&tl_inode->i_mutex);
+       inode_lock(tl_inode);
 
        if (ocfs2_truncate_log_needs_flush(osb)) {
                ret = __ocfs2_flush_truncate_log(osb);
@@ -5776,7 +5776,7 @@ int ocfs2_remove_btree_range(struct inode *inode,
 out_commit:
        ocfs2_commit_trans(osb, handle);
 out:
-       mutex_unlock(&tl_inode->i_mutex);
+       inode_unlock(tl_inode);
 bail:
        if (meta_ac)
                ocfs2_free_alloc_context(meta_ac);
@@ -5832,7 +5832,7 @@ int ocfs2_truncate_log_append(struct ocfs2_super *osb,
        struct ocfs2_dinode *di;
        struct ocfs2_truncate_log *tl;
 
-       BUG_ON(mutex_trylock(&tl_inode->i_mutex));
+       BUG_ON(inode_trylock(tl_inode));
 
        start_cluster = ocfs2_blocks_to_clusters(osb->sb, start_blk);
 
@@ -5980,7 +5980,7 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
        struct ocfs2_dinode *di;
        struct ocfs2_truncate_log *tl;
 
-       BUG_ON(mutex_trylock(&tl_inode->i_mutex));
+       BUG_ON(inode_trylock(tl_inode));
 
        di = (struct ocfs2_dinode *) tl_bh->b_data;
 
@@ -6008,7 +6008,7 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
                goto out;
        }
 
-       mutex_lock(&data_alloc_inode->i_mutex);
+       inode_lock(data_alloc_inode);
 
        status = ocfs2_inode_lock(data_alloc_inode, &data_alloc_bh, 1);
        if (status < 0) {
@@ -6035,7 +6035,7 @@ out_unlock:
        ocfs2_inode_unlock(data_alloc_inode, 1);
 
 out_mutex:
-       mutex_unlock(&data_alloc_inode->i_mutex);
+       inode_unlock(data_alloc_inode);
        iput(data_alloc_inode);
 
 out:
@@ -6047,9 +6047,9 @@ int ocfs2_flush_truncate_log(struct ocfs2_super *osb)
        int status;
        struct inode *tl_inode = osb->osb_tl_inode;
 
-       mutex_lock(&tl_inode->i_mutex);
+       inode_lock(tl_inode);
        status = __ocfs2_flush_truncate_log(osb);
-       mutex_unlock(&tl_inode->i_mutex);
+       inode_unlock(tl_inode);
 
        return status;
 }
@@ -6208,7 +6208,7 @@ int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb,
                (unsigned long long)le64_to_cpu(tl_copy->i_blkno),
                num_recs);
 
-       mutex_lock(&tl_inode->i_mutex);
+       inode_lock(tl_inode);
        for(i = 0; i < num_recs; i++) {
                if (ocfs2_truncate_log_needs_flush(osb)) {
                        status = __ocfs2_flush_truncate_log(osb);
@@ -6239,7 +6239,7 @@ int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb,
        }
 
 bail_up:
-       mutex_unlock(&tl_inode->i_mutex);
+       inode_unlock(tl_inode);
 
        return status;
 }
@@ -6346,7 +6346,7 @@ static int ocfs2_free_cached_blocks(struct ocfs2_super *osb,
                goto out;
        }
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        ret = ocfs2_inode_lock(inode, &di_bh, 1);
        if (ret) {
@@ -6395,7 +6395,7 @@ out_unlock:
        ocfs2_inode_unlock(inode, 1);
        brelse(di_bh);
 out_mutex:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        iput(inode);
 out:
        while(head) {
@@ -6439,7 +6439,7 @@ static int ocfs2_free_cached_clusters(struct ocfs2_super *osb,
        handle_t *handle;
        int ret = 0;
 
-       mutex_lock(&tl_inode->i_mutex);
+       inode_lock(tl_inode);
 
        while (head) {
                if (ocfs2_truncate_log_needs_flush(osb)) {
@@ -6471,7 +6471,7 @@ static int ocfs2_free_cached_clusters(struct ocfs2_super *osb,
                }
        }
 
-       mutex_unlock(&tl_inode->i_mutex);
+       inode_unlock(tl_inode);
 
        while (head) {
                /* Premature exit may have left some dangling items. */
@@ -7355,7 +7355,7 @@ int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range)
                goto out;
        }
 
-       mutex_lock(&main_bm_inode->i_mutex);
+       inode_lock(main_bm_inode);
 
        ret = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 0);
        if (ret < 0) {
@@ -7422,7 +7422,7 @@ out_unlock:
        ocfs2_inode_unlock(main_bm_inode, 0);
        brelse(main_bm_bh);
 out_mutex:
-       mutex_unlock(&main_bm_inode->i_mutex);
+       inode_unlock(main_bm_inode);
        iput(main_bm_inode);
 out:
        return ret;
index 7f604727f487b3680d7f145161288656dd1aaf80..794fd1587f34a3bc210d60770525a02f3cd59aba 100644 (file)
@@ -2046,9 +2046,9 @@ static int ocfs2_try_to_free_truncate_log(struct ocfs2_super *osb,
        int ret = 0;
        unsigned int truncated_clusters;
 
-       mutex_lock(&osb->osb_tl_inode->i_mutex);
+       inode_lock(osb->osb_tl_inode);
        truncated_clusters = osb->truncated_clusters;
-       mutex_unlock(&osb->osb_tl_inode->i_mutex);
+       inode_unlock(osb->osb_tl_inode);
 
        /*
         * Check whether we can succeed in allocating if we free
index a3cc6d2fc896cea05a8605dcc84e09b7840a5e1a..a76b9ea7722e6d7d00d8c0c58217a4a0387f0ff0 100644 (file)
@@ -1254,15 +1254,15 @@ static const struct file_operations o2hb_debug_fops = {
 
 void o2hb_exit(void)
 {
-       kfree(o2hb_db_livenodes);
-       kfree(o2hb_db_liveregions);
-       kfree(o2hb_db_quorumregions);
-       kfree(o2hb_db_failedregions);
        debugfs_remove(o2hb_debug_failedregions);
        debugfs_remove(o2hb_debug_quorumregions);
        debugfs_remove(o2hb_debug_liveregions);
        debugfs_remove(o2hb_debug_livenodes);
        debugfs_remove(o2hb_debug_dir);
+       kfree(o2hb_db_livenodes);
+       kfree(o2hb_db_liveregions);
+       kfree(o2hb_db_quorumregions);
+       kfree(o2hb_db_failedregions);
 }
 
 static struct dentry *o2hb_debug_create(const char *name, struct dentry *dir,
@@ -1438,13 +1438,15 @@ static void o2hb_region_release(struct config_item *item)
 
        kfree(reg->hr_slots);
 
-       kfree(reg->hr_db_regnum);
-       kfree(reg->hr_db_livenodes);
        debugfs_remove(reg->hr_debug_livenodes);
        debugfs_remove(reg->hr_debug_regnum);
        debugfs_remove(reg->hr_debug_elapsed_time);
        debugfs_remove(reg->hr_debug_pinned);
        debugfs_remove(reg->hr_debug_dir);
+       kfree(reg->hr_db_livenodes);
+       kfree(reg->hr_db_regnum);
+       kfree(reg->hr_debug_elapsed_time);
+       kfree(reg->hr_debug_pinned);
 
        spin_lock(&o2hb_live_lock);
        list_del(&reg->hr_all_item);
index ffecf89c8c1cd23d532a9155660a6609e9d38551..e1adf285fc3142ab8c0856ea0a28d766c914ce3f 100644 (file)
@@ -4361,7 +4361,7 @@ static int ocfs2_dx_dir_remove_index(struct inode *dir,
                mlog_errno(ret);
                goto out;
        }
-       mutex_lock(&dx_alloc_inode->i_mutex);
+       inode_lock(dx_alloc_inode);
 
        ret = ocfs2_inode_lock(dx_alloc_inode, &dx_alloc_bh, 1);
        if (ret) {
@@ -4410,7 +4410,7 @@ out_unlock:
        ocfs2_inode_unlock(dx_alloc_inode, 1);
 
 out_mutex:
-       mutex_unlock(&dx_alloc_inode->i_mutex);
+       inode_unlock(dx_alloc_inode);
        brelse(dx_alloc_bh);
 out:
        iput(dx_alloc_inode);
index c5bdf02c213bd7ad5c086331a1ea8194bde40578..b94a425f0175fa84818d78742659c42293d96dba 100644 (file)
@@ -2367,6 +2367,8 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node)
                                                break;
                                        }
                                }
+                               dlm_lockres_clear_refmap_bit(dlm, res,
+                                               dead_node);
                                spin_unlock(&res->spinlock);
                                continue;
                        }
index f92612e4b9d61144b13021c83dd8b4b1e9553ac5..474e57f834e6caff12eaca03c8ece4d35303d1e0 100644 (file)
@@ -1390,6 +1390,7 @@ static int __ocfs2_cluster_lock(struct ocfs2_super *osb,
        unsigned int gen;
        int noqueue_attempted = 0;
        int dlm_locked = 0;
+       int kick_dc = 0;
 
        if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) {
                mlog_errno(-EINVAL);
@@ -1524,7 +1525,12 @@ update_holders:
 unlock:
        lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
 
+       /* ocfs2_unblock_lock reques on seeing OCFS2_LOCK_UPCONVERT_FINISHING */
+       kick_dc = (lockres->l_flags & OCFS2_LOCK_BLOCKED);
+
        spin_unlock_irqrestore(&lockres->l_lock, flags);
+       if (kick_dc)
+               ocfs2_wake_downconvert_thread(osb);
 out:
        /*
         * This is helping work around a lock inversion between the page lock
index d6312793250927b1efacf4f09c18c38f7da75e5d..7cb38fdca229f050f89aed34c93c85db11a99ff8 100644 (file)
@@ -1872,7 +1872,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
        if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
                return -EROFS;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        /*
         * This prevents concurrent writes on other nodes
@@ -1991,7 +1991,7 @@ out_rw_unlock:
        ocfs2_rw_unlock(inode, 1);
 
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return ret;
 }
 
@@ -2299,7 +2299,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
        appending = iocb->ki_flags & IOCB_APPEND ? 1 : 0;
        direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
 relock:
        /*
@@ -2435,7 +2435,7 @@ out:
                ocfs2_rw_unlock(inode, rw_level);
 
 out_mutex:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        if (written)
                ret = written;
@@ -2547,7 +2547,7 @@ static loff_t ocfs2_file_llseek(struct file *file, loff_t offset, int whence)
        struct inode *inode = file->f_mapping->host;
        int ret = 0;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        switch (whence) {
        case SEEK_SET:
@@ -2585,7 +2585,7 @@ static loff_t ocfs2_file_llseek(struct file *file, loff_t offset, int whence)
        offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
 
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        if (ret)
                return ret;
        return offset;
index 97a563bab9a871ee3e371a9796b990c24835f47f..36294446d9607ee9f0d07814552fd6f0bf97e6ab 100644 (file)
@@ -630,10 +630,10 @@ static int ocfs2_remove_inode(struct inode *inode,
                goto bail;
        }
 
-       mutex_lock(&inode_alloc_inode->i_mutex);
+       inode_lock(inode_alloc_inode);
        status = ocfs2_inode_lock(inode_alloc_inode, &inode_alloc_bh, 1);
        if (status < 0) {
-               mutex_unlock(&inode_alloc_inode->i_mutex);
+               inode_unlock(inode_alloc_inode);
 
                mlog_errno(status);
                goto bail;
@@ -680,7 +680,7 @@ bail_commit:
        ocfs2_commit_trans(osb, handle);
 bail_unlock:
        ocfs2_inode_unlock(inode_alloc_inode, 1);
-       mutex_unlock(&inode_alloc_inode->i_mutex);
+       inode_unlock(inode_alloc_inode);
        brelse(inode_alloc_bh);
 bail:
        iput(inode_alloc_inode);
@@ -751,10 +751,10 @@ static int ocfs2_wipe_inode(struct inode *inode,
                /* Lock the orphan dir. The lock will be held for the entire
                 * delete_inode operation. We do this now to avoid races with
                 * recovery completion on other nodes. */
-               mutex_lock(&orphan_dir_inode->i_mutex);
+               inode_lock(orphan_dir_inode);
                status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
                if (status < 0) {
-                       mutex_unlock(&orphan_dir_inode->i_mutex);
+                       inode_unlock(orphan_dir_inode);
 
                        mlog_errno(status);
                        goto bail;
@@ -803,7 +803,7 @@ bail_unlock_dir:
                return status;
 
        ocfs2_inode_unlock(orphan_dir_inode, 1);
-       mutex_unlock(&orphan_dir_inode->i_mutex);
+       inode_unlock(orphan_dir_inode);
        brelse(orphan_dir_bh);
 bail:
        iput(orphan_dir_inode);
index 16b0bb482ea7cb68540c8fe7ea133b1bc89185ce..4506ec5ec2ea624235a36c06cb755ef2211e0914 100644 (file)
@@ -86,7 +86,7 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
        unsigned oldflags;
        int status;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        status = ocfs2_inode_lock(inode, &bh, 1);
        if (status < 0) {
@@ -135,7 +135,7 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
 bail_unlock:
        ocfs2_inode_unlock(inode, 1);
 bail:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        brelse(bh);
 
@@ -287,7 +287,7 @@ static int ocfs2_info_scan_inode_alloc(struct ocfs2_super *osb,
        struct ocfs2_dinode *dinode_alloc = NULL;
 
        if (inode_alloc)
-               mutex_lock(&inode_alloc->i_mutex);
+               inode_lock(inode_alloc);
 
        if (o2info_coherent(&fi->ifi_req)) {
                status = ocfs2_inode_lock(inode_alloc, &bh, 0);
@@ -317,7 +317,7 @@ bail:
                ocfs2_inode_unlock(inode_alloc, 0);
 
        if (inode_alloc)
-               mutex_unlock(&inode_alloc->i_mutex);
+               inode_unlock(inode_alloc);
 
        brelse(bh);
 
@@ -547,7 +547,7 @@ static int ocfs2_info_freefrag_scan_bitmap(struct ocfs2_super *osb,
        struct ocfs2_dinode *gb_dinode = NULL;
 
        if (gb_inode)
-               mutex_lock(&gb_inode->i_mutex);
+               inode_lock(gb_inode);
 
        if (o2info_coherent(&ffg->iff_req)) {
                status = ocfs2_inode_lock(gb_inode, &bh, 0);
@@ -604,7 +604,7 @@ bail:
                ocfs2_inode_unlock(gb_inode, 0);
 
        if (gb_inode)
-               mutex_unlock(&gb_inode->i_mutex);
+               inode_unlock(gb_inode);
 
        iput(gb_inode);
        brelse(bh);
index 3772a2dbb980c950279349146f14c4ec3887ead2..61b833b721d89e4184453760507996d5f208800a 100644 (file)
@@ -2088,7 +2088,7 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb,
                return status;
        }
 
-       mutex_lock(&orphan_dir_inode->i_mutex);
+       inode_lock(orphan_dir_inode);
        status = ocfs2_inode_lock(orphan_dir_inode, NULL, 0);
        if (status < 0) {
                mlog_errno(status);
@@ -2106,7 +2106,7 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb,
 out_cluster:
        ocfs2_inode_unlock(orphan_dir_inode, 0);
 out:
-       mutex_unlock(&orphan_dir_inode->i_mutex);
+       inode_unlock(orphan_dir_inode);
        iput(orphan_dir_inode);
        return status;
 }
@@ -2196,7 +2196,7 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
                oi->ip_next_orphan = NULL;
 
                if (oi->ip_flags & OCFS2_INODE_DIO_ORPHAN_ENTRY) {
-                       mutex_lock(&inode->i_mutex);
+                       inode_lock(inode);
                        ret = ocfs2_rw_lock(inode, 1);
                        if (ret < 0) {
                                mlog_errno(ret);
@@ -2235,7 +2235,7 @@ unlock_inode:
 unlock_rw:
                        ocfs2_rw_unlock(inode, 1);
 unlock_mutex:
-                       mutex_unlock(&inode->i_mutex);
+                       inode_unlock(inode);
 
                        /* clear dio flag in ocfs2_inode_info */
                        oi->ip_flags &= ~OCFS2_INODE_DIO_ORPHAN_ENTRY;
index e9c99e35f5ea74e160c158ca05b01371057d24d3..7d62c43a2c3e24934965ba5df66f44e33fc0488a 100644 (file)
@@ -414,7 +414,7 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
                goto out;
        }
 
-       mutex_lock(&main_bm_inode->i_mutex);
+       inode_lock(main_bm_inode);
 
        status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
        if (status < 0) {
@@ -468,7 +468,7 @@ out_unlock:
        ocfs2_inode_unlock(main_bm_inode, 1);
 
 out_mutex:
-       mutex_unlock(&main_bm_inode->i_mutex);
+       inode_unlock(main_bm_inode);
        iput(main_bm_inode);
 
 out:
@@ -506,7 +506,7 @@ int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb,
                goto bail;
        }
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        status = ocfs2_read_inode_block_full(inode, &alloc_bh,
                                             OCFS2_BH_IGNORE_CACHE);
@@ -539,7 +539,7 @@ bail:
        brelse(alloc_bh);
 
        if (inode) {
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                iput(inode);
        }
 
@@ -571,7 +571,7 @@ int ocfs2_complete_local_alloc_recovery(struct ocfs2_super *osb,
                goto out;
        }
 
-       mutex_lock(&main_bm_inode->i_mutex);
+       inode_lock(main_bm_inode);
 
        status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
        if (status < 0) {
@@ -601,7 +601,7 @@ out_unlock:
        ocfs2_inode_unlock(main_bm_inode, 1);
 
 out_mutex:
-       mutex_unlock(&main_bm_inode->i_mutex);
+       inode_unlock(main_bm_inode);
 
        brelse(main_bm_bh);
 
@@ -643,7 +643,7 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
                goto bail;
        }
 
-       mutex_lock(&local_alloc_inode->i_mutex);
+       inode_lock(local_alloc_inode);
 
        /*
         * We must double check state and allocator bits because
@@ -709,7 +709,7 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
        status = 0;
 bail:
        if (status < 0 && local_alloc_inode) {
-               mutex_unlock(&local_alloc_inode->i_mutex);
+               inode_unlock(local_alloc_inode);
                iput(local_alloc_inode);
        }
 
index 124471d26a73f4fe79738e2b89662d82a7ad0561..e3d05d9901a3da26b123865545446f5ef96b08a5 100644 (file)
@@ -276,7 +276,7 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
         *      context->data_ac->ac_resv = &OCFS2_I(inode)->ip_la_data_resv;
         */
 
-       mutex_lock(&tl_inode->i_mutex);
+       inode_lock(tl_inode);
 
        if (ocfs2_truncate_log_needs_flush(osb)) {
                ret = __ocfs2_flush_truncate_log(osb);
@@ -338,7 +338,7 @@ out_commit:
        ocfs2_commit_trans(osb, handle);
 
 out_unlock_mutex:
-       mutex_unlock(&tl_inode->i_mutex);
+       inode_unlock(tl_inode);
 
        if (context->data_ac) {
                ocfs2_free_alloc_context(context->data_ac);
@@ -632,7 +632,7 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
                goto out;
        }
 
-       mutex_lock(&gb_inode->i_mutex);
+       inode_lock(gb_inode);
 
        ret = ocfs2_inode_lock(gb_inode, &gb_bh, 1);
        if (ret) {
@@ -640,7 +640,7 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
                goto out_unlock_gb_mutex;
        }
 
-       mutex_lock(&tl_inode->i_mutex);
+       inode_lock(tl_inode);
 
        handle = ocfs2_start_trans(osb, credits);
        if (IS_ERR(handle)) {
@@ -708,11 +708,11 @@ out_commit:
        brelse(gd_bh);
 
 out_unlock_tl_inode:
-       mutex_unlock(&tl_inode->i_mutex);
+       inode_unlock(tl_inode);
 
        ocfs2_inode_unlock(gb_inode, 1);
 out_unlock_gb_mutex:
-       mutex_unlock(&gb_inode->i_mutex);
+       inode_unlock(gb_inode);
        brelse(gb_bh);
        iput(gb_inode);
 
@@ -905,7 +905,7 @@ static int ocfs2_move_extents(struct ocfs2_move_extents_context *context)
        if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
                return -EROFS;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        /*
         * This prevents concurrent writes from other nodes
@@ -969,7 +969,7 @@ out_inode_unlock:
 out_rw_unlock:
        ocfs2_rw_unlock(inode, 1);
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        return status;
 }
index ab42c38031b17dccb5a872781cec7784ca2d4cfa..6b3e87189a6467fd3c72533db5c52d149ba2064d 100644 (file)
@@ -1045,7 +1045,7 @@ leave:
        if (orphan_dir) {
                /* This was locked for us in ocfs2_prepare_orphan_dir() */
                ocfs2_inode_unlock(orphan_dir, 1);
-               mutex_unlock(&orphan_dir->i_mutex);
+               inode_unlock(orphan_dir);
                iput(orphan_dir);
        }
 
@@ -1664,7 +1664,7 @@ bail:
        if (orphan_dir) {
                /* This was locked for us in ocfs2_prepare_orphan_dir() */
                ocfs2_inode_unlock(orphan_dir, 1);
-               mutex_unlock(&orphan_dir->i_mutex);
+               inode_unlock(orphan_dir);
                iput(orphan_dir);
        }
 
@@ -2121,11 +2121,11 @@ static int ocfs2_lookup_lock_orphan_dir(struct ocfs2_super *osb,
                return ret;
        }
 
-       mutex_lock(&orphan_dir_inode->i_mutex);
+       inode_lock(orphan_dir_inode);
 
        ret = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
        if (ret < 0) {
-               mutex_unlock(&orphan_dir_inode->i_mutex);
+               inode_unlock(orphan_dir_inode);
                iput(orphan_dir_inode);
 
                mlog_errno(ret);
@@ -2226,7 +2226,7 @@ out:
 
        if (ret) {
                ocfs2_inode_unlock(orphan_dir_inode, 1);
-               mutex_unlock(&orphan_dir_inode->i_mutex);
+               inode_unlock(orphan_dir_inode);
                iput(orphan_dir_inode);
        }
 
@@ -2495,7 +2495,7 @@ out:
                        ocfs2_free_alloc_context(inode_ac);
 
                /* Unroll orphan dir locking */
-               mutex_unlock(&orphan_dir->i_mutex);
+               inode_unlock(orphan_dir);
                ocfs2_inode_unlock(orphan_dir, 1);
                iput(orphan_dir);
        }
@@ -2602,7 +2602,7 @@ leave:
        if (orphan_dir) {
                /* This was locked for us in ocfs2_prepare_orphan_dir() */
                ocfs2_inode_unlock(orphan_dir, 1);
-               mutex_unlock(&orphan_dir->i_mutex);
+               inode_unlock(orphan_dir);
                iput(orphan_dir);
        }
 
@@ -2689,7 +2689,7 @@ int ocfs2_add_inode_to_orphan(struct ocfs2_super *osb,
 
 bail_unlock_orphan:
        ocfs2_inode_unlock(orphan_dir_inode, 1);
-       mutex_unlock(&orphan_dir_inode->i_mutex);
+       inode_unlock(orphan_dir_inode);
        iput(orphan_dir_inode);
 
        ocfs2_free_dir_lookup_result(&orphan_insert);
@@ -2721,10 +2721,10 @@ int ocfs2_del_inode_from_orphan(struct ocfs2_super *osb,
                goto bail;
        }
 
-       mutex_lock(&orphan_dir_inode->i_mutex);
+       inode_lock(orphan_dir_inode);
        status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
        if (status < 0) {
-               mutex_unlock(&orphan_dir_inode->i_mutex);
+               inode_unlock(orphan_dir_inode);
                iput(orphan_dir_inode);
                mlog_errno(status);
                goto bail;
@@ -2770,7 +2770,7 @@ bail_commit:
 
 bail_unlock_orphan:
        ocfs2_inode_unlock(orphan_dir_inode, 1);
-       mutex_unlock(&orphan_dir_inode->i_mutex);
+       inode_unlock(orphan_dir_inode);
        brelse(orphan_dir_bh);
        iput(orphan_dir_inode);
 
@@ -2834,12 +2834,12 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
                goto leave;
        }
 
-       mutex_lock(&orphan_dir_inode->i_mutex);
+       inode_lock(orphan_dir_inode);
 
        status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
        if (status < 0) {
                mlog_errno(status);
-               mutex_unlock(&orphan_dir_inode->i_mutex);
+               inode_unlock(orphan_dir_inode);
                iput(orphan_dir_inode);
                goto leave;
        }
@@ -2901,7 +2901,7 @@ out_commit:
        ocfs2_commit_trans(osb, handle);
 orphan_unlock:
        ocfs2_inode_unlock(orphan_dir_inode, 1);
-       mutex_unlock(&orphan_dir_inode->i_mutex);
+       inode_unlock(orphan_dir_inode);
        iput(orphan_dir_inode);
 leave:
 
index fde9ef18cff3c4ba59d7891ab395bc739fdbf81e..9c9dd30bc94541fa89baed2b0ac8c2a3c8a72e31 100644 (file)
@@ -308,7 +308,7 @@ int ocfs2_lock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex)
                WARN_ON(bh != oinfo->dqi_gqi_bh);
        spin_unlock(&dq_data_lock);
        if (ex) {
-               mutex_lock(&oinfo->dqi_gqinode->i_mutex);
+               inode_lock(oinfo->dqi_gqinode);
                down_write(&OCFS2_I(oinfo->dqi_gqinode)->ip_alloc_sem);
        } else {
                down_read(&OCFS2_I(oinfo->dqi_gqinode)->ip_alloc_sem);
@@ -320,7 +320,7 @@ void ocfs2_unlock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex)
 {
        if (ex) {
                up_write(&OCFS2_I(oinfo->dqi_gqinode)->ip_alloc_sem);
-               mutex_unlock(&oinfo->dqi_gqinode->i_mutex);
+               inode_unlock(oinfo->dqi_gqinode);
        } else {
                up_read(&OCFS2_I(oinfo->dqi_gqinode)->ip_alloc_sem);
        }
index 252119860e6c1ae36fbfe7d5b5b84010341f8e5b..3eff031aaf264df58b74c4165749b0c2385c6312 100644 (file)
@@ -807,7 +807,7 @@ int ocfs2_remove_refcount_tree(struct inode *inode, struct buffer_head *di_bh)
                        mlog_errno(ret);
                        goto out;
                }
-               mutex_lock(&alloc_inode->i_mutex);
+               inode_lock(alloc_inode);
 
                ret = ocfs2_inode_lock(alloc_inode, &alloc_bh, 1);
                if (ret) {
@@ -867,7 +867,7 @@ out_unlock:
        }
 out_mutex:
        if (alloc_inode) {
-               mutex_unlock(&alloc_inode->i_mutex);
+               inode_unlock(alloc_inode);
                iput(alloc_inode);
        }
 out:
@@ -4197,7 +4197,7 @@ static int __ocfs2_reflink(struct dentry *old_dentry,
                goto out;
        }
 
-       mutex_lock_nested(&new_inode->i_mutex, I_MUTEX_CHILD);
+       inode_lock_nested(new_inode, I_MUTEX_CHILD);
        ret = ocfs2_inode_lock_nested(new_inode, &new_bh, 1,
                                      OI_LS_REFLINK_TARGET);
        if (ret) {
@@ -4231,7 +4231,7 @@ inode_unlock:
        ocfs2_inode_unlock(new_inode, 1);
        brelse(new_bh);
 out_unlock:
-       mutex_unlock(&new_inode->i_mutex);
+       inode_unlock(new_inode);
 out:
        if (!ret) {
                ret = filemap_fdatawait(inode->i_mapping);
@@ -4402,11 +4402,11 @@ static int ocfs2_vfs_reflink(struct dentry *old_dentry, struct inode *dir,
                        return error;
        }
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        error = dquot_initialize(dir);
        if (!error)
                error = ocfs2_reflink(old_dentry, dir, new_dentry, preserve);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        if (!error)
                fsnotify_create(dir, new_dentry);
        return error;
index 79b8021302b3eed84b9c59637cddd0e4844eb78b..576b9a04873f2f064fe3f66842713768b8f03f83 100644 (file)
@@ -301,7 +301,7 @@ int ocfs2_group_extend(struct inode * inode, int new_clusters)
                goto out;
        }
 
-       mutex_lock(&main_bm_inode->i_mutex);
+       inode_lock(main_bm_inode);
 
        ret = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
        if (ret < 0) {
@@ -375,7 +375,7 @@ out_unlock:
        ocfs2_inode_unlock(main_bm_inode, 1);
 
 out_mutex:
-       mutex_unlock(&main_bm_inode->i_mutex);
+       inode_unlock(main_bm_inode);
        iput(main_bm_inode);
 
 out:
@@ -486,7 +486,7 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
                goto out;
        }
 
-       mutex_lock(&main_bm_inode->i_mutex);
+       inode_lock(main_bm_inode);
 
        ret = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
        if (ret < 0) {
@@ -590,7 +590,7 @@ out_unlock:
        ocfs2_inode_unlock(main_bm_inode, 1);
 
 out_mutex:
-       mutex_unlock(&main_bm_inode->i_mutex);
+       inode_unlock(main_bm_inode);
        iput(main_bm_inode);
 
 out:
index fc6d25f6d4442c79dd376da24f18c3e3355232ed..2f19aeec5482106de43b65bc6598c42fb4ed2e63 100644 (file)
@@ -141,7 +141,7 @@ void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
                if (ac->ac_which != OCFS2_AC_USE_LOCAL)
                        ocfs2_inode_unlock(inode, 1);
 
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
 
                iput(inode);
                ac->ac_inode = NULL;
@@ -797,11 +797,11 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
                return -EINVAL;
        }
 
-       mutex_lock(&alloc_inode->i_mutex);
+       inode_lock(alloc_inode);
 
        status = ocfs2_inode_lock(alloc_inode, &bh, 1);
        if (status < 0) {
-               mutex_unlock(&alloc_inode->i_mutex);
+               inode_unlock(alloc_inode);
                iput(alloc_inode);
 
                mlog_errno(status);
@@ -2875,10 +2875,10 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
                goto bail;
        }
 
-       mutex_lock(&inode_alloc_inode->i_mutex);
+       inode_lock(inode_alloc_inode);
        status = ocfs2_inode_lock(inode_alloc_inode, &alloc_bh, 0);
        if (status < 0) {
-               mutex_unlock(&inode_alloc_inode->i_mutex);
+               inode_unlock(inode_alloc_inode);
                iput(inode_alloc_inode);
                mlog(ML_ERROR, "lock on alloc inode on slot %u failed %d\n",
                     (u32)suballoc_slot, status);
@@ -2891,7 +2891,7 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
                mlog(ML_ERROR, "test suballoc bit failed %d\n", status);
 
        ocfs2_inode_unlock(inode_alloc_inode, 0);
-       mutex_unlock(&inode_alloc_inode->i_mutex);
+       inode_unlock(inode_alloc_inode);
 
        iput(inode_alloc_inode);
        brelse(alloc_bh);
index f0e241ffd94fc9d9e4ffb3d9afbda6e6eb932c3b..7d3d979f57d9142169f93f2c88b27a612dc06dc4 100644 (file)
@@ -2524,7 +2524,7 @@ static int ocfs2_xattr_free_block(struct inode *inode,
                mlog_errno(ret);
                goto out;
        }
-       mutex_lock(&xb_alloc_inode->i_mutex);
+       inode_lock(xb_alloc_inode);
 
        ret = ocfs2_inode_lock(xb_alloc_inode, &xb_alloc_bh, 1);
        if (ret < 0) {
@@ -2549,7 +2549,7 @@ out_unlock:
        ocfs2_inode_unlock(xb_alloc_inode, 1);
        brelse(xb_alloc_bh);
 out_mutex:
-       mutex_unlock(&xb_alloc_inode->i_mutex);
+       inode_unlock(xb_alloc_inode);
        iput(xb_alloc_inode);
 out:
        brelse(blk_bh);
@@ -3619,17 +3619,17 @@ int ocfs2_xattr_set(struct inode *inode,
                }
        }
 
-       mutex_lock(&tl_inode->i_mutex);
+       inode_lock(tl_inode);
 
        if (ocfs2_truncate_log_needs_flush(osb)) {
                ret = __ocfs2_flush_truncate_log(osb);
                if (ret < 0) {
-                       mutex_unlock(&tl_inode->i_mutex);
+                       inode_unlock(tl_inode);
                        mlog_errno(ret);
                        goto cleanup;
                }
        }
-       mutex_unlock(&tl_inode->i_mutex);
+       inode_unlock(tl_inode);
 
        ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis,
                                        &xbs, &ctxt, ref_meta, &credits);
@@ -5460,7 +5460,7 @@ static int ocfs2_rm_xattr_cluster(struct inode *inode,
                return ret;
        }
 
-       mutex_lock(&tl_inode->i_mutex);
+       inode_lock(tl_inode);
 
        if (ocfs2_truncate_log_needs_flush(osb)) {
                ret = __ocfs2_flush_truncate_log(osb);
@@ -5504,7 +5504,7 @@ out_commit:
 out:
        ocfs2_schedule_truncate_log_flush(osb, 1);
 
-       mutex_unlock(&tl_inode->i_mutex);
+       inode_unlock(tl_inode);
 
        if (meta_ac)
                ocfs2_free_alloc_context(meta_ac);
index b25b1542c5304a74999db5ad52af57e89133a68e..55bdc75e2172a38487ea2942176c127fdad0eb82 100644 (file)
--- a/fs/open.c
+++ b/fs/open.c
@@ -58,10 +58,10 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
        if (ret)
                newattrs.ia_valid |= ret | ATTR_FORCE;
 
-       mutex_lock(&dentry->d_inode->i_mutex);
+       inode_lock(dentry->d_inode);
        /* Note any delegations or leases have already been broken: */
        ret = notify_change(dentry, &newattrs, NULL);
-       mutex_unlock(&dentry->d_inode->i_mutex);
+       inode_unlock(dentry->d_inode);
        return ret;
 }
 
@@ -510,7 +510,7 @@ static int chmod_common(struct path *path, umode_t mode)
        if (error)
                return error;
 retry_deleg:
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        error = security_path_chmod(path, mode);
        if (error)
                goto out_unlock;
@@ -518,7 +518,7 @@ retry_deleg:
        newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
        error = notify_change(path->dentry, &newattrs, &delegated_inode);
 out_unlock:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        if (delegated_inode) {
                error = break_deleg_wait(&delegated_inode);
                if (!error)
@@ -593,11 +593,11 @@ retry_deleg:
        if (!S_ISDIR(inode->i_mode))
                newattrs.ia_valid |=
                        ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV;
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        error = security_path_chown(path, uid, gid);
        if (!error)
                error = notify_change(path->dentry, &newattrs, &delegated_inode);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        if (delegated_inode) {
                error = break_deleg_wait(&delegated_inode);
                if (!error)
index eff6319d50373c05d4d829b0fd83fa784d5f187b..d894e7cd9a864239ea19dc86d405239ee2405607 100644 (file)
@@ -248,9 +248,9 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
        if (err)
                goto out_cleanup;
 
-       mutex_lock(&newdentry->d_inode->i_mutex);
+       inode_lock(newdentry->d_inode);
        err = ovl_set_attr(newdentry, stat);
-       mutex_unlock(&newdentry->d_inode->i_mutex);
+       inode_unlock(newdentry->d_inode);
        if (err)
                goto out_cleanup;
 
index 692ceda3bc21f6976b65f3e2d5aa4b7ef2e9c5e8..ed95272d57a61af3f26a58c71cf789d3c94e28ce 100644 (file)
@@ -167,7 +167,7 @@ static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
        struct dentry *newdentry;
        int err;
 
-       mutex_lock_nested(&udir->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(udir, I_MUTEX_PARENT);
        newdentry = lookup_one_len(dentry->d_name.name, upperdir,
                                   dentry->d_name.len);
        err = PTR_ERR(newdentry);
@@ -185,7 +185,7 @@ static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
 out_dput:
        dput(newdentry);
 out_unlock:
-       mutex_unlock(&udir->i_mutex);
+       inode_unlock(udir);
        return err;
 }
 
@@ -258,9 +258,9 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry,
        if (err)
                goto out_cleanup;
 
-       mutex_lock(&opaquedir->d_inode->i_mutex);
+       inode_lock(opaquedir->d_inode);
        err = ovl_set_attr(opaquedir, &stat);
-       mutex_unlock(&opaquedir->d_inode->i_mutex);
+       inode_unlock(opaquedir->d_inode);
        if (err)
                goto out_cleanup;
 
@@ -599,7 +599,7 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir)
        struct dentry *upper = ovl_dentry_upper(dentry);
        int err;
 
-       mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(dir, I_MUTEX_PARENT);
        err = -ESTALE;
        if (upper->d_parent == upperdir) {
                /* Don't let d_delete() think it can reset d_inode */
@@ -619,7 +619,7 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir)
         * now.
         */
        d_drop(dentry);
-       mutex_unlock(&dir->i_mutex);
+       inode_unlock(dir);
 
        return err;
 }
index bf996e574f3d842995847f72b8ee7bb7466f2539..49e204560655a8f1737a2a03a38b7a88604a5a4c 100644 (file)
@@ -63,9 +63,9 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr)
        if (!err) {
                upperdentry = ovl_dentry_upper(dentry);
 
-               mutex_lock(&upperdentry->d_inode->i_mutex);
+               inode_lock(upperdentry->d_inode);
                err = notify_change(upperdentry, attr, NULL);
-               mutex_unlock(&upperdentry->d_inode->i_mutex);
+               inode_unlock(upperdentry->d_inode);
        }
        ovl_drop_write(dentry);
 out:
index adcb1398c48128682875b2ff4a6b42d95c1e116e..fdaf28f75e12acf1f4dba023c31017c81747744b 100644 (file)
@@ -228,7 +228,7 @@ static int ovl_check_whiteouts(struct dentry *dir, struct ovl_readdir_data *rdd)
                                dput(dentry);
                        }
                }
-               mutex_unlock(&dir->d_inode->i_mutex);
+               inode_unlock(dir->d_inode);
        }
        revert_creds(old_cred);
        put_cred(override_cred);
@@ -399,7 +399,7 @@ static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin)
        loff_t res;
        struct ovl_dir_file *od = file->private_data;
 
-       mutex_lock(&file_inode(file)->i_mutex);
+       inode_lock(file_inode(file));
        if (!file->f_pos)
                ovl_dir_reset(file);
 
@@ -429,7 +429,7 @@ static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin)
                res = offset;
        }
 out_unlock:
-       mutex_unlock(&file_inode(file)->i_mutex);
+       inode_unlock(file_inode(file));
 
        return res;
 }
@@ -454,10 +454,10 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
                        ovl_path_upper(dentry, &upperpath);
                        realfile = ovl_path_open(&upperpath, O_RDONLY);
                        smp_mb__before_spinlock();
-                       mutex_lock(&inode->i_mutex);
+                       inode_lock(inode);
                        if (!od->upperfile) {
                                if (IS_ERR(realfile)) {
-                                       mutex_unlock(&inode->i_mutex);
+                                       inode_unlock(inode);
                                        return PTR_ERR(realfile);
                                }
                                od->upperfile = realfile;
@@ -467,7 +467,7 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
                                        fput(realfile);
                                realfile = od->upperfile;
                        }
-                       mutex_unlock(&inode->i_mutex);
+                       inode_unlock(inode);
                }
        }
 
@@ -479,9 +479,9 @@ static int ovl_dir_release(struct inode *inode, struct file *file)
        struct ovl_dir_file *od = file->private_data;
 
        if (od->cache) {
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                ovl_cache_put(od, file->f_path.dentry);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        }
        fput(od->realfile);
        if (od->upperfile)
@@ -557,7 +557,7 @@ void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list)
 {
        struct ovl_cache_entry *p;
 
-       mutex_lock_nested(&upper->d_inode->i_mutex, I_MUTEX_CHILD);
+       inode_lock_nested(upper->d_inode, I_MUTEX_CHILD);
        list_for_each_entry(p, list, l_node) {
                struct dentry *dentry;
 
@@ -575,5 +575,5 @@ void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list)
                        ovl_cleanup(upper->d_inode, dentry);
                dput(dentry);
        }
-       mutex_unlock(&upper->d_inode->i_mutex);
+       inode_unlock(upper->d_inode);
 }
index d250604f985ab91abaa60e70a6a4025049016916..8d826bd56b26b10d641d7ce1f7a47b747851fac9 100644 (file)
@@ -229,7 +229,7 @@ void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry)
 {
        struct ovl_entry *oe = dentry->d_fsdata;
 
-       WARN_ON(!mutex_is_locked(&upperdentry->d_parent->d_inode->i_mutex));
+       WARN_ON(!inode_is_locked(upperdentry->d_parent->d_inode));
        WARN_ON(oe->__upperdentry);
        BUG_ON(!upperdentry->d_inode);
        /*
@@ -244,7 +244,7 @@ void ovl_dentry_version_inc(struct dentry *dentry)
 {
        struct ovl_entry *oe = dentry->d_fsdata;
 
-       WARN_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
+       WARN_ON(!inode_is_locked(dentry->d_inode));
        oe->version++;
 }
 
@@ -252,7 +252,7 @@ u64 ovl_dentry_version_get(struct dentry *dentry)
 {
        struct ovl_entry *oe = dentry->d_fsdata;
 
-       WARN_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
+       WARN_ON(!inode_is_locked(dentry->d_inode));
        return oe->version;
 }
 
@@ -375,9 +375,9 @@ static inline struct dentry *ovl_lookup_real(struct dentry *dir,
 {
        struct dentry *dentry;
 
-       mutex_lock(&dir->d_inode->i_mutex);
+       inode_lock(dir->d_inode);
        dentry = lookup_one_len(name->name, dir, name->len);
-       mutex_unlock(&dir->d_inode->i_mutex);
+       inode_unlock(dir->d_inode);
 
        if (IS_ERR(dentry)) {
                if (PTR_ERR(dentry) == -ENOENT)
@@ -744,7 +744,7 @@ static struct dentry *ovl_workdir_create(struct vfsmount *mnt,
        if (err)
                return ERR_PTR(err);
 
-       mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(dir, I_MUTEX_PARENT);
 retry:
        work = lookup_one_len(OVL_WORKDIR_NAME, dentry,
                              strlen(OVL_WORKDIR_NAME));
@@ -770,7 +770,7 @@ retry:
                        goto out_dput;
        }
 out_unlock:
-       mutex_unlock(&dir->i_mutex);
+       inode_unlock(dir);
        mnt_drop_write(mnt);
 
        return work;
index 42cf8ddf0e5599da6621fcdf741d6a61fd2f2d7a..ab8dad3ccb6a8bac13a2eab25eeb2b68791d60f5 100644 (file)
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -38,6 +38,12 @@ unsigned int pipe_max_size = 1048576;
  */
 unsigned int pipe_min_size = PAGE_SIZE;
 
+/* Maximum allocatable pages per user. Hard limit is unset by default, soft
+ * matches default values.
+ */
+unsigned long pipe_user_pages_hard;
+unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
+
 /*
  * We use a start+len construction, which provides full use of the 
  * allocated memory.
@@ -583,20 +589,49 @@ pipe_fasync(int fd, struct file *filp, int on)
        return retval;
 }
 
+static void account_pipe_buffers(struct pipe_inode_info *pipe,
+                                 unsigned long old, unsigned long new)
+{
+       atomic_long_add(new - old, &pipe->user->pipe_bufs);
+}
+
+static bool too_many_pipe_buffers_soft(struct user_struct *user)
+{
+       return pipe_user_pages_soft &&
+              atomic_long_read(&user->pipe_bufs) >= pipe_user_pages_soft;
+}
+
+static bool too_many_pipe_buffers_hard(struct user_struct *user)
+{
+       return pipe_user_pages_hard &&
+              atomic_long_read(&user->pipe_bufs) >= pipe_user_pages_hard;
+}
+
 struct pipe_inode_info *alloc_pipe_info(void)
 {
        struct pipe_inode_info *pipe;
 
        pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
        if (pipe) {
-               pipe->bufs = kzalloc(sizeof(struct pipe_buffer) * PIPE_DEF_BUFFERS, GFP_KERNEL);
+               unsigned long pipe_bufs = PIPE_DEF_BUFFERS;
+               struct user_struct *user = get_current_user();
+
+               if (!too_many_pipe_buffers_hard(user)) {
+                       if (too_many_pipe_buffers_soft(user))
+                               pipe_bufs = 1;
+                       pipe->bufs = kzalloc(sizeof(struct pipe_buffer) * pipe_bufs, GFP_KERNEL);
+               }
+
                if (pipe->bufs) {
                        init_waitqueue_head(&pipe->wait);
                        pipe->r_counter = pipe->w_counter = 1;
-                       pipe->buffers = PIPE_DEF_BUFFERS;
+                       pipe->buffers = pipe_bufs;
+                       pipe->user = user;
+                       account_pipe_buffers(pipe, 0, pipe_bufs);
                        mutex_init(&pipe->mutex);
                        return pipe;
                }
+               free_uid(user);
                kfree(pipe);
        }
 
@@ -607,6 +642,8 @@ void free_pipe_info(struct pipe_inode_info *pipe)
 {
        int i;
 
+       account_pipe_buffers(pipe, pipe->buffers, 0);
+       free_uid(pipe->user);
        for (i = 0; i < pipe->buffers; i++) {
                struct pipe_buffer *buf = pipe->bufs + i;
                if (buf->ops)
@@ -998,6 +1035,7 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long nr_pages)
                        memcpy(bufs + head, pipe->bufs, tail * sizeof(struct pipe_buffer));
        }
 
+       account_pipe_buffers(pipe, pipe->buffers, nr_pages);
        pipe->curbuf = 0;
        kfree(pipe->bufs);
        pipe->bufs = bufs;
@@ -1069,6 +1107,11 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
                if (!capable(CAP_SYS_RESOURCE) && size > pipe_max_size) {
                        ret = -EPERM;
                        goto out;
+               } else if ((too_many_pipe_buffers_hard(pipe->user) ||
+                           too_many_pipe_buffers_soft(pipe->user)) &&
+                          !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) {
+                       ret = -EPERM;
+                       goto out;
                }
                ret = pipe_set_size(pipe, nr_pages);
                break;
index 92e6726f6e3732573bd9a64f3b313cc3508ce519..a939f5ed7f89ccb39673fef11beb542e5bef8159 100644 (file)
@@ -552,9 +552,9 @@ static int open_kcore(struct inode *inode, struct file *filp)
        if (kcore_need_update)
                kcore_update_ram();
        if (i_size_read(inode) != proc_root_kcore->size) {
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                i_size_write(inode, proc_root_kcore->size);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        }
        return 0;
 }
index 67e8db442cf03802c7758a76dd8adf79b46a3a50..b6a8d3529fea9714fb25d063e9505b198201e7ab 100644 (file)
@@ -50,7 +50,7 @@ int proc_setup_self(struct super_block *s)
        struct pid_namespace *ns = s->s_fs_info;
        struct dentry *self;
        
-       mutex_lock(&root_inode->i_mutex);
+       inode_lock(root_inode);
        self = d_alloc_name(s->s_root, "self");
        if (self) {
                struct inode *inode = new_inode_pseudo(s);
@@ -69,7 +69,7 @@ int proc_setup_self(struct super_block *s)
        } else {
                self = ERR_PTR(-ENOMEM);
        }
-       mutex_unlock(&root_inode->i_mutex);
+       inode_unlock(root_inode);
        if (IS_ERR(self)) {
                pr_err("proc_fill_super: can't allocate /proc/self\n");
                return PTR_ERR(self);
index 71ffc91060f6d6ad32081bfa53d2a3c7b3706e53..fa95ab2d36740803e06da898a4851db317b86a9e 100644 (file)
@@ -259,23 +259,29 @@ static int do_maps_open(struct inode *inode, struct file *file,
                                sizeof(struct proc_maps_private));
 }
 
-static pid_t pid_of_stack(struct proc_maps_private *priv,
-                               struct vm_area_struct *vma, bool is_pid)
+/*
+ * Indicate if the VMA is a stack for the given task; for
+ * /proc/PID/maps that is the stack of the main task.
+ */
+static int is_stack(struct proc_maps_private *priv,
+                   struct vm_area_struct *vma, int is_pid)
 {
-       struct inode *inode = priv->inode;
-       struct task_struct *task;
-       pid_t ret = 0;
+       int stack = 0;
+
+       if (is_pid) {
+               stack = vma->vm_start <= vma->vm_mm->start_stack &&
+                       vma->vm_end >= vma->vm_mm->start_stack;
+       } else {
+               struct inode *inode = priv->inode;
+               struct task_struct *task;
 
-       rcu_read_lock();
-       task = pid_task(proc_pid(inode), PIDTYPE_PID);
-       if (task) {
-               task = task_of_stack(task, vma, is_pid);
+               rcu_read_lock();
+               task = pid_task(proc_pid(inode), PIDTYPE_PID);
                if (task)
-                       ret = task_pid_nr_ns(task, inode->i_sb->s_fs_info);
+                       stack = vma_is_stack_for_task(vma, task);
+               rcu_read_unlock();
        }
-       rcu_read_unlock();
-
-       return ret;
+       return stack;
 }
 
 static void
@@ -335,8 +341,6 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
 
        name = arch_vma_name(vma);
        if (!name) {
-               pid_t tid;
-
                if (!mm) {
                        name = "[vdso]";
                        goto done;
@@ -348,21 +352,8 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
                        goto done;
                }
 
-               tid = pid_of_stack(priv, vma, is_pid);
-               if (tid != 0) {
-                       /*
-                        * Thread stack in /proc/PID/task/TID/maps or
-                        * the main process stack.
-                        */
-                       if (!is_pid || (vma->vm_start <= mm->start_stack &&
-                           vma->vm_end >= mm->start_stack)) {
-                               name = "[stack]";
-                       } else {
-                               /* Thread stack in /proc/PID/maps */
-                               seq_pad(m, ' ');
-                               seq_printf(m, "[stack:%d]", tid);
-                       }
-               }
+               if (is_stack(priv, vma, is_pid))
+                       name = "[stack]";
        }
 
 done:
@@ -602,7 +593,8 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
        pte_t *pte;
        spinlock_t *ptl;
 
-       if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
+       ptl = pmd_trans_huge_lock(pmd, vma);
+       if (ptl) {
                smaps_pmd_entry(pmd, addr, walk);
                spin_unlock(ptl);
                return 0;
@@ -913,7 +905,8 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
        spinlock_t *ptl;
        struct page *page;
 
-       if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
+       ptl = pmd_trans_huge_lock(pmd, vma);
+       if (ptl) {
                if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
                        clear_soft_dirty_pmd(vma, addr, pmd);
                        goto out;
@@ -1187,7 +1180,8 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
        int err = 0;
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-       if (pmd_trans_huge_lock(pmdp, vma, &ptl)) {
+       ptl = pmd_trans_huge_lock(pmdp, vma);
+       if (ptl) {
                u64 flags = 0, frame = 0;
                pmd_t pmd = *pmdp;
 
@@ -1519,7 +1513,8 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
        pte_t *orig_pte;
        pte_t *pte;
 
-       if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
+       ptl = pmd_trans_huge_lock(pmd, vma);
+       if (ptl) {
                pte_t huge_pte = *(pte_t *)pmd;
                struct page *page;
 
@@ -1548,18 +1543,19 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
 static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
                unsigned long addr, unsigned long end, struct mm_walk *walk)
 {
+       pte_t huge_pte = huge_ptep_get(pte);
        struct numa_maps *md;
        struct page *page;
 
-       if (!pte_present(*pte))
+       if (!pte_present(huge_pte))
                return 0;
 
-       page = pte_page(*pte);
+       page = pte_page(huge_pte);
        if (!page)
                return 0;
 
        md = walk->private;
-       gather_stats(page, md, pte_dirty(*pte), 1);
+       gather_stats(page, md, pte_dirty(huge_pte), 1);
        return 0;
 }
 
@@ -1613,19 +1609,8 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
                seq_file_path(m, file, "\n\t= ");
        } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
                seq_puts(m, " heap");
-       } else {
-               pid_t tid = pid_of_stack(proc_priv, vma, is_pid);
-               if (tid != 0) {
-                       /*
-                        * Thread stack in /proc/PID/task/TID/maps or
-                        * the main process stack.
-                        */
-                       if (!is_pid || (vma->vm_start <= mm->start_stack &&
-                           vma->vm_end >= mm->start_stack))
-                               seq_puts(m, " stack");
-                       else
-                               seq_printf(m, " stack:%d", tid);
-               }
+       } else if (is_stack(proc_priv, vma, is_pid)) {
+               seq_puts(m, " stack");
        }
 
        if (is_vm_hugetlb_page(vma))
index e0d64c92e4f6576c38a8a4a7cc9b8d13a2b3362e..faacb0c0d857602111bfc04f2e374c451059c358 100644 (file)
@@ -123,23 +123,26 @@ unsigned long task_statm(struct mm_struct *mm,
        return size;
 }
 
-static pid_t pid_of_stack(struct proc_maps_private *priv,
-                               struct vm_area_struct *vma, bool is_pid)
+static int is_stack(struct proc_maps_private *priv,
+                   struct vm_area_struct *vma, int is_pid)
 {
-       struct inode *inode = priv->inode;
-       struct task_struct *task;
-       pid_t ret = 0;
-
-       rcu_read_lock();
-       task = pid_task(proc_pid(inode), PIDTYPE_PID);
-       if (task) {
-               task = task_of_stack(task, vma, is_pid);
+       struct mm_struct *mm = vma->vm_mm;
+       int stack = 0;
+
+       if (is_pid) {
+               stack = vma->vm_start <= mm->start_stack &&
+                       vma->vm_end >= mm->start_stack;
+       } else {
+               struct inode *inode = priv->inode;
+               struct task_struct *task;
+
+               rcu_read_lock();
+               task = pid_task(proc_pid(inode), PIDTYPE_PID);
                if (task)
-                       ret = task_pid_nr_ns(task, inode->i_sb->s_fs_info);
+                       stack = vma_is_stack_for_task(vma, task);
+               rcu_read_unlock();
        }
-       rcu_read_unlock();
-
-       return ret;
+       return stack;
 }
 
 /*
@@ -181,21 +184,9 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma,
        if (file) {
                seq_pad(m, ' ');
                seq_file_path(m, file, "");
-       } else if (mm) {
-               pid_t tid = pid_of_stack(priv, vma, is_pid);
-
-               if (tid != 0) {
-                       seq_pad(m, ' ');
-                       /*
-                        * Thread stack in /proc/PID/task/TID/maps or
-                        * the main process stack.
-                        */
-                       if (!is_pid || (vma->vm_start <= mm->start_stack &&
-                           vma->vm_end >= mm->start_stack))
-                               seq_printf(m, "[stack]");
-                       else
-                               seq_printf(m, "[stack:%d]", tid);
-               }
+       } else if (mm && is_stack(priv, vma, is_pid)) {
+               seq_pad(m, ' ');
+               seq_printf(m, "[stack]");
        }
 
        seq_putc(m, '\n');
index 9eacd59e0360f1367a084a1e7bfa5dca0e3fe170..e58a31e8fb2a186aaa2d3dee62811f090e2ade57 100644 (file)
@@ -52,7 +52,7 @@ int proc_setup_thread_self(struct super_block *s)
        struct pid_namespace *ns = s->s_fs_info;
        struct dentry *thread_self;
 
-       mutex_lock(&root_inode->i_mutex);
+       inode_lock(root_inode);
        thread_self = d_alloc_name(s->s_root, "thread-self");
        if (thread_self) {
                struct inode *inode = new_inode_pseudo(s);
@@ -71,7 +71,7 @@ int proc_setup_thread_self(struct super_block *s)
        } else {
                thread_self = ERR_PTR(-ENOMEM);
        }
-       mutex_unlock(&root_inode->i_mutex);
+       inode_unlock(root_inode);
        if (IS_ERR(thread_self)) {
                pr_err("proc_fill_super: can't allocate /proc/thread_self\n");
                return PTR_ERR(thread_self);
index d8c439d813ce1bd0a415fd09a8407c6c6192d49b..dc645b66cd79aea96ea723f8b127a16f7d6f70cd 100644 (file)
@@ -377,7 +377,7 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count,
                break;
        }
 
-       mutex_lock(&d_inode(root)->i_mutex);
+       inode_lock(d_inode(root));
 
        dentry = d_alloc_name(root, name);
        if (!dentry)
@@ -397,12 +397,12 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count,
        list_add(&private->list, &allpstore);
        spin_unlock_irqrestore(&allpstore_lock, flags);
 
-       mutex_unlock(&d_inode(root)->i_mutex);
+       inode_unlock(d_inode(root));
 
        return 0;
 
 fail_lockedalloc:
-       mutex_unlock(&d_inode(root)->i_mutex);
+       inode_unlock(d_inode(root));
        kfree(private);
 fail_alloc:
        iput(inode);
index fbd70af98820752ca7126363c4597de3d781b14d..3c3b81bb6dfebbd3d6c3f1f8f1252e46308bf9c9 100644 (file)
@@ -682,9 +682,9 @@ int dquot_quota_sync(struct super_block *sb, int type)
                        continue;
                if (!sb_has_quota_active(sb, cnt))
                        continue;
-               mutex_lock(&dqopt->files[cnt]->i_mutex);
+               inode_lock(dqopt->files[cnt]);
                truncate_inode_pages(&dqopt->files[cnt]->i_data, 0);
-               mutex_unlock(&dqopt->files[cnt]->i_mutex);
+               inode_unlock(dqopt->files[cnt]);
        }
        mutex_unlock(&dqopt->dqonoff_mutex);
 
@@ -2162,12 +2162,12 @@ int dquot_disable(struct super_block *sb, int type, unsigned int flags)
                        /* If quota was reenabled in the meantime, we have
                         * nothing to do */
                        if (!sb_has_quota_loaded(sb, cnt)) {
-                               mutex_lock(&toputinode[cnt]->i_mutex);
+                               inode_lock(toputinode[cnt]);
                                toputinode[cnt]->i_flags &= ~(S_IMMUTABLE |
                                  S_NOATIME | S_NOQUOTA);
                                truncate_inode_pages(&toputinode[cnt]->i_data,
                                                     0);
-                               mutex_unlock(&toputinode[cnt]->i_mutex);
+                               inode_unlock(toputinode[cnt]);
                                mark_inode_dirty_sync(toputinode[cnt]);
                        }
                        mutex_unlock(&dqopt->dqonoff_mutex);
@@ -2258,11 +2258,11 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
                /* We don't want quota and atime on quota files (deadlocks
                 * possible) Also nobody should write to the file - we use
                 * special IO operations which ignore the immutable bit. */
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                oldflags = inode->i_flags & (S_NOATIME | S_IMMUTABLE |
                                             S_NOQUOTA);
                inode->i_flags |= S_NOQUOTA | S_NOATIME | S_IMMUTABLE;
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                /*
                 * When S_NOQUOTA is set, remove dquot references as no more
                 * references can be added
@@ -2305,12 +2305,12 @@ out_file_init:
        iput(inode);
 out_lock:
        if (oldflags != -1) {
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                /* Set the flags back (in the case of accidental quotaon()
                 * on a wrong file we don't want to mess up the flags) */
                inode->i_flags &= ~(S_NOATIME | S_NOQUOTA | S_IMMUTABLE);
                inode->i_flags |= oldflags;
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        }
        mutex_unlock(&dqopt->dqonoff_mutex);
 out_fmt:
@@ -2430,9 +2430,9 @@ int dquot_quota_on_mount(struct super_block *sb, char *qf_name,
        struct dentry *dentry;
        int error;
 
-       mutex_lock(&d_inode(sb->s_root)->i_mutex);
+       inode_lock(d_inode(sb->s_root));
        dentry = lookup_one_len(qf_name, sb->s_root, strlen(qf_name));
-       mutex_unlock(&d_inode(sb->s_root)->i_mutex);
+       inode_unlock(d_inode(sb->s_root));
        if (IS_ERR(dentry))
                return PTR_ERR(dentry);
 
index 06b07d5a08fec4be6d83e9c92192963da6c8bc5c..324ec271cc4e64868c34e3ff2f28ac2c0542475e 100644 (file)
@@ -238,7 +238,7 @@ loff_t default_llseek(struct file *file, loff_t offset, int whence)
        struct inode *inode = file_inode(file);
        loff_t retval;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        switch (whence) {
                case SEEK_END:
                        offset += i_size_read(inode);
@@ -283,7 +283,7 @@ loff_t default_llseek(struct file *file, loff_t offset, int whence)
                retval = offset;
        }
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return retval;
 }
 EXPORT_SYMBOL(default_llseek);
@@ -1656,6 +1656,9 @@ next_file:
                mnt_drop_write_file(dst_file);
 next_loop:
                fdput(dst_fd);
+
+               if (fatal_signal_pending(current))
+                       goto out;
        }
 
 out:
index ced679179cac0686407c3743cff177289bfc3959..e69ef3b79787b9fb4f02ab65b35c28571c9df1a0 100644 (file)
@@ -44,7 +44,7 @@ int iterate_dir(struct file *file, struct dir_context *ctx)
                fsnotify_access(file);
                file_accessed(file);
        }
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 out:
        return res;
 }
index 4a024e2ceb9f76fbf9156d0dc84c3e09a4a1ff15..3abd4004184ba0b49ddc91dac59795230125e133 100644 (file)
@@ -38,11 +38,11 @@ static int reiserfs_dir_fsync(struct file *filp, loff_t start, loff_t end,
        if (err)
                return err;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        reiserfs_write_lock(inode->i_sb);
        err = reiserfs_commit_for_inode(inode);
        reiserfs_write_unlock(inode->i_sb);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        if (err < 0)
                return err;
        return 0;
index 96a1bcf33db4435098baa153f807a71274543aaf..9424a4ba93a9504b12c75c08d0f4a4cd8f49fe20 100644 (file)
@@ -158,7 +158,7 @@ static int reiserfs_sync_file(struct file *filp, loff_t start, loff_t end,
        if (err)
                return err;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        BUG_ON(!S_ISREG(inode->i_mode));
        err = sync_mapping_buffers(inode->i_mapping);
        reiserfs_write_lock(inode->i_sb);
@@ -166,7 +166,7 @@ static int reiserfs_sync_file(struct file *filp, loff_t start, loff_t end,
        reiserfs_write_unlock(inode->i_sb);
        if (barrier_done != 1 && reiserfs_barrier_flush(inode->i_sb))
                blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        if (barrier_done < 0)
                return barrier_done;
        return (err < 0) ? -EIO : 0;
index 6ec8a30a0911b953e0c79ddcbd1458c8fc13142a..036a1fc0a8c35655a2bdb6cfb5b79dfccca04d54 100644 (file)
@@ -224,7 +224,7 @@ out_unlock:
        page_cache_release(page);
 
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        reiserfs_write_unlock(inode->i_sb);
        return retval;
 }
index 05db7473bcb5c64078dc572d2ef8e455a770dbba..c0306ec8ed7b8f5d4eef97fd1cf4c7653c2b1f20 100644 (file)
@@ -288,7 +288,7 @@ static int finish_unfinished(struct super_block *s)
                pathrelse(&path);
 
                inode = reiserfs_iget(s, &obj_key);
-               if (!inode) {
+               if (IS_ERR_OR_NULL(inode)) {
                        /*
                         * the unlink almost completed, it just did not
                         * manage to remove "save" link and release objectid
index e5ddb4e5ea9497956cfa7b04dca84eba790829d1..57e0b23105327b298d17db42d44c2ebf8bd31715 100644 (file)
 #ifdef CONFIG_REISERFS_FS_XATTR
 static int xattr_create(struct inode *dir, struct dentry *dentry, int mode)
 {
-       BUG_ON(!mutex_is_locked(&dir->i_mutex));
+       BUG_ON(!inode_is_locked(dir));
        return dir->i_op->create(dir, dentry, mode, true);
 }
 #endif
 
 static int xattr_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
-       BUG_ON(!mutex_is_locked(&dir->i_mutex));
+       BUG_ON(!inode_is_locked(dir));
        return dir->i_op->mkdir(dir, dentry, mode);
 }
 
@@ -85,11 +85,11 @@ static int xattr_unlink(struct inode *dir, struct dentry *dentry)
 {
        int error;
 
-       BUG_ON(!mutex_is_locked(&dir->i_mutex));
+       BUG_ON(!inode_is_locked(dir));
 
-       mutex_lock_nested(&d_inode(dentry)->i_mutex, I_MUTEX_CHILD);
+       inode_lock_nested(d_inode(dentry), I_MUTEX_CHILD);
        error = dir->i_op->unlink(dir, dentry);
-       mutex_unlock(&d_inode(dentry)->i_mutex);
+       inode_unlock(d_inode(dentry));
 
        if (!error)
                d_delete(dentry);
@@ -100,13 +100,13 @@ static int xattr_rmdir(struct inode *dir, struct dentry *dentry)
 {
        int error;
 
-       BUG_ON(!mutex_is_locked(&dir->i_mutex));
+       BUG_ON(!inode_is_locked(dir));
 
-       mutex_lock_nested(&d_inode(dentry)->i_mutex, I_MUTEX_CHILD);
+       inode_lock_nested(d_inode(dentry), I_MUTEX_CHILD);
        error = dir->i_op->rmdir(dir, dentry);
        if (!error)
                d_inode(dentry)->i_flags |= S_DEAD;
-       mutex_unlock(&d_inode(dentry)->i_mutex);
+       inode_unlock(d_inode(dentry));
        if (!error)
                d_delete(dentry);
 
@@ -123,7 +123,7 @@ static struct dentry *open_xa_root(struct super_block *sb, int flags)
        if (d_really_is_negative(privroot))
                return ERR_PTR(-ENODATA);
 
-       mutex_lock_nested(&d_inode(privroot)->i_mutex, I_MUTEX_XATTR);
+       inode_lock_nested(d_inode(privroot), I_MUTEX_XATTR);
 
        xaroot = dget(REISERFS_SB(sb)->xattr_root);
        if (!xaroot)
@@ -139,7 +139,7 @@ static struct dentry *open_xa_root(struct super_block *sb, int flags)
                }
        }
 
-       mutex_unlock(&d_inode(privroot)->i_mutex);
+       inode_unlock(d_inode(privroot));
        return xaroot;
 }
 
@@ -156,7 +156,7 @@ static struct dentry *open_xa_dir(const struct inode *inode, int flags)
                 le32_to_cpu(INODE_PKEY(inode)->k_objectid),
                 inode->i_generation);
 
-       mutex_lock_nested(&d_inode(xaroot)->i_mutex, I_MUTEX_XATTR);
+       inode_lock_nested(d_inode(xaroot), I_MUTEX_XATTR);
 
        xadir = lookup_one_len(namebuf, xaroot, strlen(namebuf));
        if (!IS_ERR(xadir) && d_really_is_negative(xadir)) {
@@ -170,7 +170,7 @@ static struct dentry *open_xa_dir(const struct inode *inode, int flags)
                }
        }
 
-       mutex_unlock(&d_inode(xaroot)->i_mutex);
+       inode_unlock(d_inode(xaroot));
        dput(xaroot);
        return xadir;
 }
@@ -195,7 +195,7 @@ fill_with_dentries(struct dir_context *ctx, const char *name, int namelen,
                container_of(ctx, struct reiserfs_dentry_buf, ctx);
        struct dentry *dentry;
 
-       WARN_ON_ONCE(!mutex_is_locked(&d_inode(dbuf->xadir)->i_mutex));
+       WARN_ON_ONCE(!inode_is_locked(d_inode(dbuf->xadir)));
 
        if (dbuf->count == ARRAY_SIZE(dbuf->dentries))
                return -ENOSPC;
@@ -254,7 +254,7 @@ static int reiserfs_for_each_xattr(struct inode *inode,
                goto out_dir;
        }
 
-       mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_XATTR);
+       inode_lock_nested(d_inode(dir), I_MUTEX_XATTR);
 
        buf.xadir = dir;
        while (1) {
@@ -276,7 +276,7 @@ static int reiserfs_for_each_xattr(struct inode *inode,
                        break;
                buf.count = 0;
        }
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
 
        cleanup_dentry_buf(&buf);
 
@@ -298,13 +298,13 @@ static int reiserfs_for_each_xattr(struct inode *inode,
                if (!err) {
                        int jerror;
 
-                       mutex_lock_nested(&d_inode(dir->d_parent)->i_mutex,
+                       inode_lock_nested(d_inode(dir->d_parent),
                                          I_MUTEX_XATTR);
                        err = action(dir, data);
                        reiserfs_write_lock(inode->i_sb);
                        jerror = journal_end(&th);
                        reiserfs_write_unlock(inode->i_sb);
-                       mutex_unlock(&d_inode(dir->d_parent)->i_mutex);
+                       inode_unlock(d_inode(dir->d_parent));
                        err = jerror ?: err;
                }
        }
@@ -384,7 +384,7 @@ static struct dentry *xattr_lookup(struct inode *inode, const char *name,
        if (IS_ERR(xadir))
                return ERR_CAST(xadir);
 
-       mutex_lock_nested(&d_inode(xadir)->i_mutex, I_MUTEX_XATTR);
+       inode_lock_nested(d_inode(xadir), I_MUTEX_XATTR);
        xafile = lookup_one_len(name, xadir, strlen(name));
        if (IS_ERR(xafile)) {
                err = PTR_ERR(xafile);
@@ -404,7 +404,7 @@ static struct dentry *xattr_lookup(struct inode *inode, const char *name,
        if (err)
                dput(xafile);
 out:
-       mutex_unlock(&d_inode(xadir)->i_mutex);
+       inode_unlock(d_inode(xadir));
        dput(xadir);
        if (err)
                return ERR_PTR(err);
@@ -469,7 +469,7 @@ static int lookup_and_delete_xattr(struct inode *inode, const char *name)
        if (IS_ERR(xadir))
                return PTR_ERR(xadir);
 
-       mutex_lock_nested(&d_inode(xadir)->i_mutex, I_MUTEX_XATTR);
+       inode_lock_nested(d_inode(xadir), I_MUTEX_XATTR);
        dentry = lookup_one_len(name, xadir, strlen(name));
        if (IS_ERR(dentry)) {
                err = PTR_ERR(dentry);
@@ -483,7 +483,7 @@ static int lookup_and_delete_xattr(struct inode *inode, const char *name)
 
        dput(dentry);
 out_dput:
-       mutex_unlock(&d_inode(xadir)->i_mutex);
+       inode_unlock(d_inode(xadir));
        dput(xadir);
        return err;
 }
@@ -580,11 +580,11 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
                        .ia_valid = ATTR_SIZE | ATTR_CTIME,
                };
 
-               mutex_lock_nested(&d_inode(dentry)->i_mutex, I_MUTEX_XATTR);
+               inode_lock_nested(d_inode(dentry), I_MUTEX_XATTR);
                inode_dio_wait(d_inode(dentry));
 
                err = reiserfs_setattr(dentry, &newattrs);
-               mutex_unlock(&d_inode(dentry)->i_mutex);
+               inode_unlock(d_inode(dentry));
        } else
                update_ctime(inode);
 out_unlock:
@@ -888,9 +888,9 @@ ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size)
                goto out;
        }
 
-       mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_XATTR);
+       inode_lock_nested(d_inode(dir), I_MUTEX_XATTR);
        err = reiserfs_readdir_inode(d_inode(dir), &buf.ctx);
-       mutex_unlock(&d_inode(dir)->i_mutex);
+       inode_unlock(d_inode(dir));
 
        if (!err)
                err = buf.pos;
@@ -905,7 +905,7 @@ static int create_privroot(struct dentry *dentry)
        int err;
        struct inode *inode = d_inode(dentry->d_parent);
 
-       WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex));
+       WARN_ON_ONCE(!inode_is_locked(inode));
 
        err = xattr_mkdir(inode, dentry, 0700);
        if (err || d_really_is_negative(dentry)) {
@@ -995,7 +995,7 @@ int reiserfs_lookup_privroot(struct super_block *s)
        int err = 0;
 
        /* If we don't have the privroot located yet - go find it */
-       mutex_lock(&d_inode(s->s_root)->i_mutex);
+       inode_lock(d_inode(s->s_root));
        dentry = lookup_one_len(PRIVROOT_NAME, s->s_root,
                                strlen(PRIVROOT_NAME));
        if (!IS_ERR(dentry)) {
@@ -1005,7 +1005,7 @@ int reiserfs_lookup_privroot(struct super_block *s)
                        d_inode(dentry)->i_flags |= S_PRIVATE;
        } else
                err = PTR_ERR(dentry);
-       mutex_unlock(&d_inode(s->s_root)->i_mutex);
+       inode_unlock(d_inode(s->s_root));
 
        return err;
 }
@@ -1025,14 +1025,14 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags)
                goto error;
 
        if (d_really_is_negative(privroot) && !(mount_flags & MS_RDONLY)) {
-               mutex_lock(&d_inode(s->s_root)->i_mutex);
+               inode_lock(d_inode(s->s_root));
                err = create_privroot(REISERFS_SB(s)->priv_root);
-               mutex_unlock(&d_inode(s->s_root)->i_mutex);
+               inode_unlock(d_inode(s->s_root));
        }
 
        if (d_really_is_positive(privroot)) {
                s->s_xattr = reiserfs_xattr_handlers;
-               mutex_lock(&d_inode(privroot)->i_mutex);
+               inode_lock(d_inode(privroot));
                if (!REISERFS_SB(s)->xattr_root) {
                        struct dentry *dentry;
 
@@ -1043,7 +1043,7 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags)
                        else
                                err = PTR_ERR(dentry);
                }
-               mutex_unlock(&d_inode(privroot)->i_mutex);
+               inode_unlock(d_inode(privroot));
        }
 
 error:
index b94fa6c3c6ebe8c7308aea0052f3afacad9745c4..053818dd6c18be8f228e1c40483e50d78aa78007 100644 (file)
@@ -153,7 +153,7 @@ static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx)
        if (isalarm(ctx))
                remaining = alarm_expires_remaining(&ctx->t.alarm);
        else
-               remaining = hrtimer_expires_remaining(&ctx->t.tmr);
+               remaining = hrtimer_expires_remaining_adjusted(&ctx->t.tmr);
 
        return remaining.tv64 < 0 ? ktime_set(0, 0): remaining;
 }
index c66f2423e1f5c511b0202cfdab46a5e2be53d7f8..4a0e48f9210483adf6b9bee36f220e80d65a09cd 100644 (file)
@@ -84,9 +84,9 @@ static int tracefs_syscall_mkdir(struct inode *inode, struct dentry *dentry, umo
         * the files within the tracefs system. It is up to the individual
         * mkdir routine to handle races.
         */
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        ret = tracefs_ops.mkdir(name);
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        kfree(name);
 
@@ -109,13 +109,13 @@ static int tracefs_syscall_rmdir(struct inode *inode, struct dentry *dentry)
         * This time we need to unlock not only the parent (inode) but
         * also the directory that is being deleted.
         */
-       mutex_unlock(&inode->i_mutex);
-       mutex_unlock(&dentry->d_inode->i_mutex);
+       inode_unlock(inode);
+       inode_unlock(dentry->d_inode);
 
        ret = tracefs_ops.rmdir(name);
 
-       mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
-       mutex_lock(&dentry->d_inode->i_mutex);
+       inode_lock_nested(inode, I_MUTEX_PARENT);
+       inode_lock(dentry->d_inode);
 
        kfree(name);
 
@@ -334,7 +334,7 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
        if (!parent)
                parent = tracefs_mount->mnt_root;
 
-       mutex_lock(&parent->d_inode->i_mutex);
+       inode_lock(parent->d_inode);
        dentry = lookup_one_len(name, parent, strlen(name));
        if (!IS_ERR(dentry) && dentry->d_inode) {
                dput(dentry);
@@ -342,7 +342,7 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
        }
 
        if (IS_ERR(dentry)) {
-               mutex_unlock(&parent->d_inode->i_mutex);
+               inode_unlock(parent->d_inode);
                simple_release_fs(&tracefs_mount, &tracefs_mount_count);
        }
 
@@ -351,7 +351,7 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
 
 static struct dentry *failed_creating(struct dentry *dentry)
 {
-       mutex_unlock(&dentry->d_parent->d_inode->i_mutex);
+       inode_unlock(dentry->d_parent->d_inode);
        dput(dentry);
        simple_release_fs(&tracefs_mount, &tracefs_mount_count);
        return NULL;
@@ -359,7 +359,7 @@ static struct dentry *failed_creating(struct dentry *dentry)
 
 static struct dentry *end_creating(struct dentry *dentry)
 {
-       mutex_unlock(&dentry->d_parent->d_inode->i_mutex);
+       inode_unlock(dentry->d_parent->d_inode);
        return dentry;
 }
 
@@ -544,9 +544,9 @@ void tracefs_remove(struct dentry *dentry)
        if (!parent || !parent->d_inode)
                return;
 
-       mutex_lock(&parent->d_inode->i_mutex);
+       inode_lock(parent->d_inode);
        ret = __tracefs_remove(dentry, parent);
-       mutex_unlock(&parent->d_inode->i_mutex);
+       inode_unlock(parent->d_inode);
        if (!ret)
                simple_release_fs(&tracefs_mount, &tracefs_mount_count);
 }
@@ -572,7 +572,7 @@ void tracefs_remove_recursive(struct dentry *dentry)
 
        parent = dentry;
  down:
-       mutex_lock(&parent->d_inode->i_mutex);
+       inode_lock(parent->d_inode);
  loop:
        /*
         * The parent->d_subdirs is protected by the d_lock. Outside that
@@ -587,7 +587,7 @@ void tracefs_remove_recursive(struct dentry *dentry)
                /* perhaps simple_empty(child) makes more sense */
                if (!list_empty(&child->d_subdirs)) {
                        spin_unlock(&parent->d_lock);
-                       mutex_unlock(&parent->d_inode->i_mutex);
+                       inode_unlock(parent->d_inode);
                        parent = child;
                        goto down;
                }
@@ -608,10 +608,10 @@ void tracefs_remove_recursive(struct dentry *dentry)
        }
        spin_unlock(&parent->d_lock);
 
-       mutex_unlock(&parent->d_inode->i_mutex);
+       inode_unlock(parent->d_inode);
        child = parent;
        parent = parent->d_parent;
-       mutex_lock(&parent->d_inode->i_mutex);
+       inode_lock(parent->d_inode);
 
        if (child != dentry)
                /* go up */
@@ -619,7 +619,7 @@ void tracefs_remove_recursive(struct dentry *dentry)
 
        if (!__tracefs_remove(child, parent))
                simple_release_fs(&tracefs_mount, &tracefs_mount_count);
-       mutex_unlock(&parent->d_inode->i_mutex);
+       inode_unlock(parent->d_inode);
 }
 
 /**
index e49bd2808bf3e397b9b499fe4d6036ad7600a71b..795992a8321e9e0c9531c92f97c91cc10835cc02 100644 (file)
@@ -515,8 +515,8 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
        dbg_gen("dent '%pd' to ino %lu (nlink %d) in dir ino %lu",
                dentry, inode->i_ino,
                inode->i_nlink, dir->i_ino);
-       ubifs_assert(mutex_is_locked(&dir->i_mutex));
-       ubifs_assert(mutex_is_locked(&inode->i_mutex));
+       ubifs_assert(inode_is_locked(dir));
+       ubifs_assert(inode_is_locked(inode));
 
        err = dbg_check_synced_i_size(c, inode);
        if (err)
@@ -572,8 +572,8 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry)
        dbg_gen("dent '%pd' from ino %lu (nlink %d) in dir ino %lu",
                dentry, inode->i_ino,
                inode->i_nlink, dir->i_ino);
-       ubifs_assert(mutex_is_locked(&dir->i_mutex));
-       ubifs_assert(mutex_is_locked(&inode->i_mutex));
+       ubifs_assert(inode_is_locked(dir));
+       ubifs_assert(inode_is_locked(inode));
        err = dbg_check_synced_i_size(c, inode);
        if (err)
                return err;
@@ -661,8 +661,8 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry)
 
        dbg_gen("directory '%pd', ino %lu in dir ino %lu", dentry,
                inode->i_ino, dir->i_ino);
-       ubifs_assert(mutex_is_locked(&dir->i_mutex));
-       ubifs_assert(mutex_is_locked(&inode->i_mutex));
+       ubifs_assert(inode_is_locked(dir));
+       ubifs_assert(inode_is_locked(inode));
        err = check_dir_empty(c, d_inode(dentry));
        if (err)
                return err;
@@ -996,10 +996,10 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
        dbg_gen("dent '%pd' ino %lu in dir ino %lu to dent '%pd' in dir ino %lu",
                old_dentry, old_inode->i_ino, old_dir->i_ino,
                new_dentry, new_dir->i_ino);
-       ubifs_assert(mutex_is_locked(&old_dir->i_mutex));
-       ubifs_assert(mutex_is_locked(&new_dir->i_mutex));
+       ubifs_assert(inode_is_locked(old_dir));
+       ubifs_assert(inode_is_locked(new_dir));
        if (unlink)
-               ubifs_assert(mutex_is_locked(&new_inode->i_mutex));
+               ubifs_assert(inode_is_locked(new_inode));
 
 
        if (unlink && is_dir) {
index eff62801acbf10524e31f8ad7816c160e6f88a90..065c88f8e4b8c2d689e8fe9ce13122b5465e8f63 100644 (file)
@@ -1317,7 +1317,7 @@ int ubifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
        err = filemap_write_and_wait_range(inode->i_mapping, start, end);
        if (err)
                return err;
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        /* Synchronize the inode unless this is a 'datasync()' call. */
        if (!datasync || (inode->i_state & I_DIRTY_DATASYNC)) {
@@ -1332,7 +1332,7 @@ int ubifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
         */
        err = ubifs_sync_wbufs_by_inode(c, inode);
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return err;
 }
 
index e53292d0c21bcd9d57c3f219d3f7cc31d5eb836e..c7f4d434d098fb7ac4a35df55e2c60adb179d4c0 100644 (file)
@@ -313,7 +313,7 @@ static int setxattr(struct inode *host, const char *name, const void *value,
        union ubifs_key key;
        int err, type;
 
-       ubifs_assert(mutex_is_locked(&host->i_mutex));
+       ubifs_assert(inode_is_locked(host));
 
        if (size > UBIFS_MAX_INO_DATA)
                return -ERANGE;
@@ -550,7 +550,7 @@ int ubifs_removexattr(struct dentry *dentry, const char *name)
 
        dbg_gen("xattr '%s', ino %lu ('%pd')", name,
                host->i_ino, dentry);
-       ubifs_assert(mutex_is_locked(&host->i_mutex));
+       ubifs_assert(inode_is_locked(host));
 
        err = check_namespace(&nm);
        if (err < 0)
index bddf3d071daec536fe5a3e775b2e041e0df5a620..1af98963d860f0e4ed2959d256ff53737d070fcb 100644 (file)
@@ -122,7 +122,7 @@ static ssize_t udf_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
        struct udf_inode_info *iinfo = UDF_I(inode);
        int err;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        retval = generic_write_checks(iocb, from);
        if (retval <= 0)
@@ -136,7 +136,7 @@ static ssize_t udf_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
                                (udf_file_entry_alloc_offset(inode) + end)) {
                        err = udf_expand_file_adinicb(inode);
                        if (err) {
-                               mutex_unlock(&inode->i_mutex);
+                               inode_unlock(inode);
                                udf_debug("udf_expand_adinicb: err=%d\n", err);
                                return err;
                        }
@@ -149,7 +149,7 @@ static ssize_t udf_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 
        retval = __generic_file_write_iter(iocb, from);
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        if (retval > 0) {
                mark_inode_dirty(inode);
@@ -223,12 +223,12 @@ static int udf_release_file(struct inode *inode, struct file *filp)
                 * Grab i_mutex to avoid races with writes changing i_size
                 * while we are running.
                 */
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                down_write(&UDF_I(inode)->i_data_sem);
                udf_discard_prealloc(inode);
                udf_truncate_tail_extent(inode);
                up_write(&UDF_I(inode)->i_data_sem);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        }
        return 0;
 }
index 87dc16d155723eefb61b2a28c906287d4bf11222..166d3ed32c39a54b48c4ec93e2297de3d9efdcc2 100644 (file)
@@ -262,7 +262,7 @@ int udf_expand_file_adinicb(struct inode *inode)
                .nr_to_write = 1,
        };
 
-       WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex));
+       WARN_ON_ONCE(!inode_is_locked(inode));
        if (!iinfo->i_lenAlloc) {
                if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD))
                        iinfo->i_alloc_type = ICBTAG_FLAG_AD_SHORT;
index 0fbb4c7c72e83701bb32ef066908a33b435900ae..a522c15a0bfd7e5e9e0d758bfbc59185a8cb2609 100644 (file)
@@ -279,17 +279,12 @@ static void udf_sb_free_bitmap(struct udf_bitmap *bitmap)
 {
        int i;
        int nr_groups = bitmap->s_nr_groups;
-       int size = sizeof(struct udf_bitmap) + (sizeof(struct buffer_head *) *
-                                               nr_groups);
 
        for (i = 0; i < nr_groups; i++)
                if (bitmap->s_block_bitmap[i])
                        brelse(bitmap->s_block_bitmap[i]);
 
-       if (size <= PAGE_SIZE)
-               kfree(bitmap);
-       else
-               vfree(bitmap);
+       kvfree(bitmap);
 }
 
 static void udf_free_partition(struct udf_part_map *map)
index aa138d64560a6a3c2133bc70d57e367cc8c1476d..85c40f4f373d56b3d5d41ed2fe7459759c8a3aeb 100644 (file)
@@ -103,9 +103,9 @@ static int utimes_common(struct path *path, struct timespec *times)
                }
        }
 retry_deleg:
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        error = notify_change(path->dentry, &newattrs, &delegated_inode);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        if (delegated_inode) {
                error = break_deleg_wait(&delegated_inode);
                if (!error)
index d5dd6c8b82a712085c105ec1c30f827c639ff614..07d0e47f6a7f1968782b2b31be478a6dd3851656 100644 (file)
@@ -129,7 +129,7 @@ vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
        if (error)
                return error;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        error = security_inode_setxattr(dentry, name, value, size, flags);
        if (error)
                goto out;
@@ -137,7 +137,7 @@ vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
        error = __vfs_setxattr_noperm(dentry, name, value, size, flags);
 
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return error;
 }
 EXPORT_SYMBOL_GPL(vfs_setxattr);
@@ -277,7 +277,7 @@ vfs_removexattr(struct dentry *dentry, const char *name)
        if (error)
                return error;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        error = security_inode_removexattr(dentry, name);
        if (error)
                goto out;
@@ -290,7 +290,7 @@ vfs_removexattr(struct dentry *dentry, const char *name)
        }
 
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return error;
 }
 EXPORT_SYMBOL_GPL(vfs_removexattr);
index e2536bb1c760036af5efff9e13437a26b179da27..dc97eb21af071b0e040fe0d74b8ba51214e28cf9 100644 (file)
@@ -984,8 +984,6 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
 
 /*
  * Values for di_flags
- * There should be a one-to-one correspondence between these flags and the
- * XFS_XFLAG_s.
  */
 #define XFS_DIFLAG_REALTIME_BIT  0     /* file's blocks come from rt area */
 #define XFS_DIFLAG_PREALLOC_BIT  1     /* file space has been preallocated */
@@ -1025,6 +1023,15 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
         XFS_DIFLAG_PROJINHERIT | XFS_DIFLAG_NOSYMLINKS | XFS_DIFLAG_EXTSIZE | \
         XFS_DIFLAG_EXTSZINHERIT | XFS_DIFLAG_NODEFRAG | XFS_DIFLAG_FILESTREAM)
 
+/*
+ * Values for di_flags2 These start by being exposed to userspace in the upper
+ * 16 bits of the XFS_XFLAG_s range.
+ */
+#define XFS_DIFLAG2_DAX_BIT    0       /* use DAX for this inode */
+#define XFS_DIFLAG2_DAX                (1 << XFS_DIFLAG2_DAX_BIT)
+
+#define XFS_DIFLAG2_ANY                (XFS_DIFLAG2_DAX)
+
 /*
  * Inode number format:
  * low inopblog bits - offset in block
index b2b73a998d425d04779262ead61d02d65b153bc8..fffe3d01bd9fb5c771a4919edee1c9f615ab49e2 100644 (file)
@@ -35,40 +35,6 @@ struct dioattr {
 };
 #endif
 
-/*
- * Structure for XFS_IOC_FSGETXATTR[A] and XFS_IOC_FSSETXATTR.
- */
-#ifndef HAVE_FSXATTR
-struct fsxattr {
-       __u32           fsx_xflags;     /* xflags field value (get/set) */
-       __u32           fsx_extsize;    /* extsize field value (get/set)*/
-       __u32           fsx_nextents;   /* nextents field value (get)   */
-       __u32           fsx_projid;     /* project identifier (get/set) */
-       unsigned char   fsx_pad[12];
-};
-#endif
-
-/*
- * Flags for the bs_xflags/fsx_xflags field
- * There should be a one-to-one correspondence between these flags and the
- * XFS_DIFLAG_s.
- */
-#define XFS_XFLAG_REALTIME     0x00000001      /* data in realtime volume */
-#define XFS_XFLAG_PREALLOC     0x00000002      /* preallocated file extents */
-#define XFS_XFLAG_IMMUTABLE    0x00000008      /* file cannot be modified */
-#define XFS_XFLAG_APPEND       0x00000010      /* all writes append */
-#define XFS_XFLAG_SYNC         0x00000020      /* all writes synchronous */
-#define XFS_XFLAG_NOATIME      0x00000040      /* do not update access time */
-#define XFS_XFLAG_NODUMP       0x00000080      /* do not include in backups */
-#define XFS_XFLAG_RTINHERIT    0x00000100      /* create with rt bit set */
-#define XFS_XFLAG_PROJINHERIT  0x00000200      /* create with parents projid */
-#define XFS_XFLAG_NOSYMLINKS   0x00000400      /* disallow symlink creation */
-#define XFS_XFLAG_EXTSIZE      0x00000800      /* extent size allocator hint */
-#define XFS_XFLAG_EXTSZINHERIT 0x00001000      /* inherit inode extent size */
-#define XFS_XFLAG_NODEFRAG     0x00002000      /* do not defragment */
-#define XFS_XFLAG_FILESTREAM   0x00004000      /* use filestream allocator */
-#define XFS_XFLAG_HASATTR      0x80000000      /* no DIFLAG for this   */
-
 /*
  * Structure for XFS_IOC_GETBMAP.
  * On input, fill in bmv_offset and bmv_length of the first structure
@@ -514,8 +480,8 @@ typedef struct xfs_swapext
 #define XFS_IOC_ALLOCSP                _IOW ('X', 10, struct xfs_flock64)
 #define XFS_IOC_FREESP         _IOW ('X', 11, struct xfs_flock64)
 #define XFS_IOC_DIOINFO                _IOR ('X', 30, struct dioattr)
-#define XFS_IOC_FSGETXATTR     _IOR ('X', 31, struct fsxattr)
-#define XFS_IOC_FSSETXATTR     _IOW ('X', 32, struct fsxattr)
+#define XFS_IOC_FSGETXATTR     FS_IOC_FSGETXATTR
+#define XFS_IOC_FSSETXATTR     FS_IOC_FSSETXATTR
 #define XFS_IOC_ALLOCSP64      _IOW ('X', 36, struct xfs_flock64)
 #define XFS_IOC_FREESP64       _IOW ('X', 37, struct xfs_flock64)
 #define XFS_IOC_GETBMAP                _IOWR('X', 38, struct getbmap)
index daed4bfb85b2f2d56a3e82fa5ba4156f9f41011d..435c7de42e5f322a82845382ad7e1fa54dfe3d0b 100644 (file)
@@ -1527,6 +1527,16 @@ xfs_wait_buftarg(
        LIST_HEAD(dispose);
        int loop = 0;
 
+       /*
+        * We need to flush the buffer workqueue to ensure that all IO
+        * completion processing is 100% done. Just waiting on buffer locks is
+        * not sufficient for async IO as the reference count held over IO is
+        * not released until after the buffer lock is dropped. Hence we need to
+        * ensure here that all reference counts have been dropped before we
+        * start walking the LRU list.
+        */
+       drain_workqueue(btp->bt_mount->m_buf_workqueue);
+
        /* loop until there is nothing left on the lru list. */
        while (list_lru_count(&btp->bt_lru)) {
                list_lru_walk(&btp->bt_lru, xfs_buftarg_wait_rele,
index ebe9b8290a705c402759c985c015b0bbc5f6a151..52883ac3cf84c06761afcf0792bbafcf781d509b 100644 (file)
@@ -55,7 +55,7 @@ xfs_rw_ilock(
        int                     type)
 {
        if (type & XFS_IOLOCK_EXCL)
-               mutex_lock(&VFS_I(ip)->i_mutex);
+               inode_lock(VFS_I(ip));
        xfs_ilock(ip, type);
 }
 
@@ -66,7 +66,7 @@ xfs_rw_iunlock(
 {
        xfs_iunlock(ip, type);
        if (type & XFS_IOLOCK_EXCL)
-               mutex_unlock(&VFS_I(ip)->i_mutex);
+               inode_unlock(VFS_I(ip));
 }
 
 static inline void
@@ -76,7 +76,7 @@ xfs_rw_ilock_demote(
 {
        xfs_ilock_demote(ip, type);
        if (type & XFS_IOLOCK_EXCL)
-               mutex_unlock(&VFS_I(ip)->i_mutex);
+               inode_unlock(VFS_I(ip));
 }
 
 /*
@@ -1610,9 +1610,8 @@ xfs_filemap_pmd_fault(
 /*
  * pfn_mkwrite was originally inteneded to ensure we capture time stamp
  * updates on write faults. In reality, it's need to serialise against
- * truncate similar to page_mkwrite. Hence we open-code dax_pfn_mkwrite()
- * here and cycle the XFS_MMAPLOCK_SHARED to ensure we serialise the fault
- * barrier in place.
+ * truncate similar to page_mkwrite. Hence we cycle the XFS_MMAPLOCK_SHARED
+ * to ensure we serialise the fault barrier in place.
  */
 static int
 xfs_filemap_pfn_mkwrite(
@@ -1635,6 +1634,8 @@ xfs_filemap_pfn_mkwrite(
        size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
        if (vmf->pgoff >= size)
                ret = VM_FAULT_SIGBUS;
+       else if (IS_DAX(inode))
+               ret = dax_pfn_mkwrite(vma, vmf);
        xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
        sb_end_pagefault(inode->i_sb);
        return ret;
index ae3758a90ed63ba6b714ccbdafdf007207a8be07..ceba1a83cacccd649caf473ebcf2dfae984bb243 100644 (file)
@@ -610,60 +610,69 @@ __xfs_iflock(
 
 STATIC uint
 _xfs_dic2xflags(
-       __uint16_t              di_flags)
+       __uint16_t              di_flags,
+       uint64_t                di_flags2,
+       bool                    has_attr)
 {
        uint                    flags = 0;
 
        if (di_flags & XFS_DIFLAG_ANY) {
                if (di_flags & XFS_DIFLAG_REALTIME)
-                       flags |= XFS_XFLAG_REALTIME;
+                       flags |= FS_XFLAG_REALTIME;
                if (di_flags & XFS_DIFLAG_PREALLOC)
-                       flags |= XFS_XFLAG_PREALLOC;
+                       flags |= FS_XFLAG_PREALLOC;
                if (di_flags & XFS_DIFLAG_IMMUTABLE)
-                       flags |= XFS_XFLAG_IMMUTABLE;
+                       flags |= FS_XFLAG_IMMUTABLE;
                if (di_flags & XFS_DIFLAG_APPEND)
-                       flags |= XFS_XFLAG_APPEND;
+                       flags |= FS_XFLAG_APPEND;
                if (di_flags & XFS_DIFLAG_SYNC)
-                       flags |= XFS_XFLAG_SYNC;
+                       flags |= FS_XFLAG_SYNC;
                if (di_flags & XFS_DIFLAG_NOATIME)
-                       flags |= XFS_XFLAG_NOATIME;
+                       flags |= FS_XFLAG_NOATIME;
                if (di_flags & XFS_DIFLAG_NODUMP)
-                       flags |= XFS_XFLAG_NODUMP;
+                       flags |= FS_XFLAG_NODUMP;
                if (di_flags & XFS_DIFLAG_RTINHERIT)
-                       flags |= XFS_XFLAG_RTINHERIT;
+                       flags |= FS_XFLAG_RTINHERIT;
                if (di_flags & XFS_DIFLAG_PROJINHERIT)
-                       flags |= XFS_XFLAG_PROJINHERIT;
+                       flags |= FS_XFLAG_PROJINHERIT;
                if (di_flags & XFS_DIFLAG_NOSYMLINKS)
-                       flags |= XFS_XFLAG_NOSYMLINKS;
+                       flags |= FS_XFLAG_NOSYMLINKS;
                if (di_flags & XFS_DIFLAG_EXTSIZE)
-                       flags |= XFS_XFLAG_EXTSIZE;
+                       flags |= FS_XFLAG_EXTSIZE;
                if (di_flags & XFS_DIFLAG_EXTSZINHERIT)
-                       flags |= XFS_XFLAG_EXTSZINHERIT;
+                       flags |= FS_XFLAG_EXTSZINHERIT;
                if (di_flags & XFS_DIFLAG_NODEFRAG)
-                       flags |= XFS_XFLAG_NODEFRAG;
+                       flags |= FS_XFLAG_NODEFRAG;
                if (di_flags & XFS_DIFLAG_FILESTREAM)
-                       flags |= XFS_XFLAG_FILESTREAM;
+                       flags |= FS_XFLAG_FILESTREAM;
        }
 
+       if (di_flags2 & XFS_DIFLAG2_ANY) {
+               if (di_flags2 & XFS_DIFLAG2_DAX)
+                       flags |= FS_XFLAG_DAX;
+       }
+
+       if (has_attr)
+               flags |= FS_XFLAG_HASATTR;
+
        return flags;
 }
 
 uint
 xfs_ip2xflags(
-       xfs_inode_t             *ip)
+       struct xfs_inode        *ip)
 {
-       xfs_icdinode_t          *dic = &ip->i_d;
+       struct xfs_icdinode     *dic = &ip->i_d;
 
-       return _xfs_dic2xflags(dic->di_flags) |
-                               (XFS_IFORK_Q(ip) ? XFS_XFLAG_HASATTR : 0);
+       return _xfs_dic2xflags(dic->di_flags, dic->di_flags2, XFS_IFORK_Q(ip));
 }
 
 uint
 xfs_dic2xflags(
-       xfs_dinode_t            *dip)
+       struct xfs_dinode       *dip)
 {
-       return _xfs_dic2xflags(be16_to_cpu(dip->di_flags)) |
-                               (XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0);
+       return _xfs_dic2xflags(be16_to_cpu(dip->di_flags),
+                               be64_to_cpu(dip->di_flags2), XFS_DFORK_Q(dip));
 }
 
 /*
@@ -862,7 +871,8 @@ xfs_ialloc(
        case S_IFREG:
        case S_IFDIR:
                if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) {
-                       uint    di_flags = 0;
+                       uint64_t        di_flags2 = 0;
+                       uint            di_flags = 0;
 
                        if (S_ISDIR(mode)) {
                                if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
@@ -898,7 +908,11 @@ xfs_ialloc(
                                di_flags |= XFS_DIFLAG_NODEFRAG;
                        if (pip->i_d.di_flags & XFS_DIFLAG_FILESTREAM)
                                di_flags |= XFS_DIFLAG_FILESTREAM;
+                       if (pip->i_d.di_flags2 & XFS_DIFLAG2_DAX)
+                               di_flags2 |= XFS_DIFLAG2_DAX;
+
                        ip->i_d.di_flags |= di_flags;
+                       ip->i_d.di_flags2 |= di_flags2;
                }
                /* FALLTHROUGH */
        case S_IFLNK:
index d42738deec6de6128b1a4b0784c45b5447848d02..478d04e07f9500d6ceed20231deb6b474161c708 100644 (file)
@@ -859,25 +859,25 @@ xfs_merge_ioc_xflags(
        unsigned int    xflags = start;
 
        if (flags & FS_IMMUTABLE_FL)
-               xflags |= XFS_XFLAG_IMMUTABLE;
+               xflags |= FS_XFLAG_IMMUTABLE;
        else
-               xflags &= ~XFS_XFLAG_IMMUTABLE;
+               xflags &= ~FS_XFLAG_IMMUTABLE;
        if (flags & FS_APPEND_FL)
-               xflags |= XFS_XFLAG_APPEND;
+               xflags |= FS_XFLAG_APPEND;
        else
-               xflags &= ~XFS_XFLAG_APPEND;
+               xflags &= ~FS_XFLAG_APPEND;
        if (flags & FS_SYNC_FL)
-               xflags |= XFS_XFLAG_SYNC;
+               xflags |= FS_XFLAG_SYNC;
        else
-               xflags &= ~XFS_XFLAG_SYNC;
+               xflags &= ~FS_XFLAG_SYNC;
        if (flags & FS_NOATIME_FL)
-               xflags |= XFS_XFLAG_NOATIME;
+               xflags |= FS_XFLAG_NOATIME;
        else
-               xflags &= ~XFS_XFLAG_NOATIME;
+               xflags &= ~FS_XFLAG_NOATIME;
        if (flags & FS_NODUMP_FL)
-               xflags |= XFS_XFLAG_NODUMP;
+               xflags |= FS_XFLAG_NODUMP;
        else
-               xflags &= ~XFS_XFLAG_NODUMP;
+               xflags &= ~FS_XFLAG_NODUMP;
 
        return xflags;
 }
@@ -945,40 +945,51 @@ xfs_set_diflags(
        unsigned int            xflags)
 {
        unsigned int            di_flags;
+       uint64_t                di_flags2;
 
        /* can't set PREALLOC this way, just preserve it */
        di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC);
-       if (xflags & XFS_XFLAG_IMMUTABLE)
+       if (xflags & FS_XFLAG_IMMUTABLE)
                di_flags |= XFS_DIFLAG_IMMUTABLE;
-       if (xflags & XFS_XFLAG_APPEND)
+       if (xflags & FS_XFLAG_APPEND)
                di_flags |= XFS_DIFLAG_APPEND;
-       if (xflags & XFS_XFLAG_SYNC)
+       if (xflags & FS_XFLAG_SYNC)
                di_flags |= XFS_DIFLAG_SYNC;
-       if (xflags & XFS_XFLAG_NOATIME)
+       if (xflags & FS_XFLAG_NOATIME)
                di_flags |= XFS_DIFLAG_NOATIME;
-       if (xflags & XFS_XFLAG_NODUMP)
+       if (xflags & FS_XFLAG_NODUMP)
                di_flags |= XFS_DIFLAG_NODUMP;
-       if (xflags & XFS_XFLAG_NODEFRAG)
+       if (xflags & FS_XFLAG_NODEFRAG)
                di_flags |= XFS_DIFLAG_NODEFRAG;
-       if (xflags & XFS_XFLAG_FILESTREAM)
+       if (xflags & FS_XFLAG_FILESTREAM)
                di_flags |= XFS_DIFLAG_FILESTREAM;
        if (S_ISDIR(ip->i_d.di_mode)) {
-               if (xflags & XFS_XFLAG_RTINHERIT)
+               if (xflags & FS_XFLAG_RTINHERIT)
                        di_flags |= XFS_DIFLAG_RTINHERIT;
-               if (xflags & XFS_XFLAG_NOSYMLINKS)
+               if (xflags & FS_XFLAG_NOSYMLINKS)
                        di_flags |= XFS_DIFLAG_NOSYMLINKS;
-               if (xflags & XFS_XFLAG_EXTSZINHERIT)
+               if (xflags & FS_XFLAG_EXTSZINHERIT)
                        di_flags |= XFS_DIFLAG_EXTSZINHERIT;
-               if (xflags & XFS_XFLAG_PROJINHERIT)
+               if (xflags & FS_XFLAG_PROJINHERIT)
                        di_flags |= XFS_DIFLAG_PROJINHERIT;
        } else if (S_ISREG(ip->i_d.di_mode)) {
-               if (xflags & XFS_XFLAG_REALTIME)
+               if (xflags & FS_XFLAG_REALTIME)
                        di_flags |= XFS_DIFLAG_REALTIME;
-               if (xflags & XFS_XFLAG_EXTSIZE)
+               if (xflags & FS_XFLAG_EXTSIZE)
                        di_flags |= XFS_DIFLAG_EXTSIZE;
        }
-
        ip->i_d.di_flags = di_flags;
+
+       /* diflags2 only valid for v3 inodes. */
+       if (ip->i_d.di_version < 3)
+               return;
+
+       di_flags2 = 0;
+       if (xflags & FS_XFLAG_DAX)
+               di_flags2 |= XFS_DIFLAG2_DAX;
+
+       ip->i_d.di_flags2 = di_flags2;
+
 }
 
 STATIC void
@@ -988,22 +999,27 @@ xfs_diflags_to_linux(
        struct inode            *inode = VFS_I(ip);
        unsigned int            xflags = xfs_ip2xflags(ip);
 
-       if (xflags & XFS_XFLAG_IMMUTABLE)
+       if (xflags & FS_XFLAG_IMMUTABLE)
                inode->i_flags |= S_IMMUTABLE;
        else
                inode->i_flags &= ~S_IMMUTABLE;
-       if (xflags & XFS_XFLAG_APPEND)
+       if (xflags & FS_XFLAG_APPEND)
                inode->i_flags |= S_APPEND;
        else
                inode->i_flags &= ~S_APPEND;
-       if (xflags & XFS_XFLAG_SYNC)
+       if (xflags & FS_XFLAG_SYNC)
                inode->i_flags |= S_SYNC;
        else
                inode->i_flags &= ~S_SYNC;
-       if (xflags & XFS_XFLAG_NOATIME)
+       if (xflags & FS_XFLAG_NOATIME)
                inode->i_flags |= S_NOATIME;
        else
                inode->i_flags &= ~S_NOATIME;
+       if (xflags & FS_XFLAG_DAX)
+               inode->i_flags |= S_DAX;
+       else
+               inode->i_flags &= ~S_DAX;
+
 }
 
 static int
@@ -1016,11 +1032,11 @@ xfs_ioctl_setattr_xflags(
 
        /* Can't change realtime flag if any extents are allocated. */
        if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
-           XFS_IS_REALTIME_INODE(ip) != (fa->fsx_xflags & XFS_XFLAG_REALTIME))
+           XFS_IS_REALTIME_INODE(ip) != (fa->fsx_xflags & FS_XFLAG_REALTIME))
                return -EINVAL;
 
        /* If realtime flag is set then must have realtime device */
-       if (fa->fsx_xflags & XFS_XFLAG_REALTIME) {
+       if (fa->fsx_xflags & FS_XFLAG_REALTIME) {
                if (mp->m_sb.sb_rblocks == 0 || mp->m_sb.sb_rextsize == 0 ||
                    (ip->i_d.di_extsize % mp->m_sb.sb_rextsize))
                        return -EINVAL;
@@ -1031,7 +1047,7 @@ xfs_ioctl_setattr_xflags(
         * we have appropriate permission.
         */
        if (((ip->i_d.di_flags & (XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND)) ||
-            (fa->fsx_xflags & (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) &&
+            (fa->fsx_xflags & (FS_XFLAG_IMMUTABLE | FS_XFLAG_APPEND))) &&
            !capable(CAP_LINUX_IMMUTABLE))
                return -EPERM;
 
@@ -1095,8 +1111,8 @@ out_cancel:
  * extent size hint validation is somewhat cumbersome. Rules are:
  *
  * 1. extent size hint is only valid for directories and regular files
- * 2. XFS_XFLAG_EXTSIZE is only valid for regular files
- * 3. XFS_XFLAG_EXTSZINHERIT is only valid for directories.
+ * 2. FS_XFLAG_EXTSIZE is only valid for regular files
+ * 3. FS_XFLAG_EXTSZINHERIT is only valid for directories.
  * 4. can only be changed on regular files if no extents are allocated
  * 5. can be changed on directories at any time
  * 6. extsize hint of 0 turns off hints, clears inode flags.
@@ -1112,10 +1128,10 @@ xfs_ioctl_setattr_check_extsize(
 {
        struct xfs_mount        *mp = ip->i_mount;
 
-       if ((fa->fsx_xflags & XFS_XFLAG_EXTSIZE) && !S_ISREG(ip->i_d.di_mode))
+       if ((fa->fsx_xflags & FS_XFLAG_EXTSIZE) && !S_ISREG(ip->i_d.di_mode))
                return -EINVAL;
 
-       if ((fa->fsx_xflags & XFS_XFLAG_EXTSZINHERIT) &&
+       if ((fa->fsx_xflags & FS_XFLAG_EXTSZINHERIT) &&
            !S_ISDIR(ip->i_d.di_mode))
                return -EINVAL;
 
@@ -1132,7 +1148,7 @@ xfs_ioctl_setattr_check_extsize(
                        return -EINVAL;
 
                if (XFS_IS_REALTIME_INODE(ip) ||
-                   (fa->fsx_xflags & XFS_XFLAG_REALTIME)) {
+                   (fa->fsx_xflags & FS_XFLAG_REALTIME)) {
                        size = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog;
                } else {
                        size = mp->m_sb.sb_blocksize;
@@ -1143,7 +1159,7 @@ xfs_ioctl_setattr_check_extsize(
                if (fa->fsx_extsize % size)
                        return -EINVAL;
        } else
-               fa->fsx_xflags &= ~(XFS_XFLAG_EXTSIZE | XFS_XFLAG_EXTSZINHERIT);
+               fa->fsx_xflags &= ~(FS_XFLAG_EXTSIZE | FS_XFLAG_EXTSZINHERIT);
 
        return 0;
 }
@@ -1168,7 +1184,7 @@ xfs_ioctl_setattr_check_projid(
 
        if (xfs_get_projid(ip) != fa->fsx_projid)
                return -EINVAL;
-       if ((fa->fsx_xflags & XFS_XFLAG_PROJINHERIT) !=
+       if ((fa->fsx_xflags & FS_XFLAG_PROJINHERIT) !=
            (ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT))
                return -EINVAL;
 
index 06eafafe636e20f90e8f696ee20c0bb01b64ea8f..76b71a1c6c323e2043aeab1e93fb5a66db9f39d0 100644 (file)
@@ -1205,8 +1205,8 @@ xfs_diflags_to_iflags(
                inode->i_flags |= S_SYNC;
        if (flags & XFS_DIFLAG_NOATIME)
                inode->i_flags |= S_NOATIME;
-       /* XXX: Also needs an on-disk per inode flag! */
-       if (ip->i_mount->m_flags & XFS_MOUNT_DAX)
+       if (ip->i_mount->m_flags & XFS_MOUNT_DAX ||
+           ip->i_d.di_flags2 & XFS_DIFLAG2_DAX)
                inode->i_flags |= S_DAX;
 }
 
index dc6221942b85f437767decfca9bf7ca9b0a94026..ade236e90bb3612d429a8b6b0909b3937302096c 100644 (file)
@@ -42,11 +42,11 @@ xfs_break_layouts(
        while ((error = break_layout(inode, false) == -EWOULDBLOCK)) {
                xfs_iunlock(ip, *iolock);
                if (with_imutex && (*iolock & XFS_IOLOCK_EXCL))
-                       mutex_unlock(&inode->i_mutex);
+                       inode_unlock(inode);
                error = break_layout(inode, true);
                *iolock = XFS_IOLOCK_EXCL;
                if (with_imutex)
-                       mutex_lock(&inode->i_mutex);
+                       inode_lock(inode);
                xfs_ilock(ip, *iolock);
        }
 
index aa67339b953722b4e5c73442ac14835f0f9c4e32..4f18fd92ca13b21d8fd68e955e082d9db9a61195 100644 (file)
@@ -497,7 +497,6 @@ xfsaild(
        long            tout = 0;       /* milliseconds */
 
        current->flags |= PF_MEMALLOC;
-       set_freezable();
 
        while (!kthread_should_stop()) {
                if (tout && tout <= 20)
index 717a29810473894aaf2f22e6e487f84087628df5..dad8af3ebeb5405c144db72cea488d7f5ea5fd39 100644 (file)
@@ -133,6 +133,5 @@ extern int acpi_get_psd_map(struct cpudata **);
 /* Methods to interact with the PCC mailbox controller. */
 extern struct mbox_chan *
        pcc_mbox_request_channel(struct mbox_client *, unsigned int);
-extern int mbox_send_message(struct mbox_chan *chan, void *mssg);
 
 #endif /* _CPPC_ACPI_H*/
index 3d69c93d50e83d7b804f9de7260007ffc9903d69..6361892ea737137899c5d9489634446cb3bdedac 100644 (file)
@@ -204,6 +204,7 @@ struct crypto_ahash {
                      unsigned int keylen);
 
        unsigned int reqsize;
+       bool has_setkey;
        struct crypto_tfm base;
 };
 
@@ -375,6 +376,11 @@ static inline void *ahash_request_ctx(struct ahash_request *req)
 int crypto_ahash_setkey(struct crypto_ahash *tfm, const u8 *key,
                        unsigned int keylen);
 
+static inline bool crypto_ahash_has_setkey(struct crypto_ahash *tfm)
+{
+       return tfm->has_setkey;
+}
+
 /**
  * crypto_ahash_finup() - update and finalize message digest
  * @req: reference to the ahash_request handle that holds all information
index 018afb264ac261ea5dea2f3afa547cf9a8f7a202..a2bfd7843f18f6e79d8bc7e5743b031345153053 100644 (file)
@@ -30,6 +30,9 @@ struct alg_sock {
 
        struct sock *parent;
 
+       unsigned int refcnt;
+       unsigned int nokey_refcnt;
+
        const struct af_alg_type *type;
        void *private;
 };
@@ -50,9 +53,11 @@ struct af_alg_type {
        void (*release)(void *private);
        int (*setkey)(void *private, const u8 *key, unsigned int keylen);
        int (*accept)(void *private, struct sock *sk);
+       int (*accept_nokey)(void *private, struct sock *sk);
        int (*setauthsize)(void *private, unsigned int authsize);
 
        struct proto_ops *ops;
+       struct proto_ops *ops_nokey;
        struct module *owner;
        char name[14];
 };
@@ -67,6 +72,7 @@ int af_alg_register_type(const struct af_alg_type *type);
 int af_alg_unregister_type(const struct af_alg_type *type);
 
 int af_alg_release(struct socket *sock);
+void af_alg_release_parent(struct sock *sk);
 int af_alg_accept(struct sock *sk, struct socket *newsock);
 
 int af_alg_make_sg(struct af_alg_sgl *sgl, struct iov_iter *iter, int len);
@@ -83,11 +89,6 @@ static inline struct alg_sock *alg_sk(struct sock *sk)
        return (struct alg_sock *)sk;
 }
 
-static inline void af_alg_release_parent(struct sock *sk)
-{
-       sock_put(alg_sk(sk)->parent);
-}
-
 static inline void af_alg_init_completion(struct af_alg_completion *completion)
 {
        init_completion(&completion->completion);
index d8dd41fb034fe5af52b68699096abb6ebc5559d5..fd8742a40ff3e93476ec3b8cdb1370200275b6d3 100644 (file)
@@ -61,6 +61,8 @@ struct crypto_skcipher {
        unsigned int ivsize;
        unsigned int reqsize;
 
+       bool has_setkey;
+
        struct crypto_tfm base;
 };
 
@@ -305,6 +307,11 @@ static inline int crypto_skcipher_setkey(struct crypto_skcipher *tfm,
        return tfm->setkey(tfm, key, keylen);
 }
 
+static inline bool crypto_skcipher_has_setkey(struct crypto_skcipher *tfm)
+{
+       return tfm->has_setkey;
+}
+
 /**
  * crypto_skcipher_reqtfm() - obtain cipher handle from request
  * @req: skcipher_request out of which the cipher handle is to be obtained
index 89d008dc08e2e2c6caa1d1d5eeb1ac3bdedd2474..fe5efada9d68237c3863ad8f9bb972da6f610819 100644 (file)
@@ -42,6 +42,10 @@ int drm_atomic_helper_commit(struct drm_device *dev,
                             struct drm_atomic_state *state,
                             bool async);
 
+bool drm_atomic_helper_framebuffer_changed(struct drm_device *dev,
+                                          struct drm_atomic_state *old_state,
+                                          struct drm_crtc *crtc);
+
 void drm_atomic_helper_wait_for_vblanks(struct drm_device *dev,
                                        struct drm_atomic_state *old_state);
 
index 7bfb063029d83fe1374e78a174f4a638bc39bc49..461a0558bca4d8d16e81b88e0286e3fa6251ab79 100644 (file)
 
 void drm_clflush_pages(struct page *pages[], unsigned long num_pages);
 
+static inline bool drm_arch_can_wc_memory(void)
+{
+#if defined(CONFIG_PPC) && !defined(CONFIG_NOT_COHERENT_CACHE)
+       return false;
+#else
+       return true;
+#endif
+}
+
 #endif
index 24ab1787b771936fdf1894aabd6271f65a771993..fdb47051d5492a4db126cc525667bd181343d2cd 100644 (file)
@@ -44,8 +44,6 @@ struct drm_dp_vcpi {
 /**
  * struct drm_dp_mst_port - MST port
  * @kref: reference count for this port.
- * @guid_valid: for DP 1.2 devices if we have validated the GUID.
- * @guid: guid for DP 1.2 device on this port.
  * @port_num: port number
  * @input: if this port is an input port.
  * @mcs: message capability status - DP 1.2 spec.
@@ -70,10 +68,6 @@ struct drm_dp_vcpi {
 struct drm_dp_mst_port {
        struct kref kref;
 
-       /* if dpcd 1.2 device is on this port - its GUID info */
-       bool guid_valid;
-       u8 guid[16];
-
        u8 port_num;
        bool input;
        bool mcs;
@@ -110,10 +104,12 @@ struct drm_dp_mst_port {
  * @tx_slots: transmission slots for this device.
  * @last_seqno: last sequence number used to talk to this.
  * @link_address_sent: if a link address message has been sent to this device yet.
+ * @guid: guid for DP 1.2 branch device. port under this branch can be
+ * identified by port #.
  *
  * This structure represents an MST branch device, there is one
- * primary branch device at the root, along with any others connected
- * to downstream ports
+ * primary branch device at the root, along with any other branches connected
+ * to downstream port of parent branches.
  */
 struct drm_dp_mst_branch {
        struct kref kref;
@@ -132,6 +128,9 @@ struct drm_dp_mst_branch {
        struct drm_dp_sideband_msg_tx *tx_slots[2];
        int last_seqno;
        bool link_address_sent;
+
+       /* global unique identifier to identify branch devices */
+       u8 guid[16];
 };
 
 
@@ -406,11 +405,9 @@ struct drm_dp_payload {
  * @conn_base_id: DRM connector ID this mgr is connected to.
  * @down_rep_recv: msg receiver state for down replies.
  * @up_req_recv: msg receiver state for up requests.
- * @lock: protects mst state, primary, guid, dpcd.
+ * @lock: protects mst state, primary, dpcd.
  * @mst_state: if this manager is enabled for an MST capable port.
  * @mst_primary: pointer to the primary branch device.
- * @guid_valid: GUID valid for the primary branch device.
- * @guid: GUID for primary port.
  * @dpcd: cache of DPCD for primary port.
  * @pbn_div: PBN to slots divisor.
  *
@@ -432,13 +429,11 @@ struct drm_dp_mst_topology_mgr {
        struct drm_dp_sideband_msg_rx up_req_recv;
 
        /* pointer to info about the initial MST device */
-       struct mutex lock; /* protects mst_state + primary + guid + dpcd */
+       struct mutex lock; /* protects mst_state + primary + dpcd */
 
        bool mst_state;
        struct drm_dp_mst_branch *mst_primary;
-       /* primary MST device GUID */
-       bool guid_valid;
-       u8 guid[16];
+
        u8 dpcd[DP_RECEIVER_CAP_SIZE];
        u8 sink_count;
        int pbn_div;
index d639049a613dfdb264650f8ee938836e6ed28e10..553210c02ee0f655fd28b8ae64d370e7cee2fc70 100644 (file)
@@ -73,18 +73,28 @@ static inline u32 dfixed_div(fixed20_12 A, fixed20_12 B)
 #define DRM_FIXED_ONE          (1ULL << DRM_FIXED_POINT)
 #define DRM_FIXED_DECIMAL_MASK (DRM_FIXED_ONE - 1)
 #define DRM_FIXED_DIGITS_MASK  (~DRM_FIXED_DECIMAL_MASK)
+#define DRM_FIXED_EPSILON      1LL
+#define DRM_FIXED_ALMOST_ONE   (DRM_FIXED_ONE - DRM_FIXED_EPSILON)
 
 static inline s64 drm_int2fixp(int a)
 {
        return ((s64)a) << DRM_FIXED_POINT;
 }
 
-static inline int drm_fixp2int(int64_t a)
+static inline int drm_fixp2int(s64 a)
 {
        return ((s64)a) >> DRM_FIXED_POINT;
 }
 
-static inline unsigned drm_fixp_msbset(int64_t a)
+static inline int drm_fixp2int_ceil(s64 a)
+{
+       if (a > 0)
+               return drm_fixp2int(a + DRM_FIXED_ALMOST_ONE);
+       else
+               return drm_fixp2int(a - DRM_FIXED_ALMOST_ONE);
+}
+
+static inline unsigned drm_fixp_msbset(s64 a)
 {
        unsigned shift, sign = (a >> 63) & 1;
 
@@ -136,6 +146,45 @@ static inline s64 drm_fixp_div(s64 a, s64 b)
        return result;
 }
 
+static inline s64 drm_fixp_from_fraction(s64 a, s64 b)
+{
+       s64 res;
+       bool a_neg = a < 0;
+       bool b_neg = b < 0;
+       u64 a_abs = a_neg ? -a : a;
+       u64 b_abs = b_neg ? -b : b;
+       u64 rem;
+
+       /* determine integer part */
+       u64 res_abs  = div64_u64_rem(a_abs, b_abs, &rem);
+
+       /* determine fractional part */
+       {
+               u32 i = DRM_FIXED_POINT;
+
+               do {
+                       rem <<= 1;
+                       res_abs <<= 1;
+                       if (rem >= b_abs) {
+                               res_abs |= 1;
+                               rem -= b_abs;
+                       }
+               } while (--i != 0);
+       }
+
+       /* round up LSB */
+       {
+               u64 summand = (rem << 1) >= b_abs;
+
+               res_abs += summand;
+       }
+
+       res = (s64) res_abs;
+       if (a_neg ^ b_neg)
+               res = -res;
+       return res;
+}
+
 static inline s64 drm_fixp_exp(s64 x)
 {
        s64 tolerance = div64_s64(DRM_FIXED_ONE, 1000000);
diff --git a/include/linux/bcm963xx_nvram.h b/include/linux/bcm963xx_nvram.h
new file mode 100644 (file)
index 0000000..290c231
--- /dev/null
@@ -0,0 +1,112 @@
+#ifndef __LINUX_BCM963XX_NVRAM_H__
+#define __LINUX_BCM963XX_NVRAM_H__
+
+#include <linux/crc32.h>
+#include <linux/if_ether.h>
+#include <linux/sizes.h>
+#include <linux/types.h>
+
+/*
+ * Broadcom BCM963xx SoC board nvram data structure.
+ *
+ * The nvram structure varies in size depending on the SoC board version. Use
+ * the appropriate minimum BCM963XX_NVRAM_*_SIZE define for the information
+ * you need instead of sizeof(struct bcm963xx_nvram) as this may change.
+ */
+
+#define BCM963XX_NVRAM_V4_SIZE         300
+#define BCM963XX_NVRAM_V5_SIZE         (1 * SZ_1K)
+
+#define BCM963XX_DEFAULT_PSI_SIZE      64
+
+enum bcm963xx_nvram_nand_part {
+       BCM963XX_NVRAM_NAND_PART_BOOT = 0,
+       BCM963XX_NVRAM_NAND_PART_ROOTFS_1,
+       BCM963XX_NVRAM_NAND_PART_ROOTFS_2,
+       BCM963XX_NVRAM_NAND_PART_DATA,
+       BCM963XX_NVRAM_NAND_PART_BBT,
+
+       __BCM963XX_NVRAM_NAND_NR_PARTS
+};
+
+struct bcm963xx_nvram {
+       u32     version;
+       char    bootline[256];
+       char    name[16];
+       u32     main_tp_number;
+       u32     psi_size;
+       u32     mac_addr_count;
+       u8      mac_addr_base[ETH_ALEN];
+       u8      __reserved1[2];
+       u32     checksum_v4;
+
+       u8      __reserved2[292];
+       u32     nand_part_offset[__BCM963XX_NVRAM_NAND_NR_PARTS];
+       u32     nand_part_size[__BCM963XX_NVRAM_NAND_NR_PARTS];
+       u8      __reserved3[388];
+       u32     checksum_v5;
+};
+
+#define BCM963XX_NVRAM_NAND_PART_OFFSET(nvram, part) \
+       bcm963xx_nvram_nand_part_offset(nvram, BCM963XX_NVRAM_NAND_PART_ ##part)
+
+static inline u64 __pure bcm963xx_nvram_nand_part_offset(
+       const struct bcm963xx_nvram *nvram,
+       enum bcm963xx_nvram_nand_part part)
+{
+       return nvram->nand_part_offset[part] * SZ_1K;
+}
+
+#define BCM963XX_NVRAM_NAND_PART_SIZE(nvram, part) \
+       bcm963xx_nvram_nand_part_size(nvram, BCM963XX_NVRAM_NAND_PART_ ##part)
+
+static inline u64 __pure bcm963xx_nvram_nand_part_size(
+       const struct bcm963xx_nvram *nvram,
+       enum bcm963xx_nvram_nand_part part)
+{
+       return nvram->nand_part_size[part] * SZ_1K;
+}
+
+/*
+ * bcm963xx_nvram_checksum - Verify nvram checksum
+ *
+ * @nvram: pointer to full size nvram data structure
+ * @expected_out: optional pointer to store expected checksum value
+ * @actual_out: optional pointer to store actual checksum value
+ *
+ * Return: 0 if the checksum is valid, otherwise -EINVAL
+ */
+static int __maybe_unused bcm963xx_nvram_checksum(
+       const struct bcm963xx_nvram *nvram,
+       u32 *expected_out, u32 *actual_out)
+{
+       u32 expected, actual;
+       size_t len;
+
+       if (nvram->version <= 4) {
+               expected = nvram->checksum_v4;
+               len = BCM963XX_NVRAM_V4_SIZE - sizeof(u32);
+       } else {
+               expected = nvram->checksum_v5;
+               len = BCM963XX_NVRAM_V5_SIZE - sizeof(u32);
+       }
+
+       /*
+        * Calculate the CRC32 value for the nvram with a checksum value
+        * of 0 without modifying or copying the nvram by combining:
+        * - The CRC32 of the nvram without the checksum value
+        * - The CRC32 of a zero checksum value (which is also 0)
+        */
+       actual = crc32_le_combine(
+               crc32_le(~0, (u8 *)nvram, len), 0, sizeof(u32));
+
+       if (expected_out)
+               *expected_out = expected;
+
+       if (actual_out)
+               *actual_out = actual;
+
+       return expected == actual ? 0 : -EINVAL;
+};
+
+#endif /* __LINUX_BCM963XX_NVRAM_H__ */
diff --git a/include/linux/bcm963xx_tag.h b/include/linux/bcm963xx_tag.h
new file mode 100644 (file)
index 0000000..161c7b3
--- /dev/null
@@ -0,0 +1,102 @@
+#ifndef __LINUX_BCM963XX_TAG_H__
+#define __LINUX_BCM963XX_TAG_H__
+
+#include <linux/types.h>
+
+#define TAGVER_LEN             4       /* Length of Tag Version */
+#define TAGLAYOUT_LEN          4       /* Length of FlashLayoutVer */
+#define SIG1_LEN               20      /* Company Signature 1 Length */
+#define SIG2_LEN               14      /* Company Signature 2 Length */
+#define BOARDID_LEN            16      /* Length of BoardId */
+#define ENDIANFLAG_LEN         2       /* Endian Flag Length */
+#define CHIPID_LEN             6       /* Chip Id Length */
+#define IMAGE_LEN              10      /* Length of Length Field */
+#define ADDRESS_LEN            12      /* Length of Address field */
+#define IMAGE_SEQUENCE_LEN     4       /* Image sequence Length */
+#define RSASIG_LEN             20      /* Length of RSA Signature in tag */
+#define TAGINFO1_LEN           30      /* Length of vendor information field1 in tag */
+#define FLASHLAYOUTVER_LEN     4       /* Length of Flash Layout Version String tag */
+#define TAGINFO2_LEN           16      /* Length of vendor information field2 in tag */
+#define ALTTAGINFO_LEN         54      /* Alternate length for vendor information; Pirelli */
+
+#define NUM_PIRELLI            2
+#define IMAGETAG_CRC_START     0xFFFFFFFF
+
+#define PIRELLI_BOARDS { \
+       "AGPF-S0", \
+       "DWV-S0", \
+}
+
+/* Extended flash address, needs to be subtracted
+ * from bcm_tag flash image offsets.
+ */
+#define BCM963XX_EXTENDED_SIZE 0xBFC00000
+
+/*
+ * The broadcom firmware assumes the rootfs starts the image,
+ * therefore uses the rootfs start (flash_image_address)
+ * to determine where to flash the image.  Since we have the kernel first
+ * we have to give it the kernel address, but the crc uses the length
+ * associated with this address (root_length), which is added to the kernel
+ * length (kernel_length) to determine the length of image to flash and thus
+ * needs to be rootfs + deadcode (jffs2 EOF marker)
+*/
+
+struct bcm_tag {
+       /* 0-3: Version of the image tag */
+       char tag_version[TAGVER_LEN];
+       /* 4-23: Company Line 1 */
+       char sig_1[SIG1_LEN];
+       /*  24-37: Company Line 2 */
+       char sig_2[SIG2_LEN];
+       /* 38-43: Chip this image is for */
+       char chip_id[CHIPID_LEN];
+       /* 44-59: Board name */
+       char board_id[BOARDID_LEN];
+       /* 60-61: Map endianness -- 1 BE 0 LE */
+       char big_endian[ENDIANFLAG_LEN];
+       /* 62-71: Total length of image */
+       char total_length[IMAGE_LEN];
+       /* 72-83: Address in memory of CFE */
+       char cfe__address[ADDRESS_LEN];
+       /* 84-93: Size of CFE */
+       char cfe_length[IMAGE_LEN];
+       /* 94-105: Address in memory of image start
+        * (kernel for OpenWRT, rootfs for stock firmware)
+        */
+       char flash_image_start[ADDRESS_LEN];
+       /* 106-115: Size of rootfs */
+       char root_length[IMAGE_LEN];
+       /* 116-127: Address in memory of kernel */
+       char kernel_address[ADDRESS_LEN];
+       /* 128-137: Size of kernel */
+       char kernel_length[IMAGE_LEN];
+       /* 138-141: Image sequence number
+        * (to be incremented when flashed with a new image)
+        */
+       char image_sequence[IMAGE_SEQUENCE_LEN];
+       /* 142-161: RSA Signature (not used; some vendors may use this) */
+       char rsa_signature[RSASIG_LEN];
+       /* 162-191: Compilation and related information (not used in OpenWrt) */
+       char information1[TAGINFO1_LEN];
+       /* 192-195: Version flash layout */
+       char flash_layout_ver[FLASHLAYOUTVER_LEN];
+       /* 196-199: kernel+rootfs CRC32 */
+       __u32 fskernel_crc;
+       /* 200-215: Unused except on Alice Gate where is is information */
+       char information2[TAGINFO2_LEN];
+       /* 216-219: CRC32 of image less imagetag (kernel for Alice Gate) */
+       __u32 image_crc;
+       /* 220-223: CRC32 of rootfs partition */
+       __u32 rootfs_crc;
+       /* 224-227: CRC32 of kernel partition */
+       __u32 kernel_crc;
+       /* 228-235: Unused at present */
+       char reserved1[8];
+       /* 236-239: CRC32 of header excluding last 20 bytes */
+       __u32 header_crc;
+       /* 240-255: Unused at present */
+       char reserved2[16];
+};
+
+#endif /* __LINUX_BCM63XX_TAG_H__ */
diff --git a/include/linux/blk-iopoll.h b/include/linux/blk-iopoll.h
deleted file mode 100644 (file)
index 77ae77c..0000000
+++ /dev/null
@@ -1,46 +0,0 @@
-#ifndef BLK_IOPOLL_H
-#define BLK_IOPOLL_H
-
-struct blk_iopoll;
-typedef int (blk_iopoll_fn)(struct blk_iopoll *, int);
-
-struct blk_iopoll {
-       struct list_head list;
-       unsigned long state;
-       unsigned long data;
-       int weight;
-       int max;
-       blk_iopoll_fn *poll;
-};
-
-enum {
-       IOPOLL_F_SCHED          = 0,
-       IOPOLL_F_DISABLE        = 1,
-};
-
-/*
- * Returns 0 if we successfully set the IOPOLL_F_SCHED bit, indicating
- * that we were the first to acquire this iop for scheduling. If this iop
- * is currently disabled, return "failure".
- */
-static inline int blk_iopoll_sched_prep(struct blk_iopoll *iop)
-{
-       if (!test_bit(IOPOLL_F_DISABLE, &iop->state))
-               return test_and_set_bit(IOPOLL_F_SCHED, &iop->state);
-
-       return 1;
-}
-
-static inline int blk_iopoll_disable_pending(struct blk_iopoll *iop)
-{
-       return test_bit(IOPOLL_F_DISABLE, &iop->state);
-}
-
-extern void blk_iopoll_sched(struct blk_iopoll *);
-extern void blk_iopoll_init(struct blk_iopoll *, int, blk_iopoll_fn *);
-extern void blk_iopoll_complete(struct blk_iopoll *);
-extern void __blk_iopoll_complete(struct blk_iopoll *);
-extern void blk_iopoll_enable(struct blk_iopoll *);
-extern void blk_iopoll_disable(struct blk_iopoll *);
-
-#endif
index f89b31d45cc894814d409a19e161a29c2c41e832..c1ef6f14e7be7f1317e34a0deac61fcb80511eef 100644 (file)
 #define CEPH_FEATURE_OSD_MIN_SIZE_RECOVERY (1ULL<<49)
 // duplicated since it was introduced at the same time as MIN_SIZE_RECOVERY
 #define CEPH_FEATURE_OSD_PROXY_FEATURES (1ULL<<49)  /* overlap w/ above */
+#define CEPH_FEATURE_MON_METADATA (1ULL<<50)
+#define CEPH_FEATURE_OSD_BITWISE_HOBJ_SORT (1ULL<<51) /* can sort objs bitwise */
+#define CEPH_FEATURE_OSD_PROXY_WRITE_FEATURES (1ULL<<52)
+#define CEPH_FEATURE_ERASURE_CODE_PLUGINS_V3 (1ULL<<53)
+#define CEPH_FEATURE_OSD_HITSET_GMT (1ULL<<54)
+#define CEPH_FEATURE_HAMMER_0_94_4 (1ULL<<55)
+#define CEPH_FEATURE_NEW_OSDOP_ENCODING   (1ULL<<56) /* New, v7 encoding */
+#define CEPH_FEATURE_MON_STATEFUL_SUB (1ULL<<57) /* stateful mon subscription */
+#define CEPH_FEATURE_MON_ROUTE_OSDMAP (1ULL<<57) /* peon sends osdmaps */
+#define CEPH_FEATURE_CRUSH_TUNABLES5   (1ULL<<58) /* chooseleaf stable mode */
+// duplicated since it was introduced at the same time as CEPH_FEATURE_CRUSH_TUNABLES5
+#define CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING   (1ULL<<58) /* New, v7 encoding */
 
 /*
  * The introduction of CEPH_FEATURE_OSD_SNAPMAPPER caused the feature
@@ -108,7 +120,9 @@ static inline u64 ceph_sanitize_features(u64 features)
         CEPH_FEATURE_CRUSH_TUNABLES3 |         \
         CEPH_FEATURE_OSD_PRIMARY_AFFINITY |    \
         CEPH_FEATURE_MSGR_KEEPALIVE2 |         \
-        CEPH_FEATURE_CRUSH_V4)
+        CEPH_FEATURE_CRUSH_V4 |                \
+        CEPH_FEATURE_CRUSH_TUNABLES5 |         \
+        CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING)
 
 #define CEPH_FEATURES_REQUIRED_DEFAULT   \
        (CEPH_FEATURE_NOSRCADDR |        \
index 5babb8e95352a6a7468feca70a343fafb56bddad..b827e066e55a198ec13f41d52b52964bd72edf4d 100644 (file)
@@ -40,46 +40,11 @@ static inline __u32 ceph_frag_mask_shift(__u32 f)
        return 24 - ceph_frag_bits(f);
 }
 
-static inline int ceph_frag_contains_value(__u32 f, __u32 v)
+static inline bool ceph_frag_contains_value(__u32 f, __u32 v)
 {
        return (v & ceph_frag_mask(f)) == ceph_frag_value(f);
 }
-static inline int ceph_frag_contains_frag(__u32 f, __u32 sub)
-{
-       /* is sub as specific as us, and contained by us? */
-       return ceph_frag_bits(sub) >= ceph_frag_bits(f) &&
-              (ceph_frag_value(sub) & ceph_frag_mask(f)) == ceph_frag_value(f);
-}
 
-static inline __u32 ceph_frag_parent(__u32 f)
-{
-       return ceph_frag_make(ceph_frag_bits(f) - 1,
-                        ceph_frag_value(f) & (ceph_frag_mask(f) << 1));
-}
-static inline int ceph_frag_is_left_child(__u32 f)
-{
-       return ceph_frag_bits(f) > 0 &&
-               (ceph_frag_value(f) & (0x1000000 >> ceph_frag_bits(f))) == 0;
-}
-static inline int ceph_frag_is_right_child(__u32 f)
-{
-       return ceph_frag_bits(f) > 0 &&
-               (ceph_frag_value(f) & (0x1000000 >> ceph_frag_bits(f))) == 1;
-}
-static inline __u32 ceph_frag_sibling(__u32 f)
-{
-       return ceph_frag_make(ceph_frag_bits(f),
-                     ceph_frag_value(f) ^ (0x1000000 >> ceph_frag_bits(f)));
-}
-static inline __u32 ceph_frag_left_child(__u32 f)
-{
-       return ceph_frag_make(ceph_frag_bits(f)+1, ceph_frag_value(f));
-}
-static inline __u32 ceph_frag_right_child(__u32 f)
-{
-       return ceph_frag_make(ceph_frag_bits(f)+1,
-             ceph_frag_value(f) | (0x1000000 >> (1+ceph_frag_bits(f))));
-}
 static inline __u32 ceph_frag_make_child(__u32 f, int by, int i)
 {
        int newbits = ceph_frag_bits(f) + by;
index 71b1d6cdcb5d1fc53dd45fa3950e138cfac19dfe..8dbd7879fdc6b74719d9cb65b85d8c60f589bde6 100644 (file)
@@ -220,6 +220,7 @@ struct ceph_connection {
        struct ceph_entity_addr actual_peer_addr;
 
        /* message out temps */
+       struct ceph_msg_header out_hdr;
        struct ceph_msg *out_msg;        /* sending message (== tail of
                                            out_sent) */
        bool out_msg_done;
@@ -229,7 +230,6 @@ struct ceph_connection {
        int out_kvec_left;   /* kvec's left in out_kvec */
        int out_skip;        /* skip this many bytes */
        int out_kvec_bytes;  /* total bytes left */
-       bool out_kvec_is_msg; /* kvec refers to out_msg */
        int out_more;        /* there is more data after the kvecs */
        __le64 out_temp_ack; /* for writing an ack */
        struct ceph_timespec out_temp_keepalive2; /* for writing keepalive2
index bda5ec0b4b4dc26a8431756e468241aad6bdd030..fccf7f44139dd67c2bae6db1799e2517bcd9cd00 100644 (file)
@@ -37,7 +37,7 @@ struct cleancache_ops {
        void (*invalidate_fs)(int);
 };
 
-extern int cleancache_register_ops(struct cleancache_ops *ops);
+extern int cleancache_register_ops(const struct cleancache_ops *ops);
 extern void __cleancache_init_fs(struct super_block *);
 extern void __cleancache_init_shared_fs(struct super_block *);
 extern int  __cleancache_get_page(struct page *);
@@ -48,14 +48,14 @@ extern void __cleancache_invalidate_fs(struct super_block *);
 
 #ifdef CONFIG_CLEANCACHE
 #define cleancache_enabled (1)
-static inline bool cleancache_fs_enabled(struct page *page)
-{
-       return page->mapping->host->i_sb->cleancache_poolid >= 0;
-}
 static inline bool cleancache_fs_enabled_mapping(struct address_space *mapping)
 {
        return mapping->host->i_sb->cleancache_poolid >= 0;
 }
+static inline bool cleancache_fs_enabled(struct page *page)
+{
+       return cleancache_fs_enabled_mapping(page->mapping);
+}
 #else
 #define cleancache_enabled (0)
 #define cleancache_fs_enabled(_page) (0)
@@ -89,11 +89,9 @@ static inline void cleancache_init_shared_fs(struct super_block *sb)
 
 static inline int cleancache_get_page(struct page *page)
 {
-       int ret = -1;
-
        if (cleancache_enabled && cleancache_fs_enabled(page))
-               ret = __cleancache_get_page(page);
-       return ret;
+               return __cleancache_get_page(page);
+       return -1;
 }
 
 static inline void cleancache_put_page(struct page *page)
index 48b49305716bd728e69477db6b430c34e7781746..be8f12b8f1950499380c10de27ab6928df25fd8e 100644 (file)
@@ -59,7 +59,8 @@ enum {
        CRUSH_RULE_SET_CHOOSELEAF_TRIES = 9, /* override chooseleaf_descend_once */
        CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES = 10,
        CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES = 11,
-       CRUSH_RULE_SET_CHOOSELEAF_VARY_R = 12
+       CRUSH_RULE_SET_CHOOSELEAF_VARY_R = 12,
+       CRUSH_RULE_SET_CHOOSELEAF_STABLE = 13
 };
 
 /*
@@ -205,6 +206,11 @@ struct crush_map {
         * mappings line up a bit better with previous mappings. */
        __u8 chooseleaf_vary_r;
 
+       /* if true, it makes chooseleaf firstn to return stable results (if
+        * no local retry) so that data migrations would be optimal when some
+        * device fails. */
+       __u8 chooseleaf_stable;
+
 #ifndef __KERNEL__
        /*
         * version 0 (original) of straw_calc has various flaws.  version 1
index b415e521528de3d5fc5db598fb588f688f42d307..818e45078929b94af5780a8c91f0e84d82da8e0c 100644 (file)
@@ -14,6 +14,17 @@ int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t,
                dax_iodone_t);
 int __dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t,
                dax_iodone_t);
+
+#ifdef CONFIG_FS_DAX
+struct page *read_dax_sector(struct block_device *bdev, sector_t n);
+#else
+static inline struct page *read_dax_sector(struct block_device *bdev,
+               sector_t n)
+{
+       return ERR_PTR(-ENXIO);
+}
+#endif
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 int dax_pmd_fault(struct vm_area_struct *, unsigned long addr, pmd_t *,
                                unsigned int flags, get_block_t, dax_iodone_t);
@@ -36,4 +47,11 @@ static inline bool vma_is_dax(struct vm_area_struct *vma)
 {
        return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host);
 }
+
+static inline bool dax_mapping(struct address_space *mapping)
+{
+       return mapping->host && IS_DAX(mapping->host);
+}
+int dax_writeback_mapping_range(struct address_space *mapping, loff_t start,
+               loff_t end);
 #endif
index eb73d74ed99262c83f2fa5e51582b2da0344bafa..ae681002100a1fb8401e934a99f40dedf039cd7d 100644 (file)
@@ -433,7 +433,8 @@ struct address_space {
        struct rw_semaphore     i_mmap_rwsem;   /* protect tree, count, list */
        /* Protected by tree_lock together with the radix tree */
        unsigned long           nrpages;        /* number of total pages */
-       unsigned long           nrshadows;      /* number of shadow entries */
+       /* number of shadow or DAX exceptional entries */
+       unsigned long           nrexceptional;
        pgoff_t                 writeback_index;/* writeback starts here */
        const struct address_space_operations *a_ops;   /* methods */
        unsigned long           flags;          /* error bits/gfp mask */
@@ -483,9 +484,6 @@ struct block_device {
        int                     bd_fsfreeze_count;
        /* Mutex for freeze */
        struct mutex            bd_fsfreeze_mutex;
-#ifdef CONFIG_FS_DAX
-       int                     bd_map_count;
-#endif
 };
 
 /*
@@ -714,6 +712,31 @@ enum inode_i_mutex_lock_class
        I_MUTEX_PARENT2,
 };
 
+static inline void inode_lock(struct inode *inode)
+{
+       mutex_lock(&inode->i_mutex);
+}
+
+static inline void inode_unlock(struct inode *inode)
+{
+       mutex_unlock(&inode->i_mutex);
+}
+
+static inline int inode_trylock(struct inode *inode)
+{
+       return mutex_trylock(&inode->i_mutex);
+}
+
+static inline int inode_is_locked(struct inode *inode)
+{
+       return mutex_is_locked(&inode->i_mutex);
+}
+
+static inline void inode_lock_nested(struct inode *inode, unsigned subclass)
+{
+       mutex_lock_nested(&inode->i_mutex, subclass);
+}
+
 void lock_two_nondirectories(struct inode *, struct inode*);
 void unlock_two_nondirectories(struct inode *, struct inode*);
 
@@ -2881,7 +2904,7 @@ extern void replace_mount_options(struct super_block *sb, char *options);
 
 static inline bool io_is_direct(struct file *filp)
 {
-       return (filp->f_flags & O_DIRECT) || IS_DAX(file_inode(filp));
+       return (filp->f_flags & O_DIRECT) || IS_DAX(filp->f_mapping->host);
 }
 
 static inline int iocb_flags(struct file *file)
@@ -3047,8 +3070,8 @@ static inline bool dir_emit_dots(struct file *file, struct dir_context *ctx)
 }
 static inline bool dir_relax(struct inode *inode)
 {
-       mutex_unlock(&inode->i_mutex);
-       mutex_lock(&inode->i_mutex);
+       inode_unlock(inode);
+       inode_lock(inode);
        return !IS_DEADDIR(inode);
 }
 
index 0639dcc9819523a002de1149da5629d24bce039b..81de7123959d96d97dd57d41d76289b3a8185920 100644 (file)
@@ -165,7 +165,6 @@ struct ftrace_ops {
        ftrace_func_t                   saved_func;
        int __percpu                    *disabled;
 #ifdef CONFIG_DYNAMIC_FTRACE
-       int                             nr_trampolines;
        struct ftrace_ops_hash          local_hash;
        struct ftrace_ops_hash          *func_hash;
        struct ftrace_ops_hash          old_hash;
index 28ad5f6494b018a2c49493920059de62fcacdb8e..af1f2b24bbe4172f6055407229f0670ee90dc534 100644 (file)
@@ -547,16 +547,16 @@ static inline bool pm_suspended_storage(void)
 }
 #endif /* CONFIG_PM_SLEEP */
 
-#ifdef CONFIG_CMA
-
+#if (defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || defined(CONFIG_CMA)
 /* The below functions must be run on a range from a single zone. */
 extern int alloc_contig_range(unsigned long start, unsigned long end,
                              unsigned migratetype);
 extern void free_contig_range(unsigned long pfn, unsigned nr_pages);
+#endif
 
+#ifdef CONFIG_CMA
 /* CMA stuff */
 extern void init_cma_reserved_pageblock(struct page *page);
-
 #endif
 
 #endif /* __LINUX_GFP_H */
index 76dd4f0da5ca9907572bea19f4b0d0dbe8ad06eb..2ead22dd74a00896d24fdb937242e8ba36a77cfe 100644 (file)
@@ -87,7 +87,8 @@ enum hrtimer_restart {
  * @function:  timer expiry callback function
  * @base:      pointer to the timer base (per cpu and per clock)
  * @state:     state information (See bit values above)
- * @start_pid: timer statistics field to store the pid of the task which
+ * @is_rel:    Set if the timer was armed relative
+ * @start_pid:  timer statistics field to store the pid of the task which
  *             started the timer
  * @start_site:        timer statistics field to store the site where the timer
  *             was started
@@ -101,7 +102,8 @@ struct hrtimer {
        ktime_t                         _softexpires;
        enum hrtimer_restart            (*function)(struct hrtimer *);
        struct hrtimer_clock_base       *base;
-       unsigned long                   state;
+       u8                              state;
+       u8                              is_rel;
 #ifdef CONFIG_TIMER_STATS
        int                             start_pid;
        void                            *start_site;
@@ -321,6 +323,27 @@ static inline void clock_was_set_delayed(void) { }
 
 #endif
 
+static inline ktime_t
+__hrtimer_expires_remaining_adjusted(const struct hrtimer *timer, ktime_t now)
+{
+       ktime_t rem = ktime_sub(timer->node.expires, now);
+
+       /*
+        * Adjust relative timers for the extra we added in
+        * hrtimer_start_range_ns() to prevent short timeouts.
+        */
+       if (IS_ENABLED(CONFIG_TIME_LOW_RES) && timer->is_rel)
+               rem.tv64 -= hrtimer_resolution;
+       return rem;
+}
+
+static inline ktime_t
+hrtimer_expires_remaining_adjusted(const struct hrtimer *timer)
+{
+       return __hrtimer_expires_remaining_adjusted(timer,
+                                                   timer->base->get_time());
+}
+
 extern void clock_was_set(void);
 #ifdef CONFIG_TIMERFD
 extern void timerfd_clock_was_set(void);
@@ -390,7 +413,12 @@ static inline void hrtimer_restart(struct hrtimer *timer)
 }
 
 /* Query timers: */
-extern ktime_t hrtimer_get_remaining(const struct hrtimer *timer);
+extern ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust);
+
+static inline ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
+{
+       return __hrtimer_get_remaining(timer, false);
+}
 
 extern u64 hrtimer_get_next_event(void);
 
index cfe81e10bd5429baf5ed0ccf50bd4613ff6ee3c6..459fd25b378e73cfd2e911761ad845076be547de 100644 (file)
@@ -120,15 +120,15 @@ extern void vma_adjust_trans_huge(struct vm_area_struct *vma,
                                    unsigned long start,
                                    unsigned long end,
                                    long adjust_next);
-extern bool __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
-               spinlock_t **ptl);
+extern spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd,
+               struct vm_area_struct *vma);
 /* mmap_sem must be held on entry */
-static inline bool pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
-               spinlock_t **ptl)
+static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd,
+               struct vm_area_struct *vma)
 {
        VM_BUG_ON_VMA(!rwsem_is_locked(&vma->vm_mm->mmap_sem), vma);
        if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd))
-               return __pmd_trans_huge_lock(pmd, vma, ptl);
+               return __pmd_trans_huge_lock(pmd, vma);
        else
                return false;
 }
@@ -190,10 +190,10 @@ static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
                                         long adjust_next)
 {
 }
-static inline bool pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
-               spinlock_t **ptl)
+static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd,
+               struct vm_area_struct *vma)
 {
-       return false;
+       return NULL;
 }
 
 static inline int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
index cb30edbfe9fcd010b217aa385073d54fa71105c4..0e95fcc75b2ac141ceeb3ebb3b2f79d7a5d2ddad 100644 (file)
@@ -413,7 +413,7 @@ enum
        NET_TX_SOFTIRQ,
        NET_RX_SOFTIRQ,
        BLOCK_SOFTIRQ,
-       BLOCK_IOPOLL_SOFTIRQ,
+       IRQ_POLL_SOFTIRQ,
        TASKLET_SOFTIRQ,
        SCHED_SOFTIRQ,
        HRTIMER_SOFTIRQ, /* Unused, but kept as tools rely on the
index f28dff313b07c3bb072dffc2235b27d5d0eaa09a..a5c539fa5d2bc03ba233f4d11de1b64d839990ab 100644 (file)
@@ -133,8 +133,9 @@ struct iommu_dm_region {
 
 /**
  * struct iommu_ops - iommu ops and capabilities
- * @domain_init: init iommu domain
- * @domain_destroy: destroy iommu domain
+ * @capable: check capability
+ * @domain_alloc: allocate iommu domain
+ * @domain_free: free iommu domain
  * @attach_dev: attach device to an iommu domain
  * @detach_dev: detach device from an iommu domain
  * @map: map a physically contiguous memory region to an iommu domain
@@ -144,8 +145,15 @@ struct iommu_dm_region {
  * @iova_to_phys: translate iova to physical address
  * @add_device: add device to iommu grouping
  * @remove_device: remove device from iommu grouping
+ * @device_group: find iommu group for a particular device
  * @domain_get_attr: Query domain attributes
  * @domain_set_attr: Change domain attributes
+ * @get_dm_regions: Request list of direct mapping requirements for a device
+ * @put_dm_regions: Free list of direct mapping requirements for a device
+ * @domain_window_enable: Configure and enable a particular window for a domain
+ * @domain_window_disable: Disable a particular window for a domain
+ * @domain_set_windows: Set the number of windows for a domain
+ * @domain_get_windows: Return the number of windows for a domain
  * @of_xlate: add OF master IDs to iommu grouping
  * @pgsize_bitmap: bitmap of supported page sizes
  * @priv: per-instance data private to the iommu driver
@@ -182,9 +190,9 @@ struct iommu_ops {
        int (*domain_window_enable)(struct iommu_domain *domain, u32 wnd_nr,
                                    phys_addr_t paddr, u64 size, int prot);
        void (*domain_window_disable)(struct iommu_domain *domain, u32 wnd_nr);
-       /* Set the numer of window per domain */
+       /* Set the number of windows per domain */
        int (*domain_set_windows)(struct iommu_domain *domain, u32 w_count);
-       /* Get the numer of window per domain */
+       /* Get the number of windows per domain */
        u32 (*domain_get_windows)(struct iommu_domain *domain);
 
 #ifdef CONFIG_OF_IOMMU
diff --git a/include/linux/irq_poll.h b/include/linux/irq_poll.h
new file mode 100644 (file)
index 0000000..3e8c1b8
--- /dev/null
@@ -0,0 +1,25 @@
+#ifndef IRQ_POLL_H
+#define IRQ_POLL_H
+
+struct irq_poll;
+typedef int (irq_poll_fn)(struct irq_poll *, int);
+
+struct irq_poll {
+       struct list_head list;
+       unsigned long state;
+       int weight;
+       irq_poll_fn *poll;
+};
+
+enum {
+       IRQ_POLL_F_SCHED        = 0,
+       IRQ_POLL_F_DISABLE      = 1,
+};
+
+extern void irq_poll_sched(struct irq_poll *);
+extern void irq_poll_init(struct irq_poll *, int, irq_poll_fn *);
+extern void irq_poll_complete(struct irq_poll *);
+extern void irq_poll_enable(struct irq_poll *);
+extern void irq_poll_disable(struct irq_poll *);
+
+#endif
index f64622ad02c196185be2eb79269e782d5772fcd0..04579d9fbce4f3b5c0a8484116500ee3034b93f7 100644 (file)
@@ -70,6 +70,7 @@ struct irq_fwspec {
  */
 enum irq_domain_bus_token {
        DOMAIN_BUS_ANY          = 0,
+       DOMAIN_BUS_WIRED,
        DOMAIN_BUS_PCI_MSI,
        DOMAIN_BUS_PLATFORM_MSI,
        DOMAIN_BUS_NEXUS,
index 9ae48d4aeb5ec7d6fafa9528c79f65f62a091ed3..792c8981e63365b06a03e7feda7e647eafb533c0 100644 (file)
@@ -51,7 +51,7 @@ enum mem_cgroup_stat_index {
        MEM_CGROUP_STAT_SWAP,           /* # of pages, swapped out */
        MEM_CGROUP_STAT_NSTATS,
        /* default hierarchy stats */
-       MEMCG_SOCK,
+       MEMCG_SOCK = MEM_CGROUP_STAT_NSTATS,
        MEMCG_NR_STAT,
 };
 
index 58391f2e0414e0e2001fab4c002771798fad3d33..116b284bc4ce81b77c5cd6f14266eb62c7ad4c03 100644 (file)
@@ -206,7 +206,8 @@ enum {
        MLX4_SET_PORT_GID_TABLE = 0x5,
        MLX4_SET_PORT_PRIO2TC   = 0x8,
        MLX4_SET_PORT_SCHEDULER = 0x9,
-       MLX4_SET_PORT_VXLAN     = 0xB
+       MLX4_SET_PORT_VXLAN     = 0xB,
+       MLX4_SET_PORT_ROCE_ADDR = 0xD
 };
 
 enum {
index d3133be12d922521e24528480edbdb31659a7007..430a929f048b3d2f27d1e842f45e80f9e9e6347a 100644 (file)
@@ -216,6 +216,7 @@ enum {
        MLX4_DEV_CAP_FLAG2_SKIP_OUTER_VLAN      = 1LL <<  30,
        MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB = 1ULL << 31,
        MLX4_DEV_CAP_FLAG2_LB_SRC_CHK           = 1ULL << 32,
+       MLX4_DEV_CAP_FLAG2_ROCE_V1_V2           = 1ULL <<  33,
 };
 
 enum {
@@ -267,12 +268,14 @@ enum {
        MLX4_BMME_FLAG_TYPE_2_WIN       = 1 <<  9,
        MLX4_BMME_FLAG_RESERVED_LKEY    = 1 << 10,
        MLX4_BMME_FLAG_FAST_REG_WR      = 1 << 11,
+       MLX4_BMME_FLAG_ROCE_V1_V2       = 1 << 19,
        MLX4_BMME_FLAG_PORT_REMAP       = 1 << 24,
        MLX4_BMME_FLAG_VSD_INIT2RTR     = 1 << 28,
 };
 
 enum {
-       MLX4_FLAG_PORT_REMAP            = MLX4_BMME_FLAG_PORT_REMAP
+       MLX4_FLAG_PORT_REMAP            = MLX4_BMME_FLAG_PORT_REMAP,
+       MLX4_FLAG_ROCE_V1_V2            = MLX4_BMME_FLAG_ROCE_V1_V2
 };
 
 enum mlx4_event {
@@ -979,14 +982,11 @@ struct mlx4_mad_ifc {
        for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++)     \
                if ((type) == (dev)->caps.port_mask[(port)])
 
-#define mlx4_foreach_non_ib_transport_port(port, dev)                     \
-       for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++)       \
-               if (((dev)->caps.port_mask[port] != MLX4_PORT_TYPE_IB))
-
 #define mlx4_foreach_ib_transport_port(port, dev)                         \
-       for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++)       \
+       for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++)       \
                if (((dev)->caps.port_mask[port] == MLX4_PORT_TYPE_IB) || \
-                       ((dev)->caps.flags & MLX4_DEV_CAP_FLAG_IBOE))
+                       ((dev)->caps.flags & MLX4_DEV_CAP_FLAG_IBOE) || \
+                       ((dev)->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2))
 
 #define MLX4_INVALID_SLAVE_ID  0xFF
 #define MLX4_SINK_COUNTER_INDEX(dev)   (dev->caps.max_counters - 1)
@@ -1457,6 +1457,7 @@ int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave, int port);
 
 int mlx4_config_vxlan_port(struct mlx4_dev *dev, __be16 udp_port);
 int mlx4_disable_rx_port_check(struct mlx4_dev *dev, bool dis);
+int mlx4_config_roce_v2_port(struct mlx4_dev *dev, u16 udp_port);
 int mlx4_virt2phy_port_map(struct mlx4_dev *dev, u32 port1, u32 port2);
 int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port);
 int mlx4_vf_get_enable_smi_admin(struct mlx4_dev *dev, int slave, int port);
index fe052e234906da97e88206d0b05db2442bced61f..587cdf943b524abd88fa945844b1e5b209b7fe2e 100644 (file)
@@ -194,7 +194,7 @@ struct mlx4_qp_context {
        u8                      mtu_msgmax;
        u8                      rq_size_stride;
        u8                      sq_size_stride;
-       u8                      rlkey;
+       u8                      rlkey_roce_mode;
        __be32                  usr_page;
        __be32                  local_qpn;
        __be32                  remote_qpn;
@@ -204,7 +204,8 @@ struct mlx4_qp_context {
        u32                     reserved1;
        __be32                  next_send_psn;
        __be32                  cqn_send;
-       u32                     reserved2[2];
+       __be16                  roce_entropy;
+       __be16                  reserved2[3];
        __be32                  last_acked_psn;
        __be32                  ssn;
        __be32                  params2;
@@ -487,4 +488,14 @@ static inline struct mlx4_qp *__mlx4_qp_lookup(struct mlx4_dev *dev, u32 qpn)
 
 void mlx4_qp_remove(struct mlx4_dev *dev, struct mlx4_qp *qp);
 
+static inline u16 folded_qp(u32 q)
+{
+       u16 res;
+
+       res = ((q & 0xff) ^ ((q & 0xff0000) >> 16)) | (q & 0xff00);
+       return res;
+}
+
+u16 mlx4_qp_roce_entropy(struct mlx4_dev *dev, u32 qpn);
+
 #endif /* MLX4_QP_H */
index 7be845e306897ccd9d598ace7341976733164e5a..987764afa65c2344d5a85328f9f35a419e21db64 100644 (file)
@@ -223,6 +223,14 @@ enum {
 #define MLX5_UMR_MTT_MASK      (MLX5_UMR_MTT_ALIGNMENT - 1)
 #define MLX5_UMR_MTT_MIN_CHUNK_SIZE MLX5_UMR_MTT_ALIGNMENT
 
+#define MLX5_USER_INDEX_LEN (MLX5_FLD_SZ_BYTES(qpc, user_index) * 8)
+
+enum {
+       MLX5_EVENT_QUEUE_TYPE_QP = 0,
+       MLX5_EVENT_QUEUE_TYPE_RQ = 1,
+       MLX5_EVENT_QUEUE_TYPE_SQ = 2,
+};
+
 enum mlx5_event {
        MLX5_EVENT_TYPE_COMP               = 0x0,
 
@@ -279,6 +287,26 @@ enum {
        MLX5_DEV_CAP_FLAG_CMDIF_CSUM    = 3LL << 46,
 };
 
+enum {
+       MLX5_ROCE_VERSION_1             = 0,
+       MLX5_ROCE_VERSION_2             = 2,
+};
+
+enum {
+       MLX5_ROCE_VERSION_1_CAP         = 1 << MLX5_ROCE_VERSION_1,
+       MLX5_ROCE_VERSION_2_CAP         = 1 << MLX5_ROCE_VERSION_2,
+};
+
+enum {
+       MLX5_ROCE_L3_TYPE_IPV4          = 0,
+       MLX5_ROCE_L3_TYPE_IPV6          = 1,
+};
+
+enum {
+       MLX5_ROCE_L3_TYPE_IPV4_CAP      = 1 << 1,
+       MLX5_ROCE_L3_TYPE_IPV6_CAP      = 1 << 2,
+};
+
 enum {
        MLX5_OPCODE_NOP                 = 0x00,
        MLX5_OPCODE_SEND_INVAL          = 0x01,
@@ -446,7 +474,7 @@ struct mlx5_init_seg {
        __be32                  rsvd2[880];
        __be32                  internal_timer_h;
        __be32                  internal_timer_l;
-       __be32                  rsrv3[2];
+       __be32                  rsvd3[2];
        __be32                  health_counter;
        __be32                  rsvd4[1019];
        __be64                  ieee1588_clk;
@@ -460,7 +488,9 @@ struct mlx5_eqe_comp {
 };
 
 struct mlx5_eqe_qp_srq {
-       __be32  reserved[6];
+       __be32  reserved1[5];
+       u8      type;
+       u8      reserved2[3];
        __be32  qp_srq_n;
 };
 
@@ -650,6 +680,12 @@ enum {
        CQE_RSS_HTYPE_L4        = 0x3 << 2,
 };
 
+enum {
+       MLX5_CQE_ROCE_L3_HEADER_TYPE_GRH        = 0x0,
+       MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV6       = 0x1,
+       MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV4       = 0x2,
+};
+
 enum {
        CQE_L2_OK       = 1 << 0,
        CQE_L3_OK       = 1 << 1,
index 5162f3533042825d03ea5c6244d1735bd55a8663..1e3006dcf35d735175ebe054d8867f89b994231c 100644 (file)
@@ -115,6 +115,11 @@ enum {
        MLX5_REG_HOST_ENDIANNESS = 0x7004,
 };
 
+enum {
+       MLX5_ATOMIC_OPS_CMP_SWAP        = 1 << 0,
+       MLX5_ATOMIC_OPS_FETCH_ADD       = 1 << 1,
+};
+
 enum mlx5_page_fault_resume_flags {
        MLX5_PAGE_FAULT_RESUME_REQUESTOR = 1 << 0,
        MLX5_PAGE_FAULT_RESUME_WRITE     = 1 << 1,
@@ -341,9 +346,11 @@ struct mlx5_core_mr {
 };
 
 enum mlx5_res_type {
-       MLX5_RES_QP,
-       MLX5_RES_SRQ,
-       MLX5_RES_XSRQ,
+       MLX5_RES_QP     = MLX5_EVENT_QUEUE_TYPE_QP,
+       MLX5_RES_RQ     = MLX5_EVENT_QUEUE_TYPE_RQ,
+       MLX5_RES_SQ     = MLX5_EVENT_QUEUE_TYPE_SQ,
+       MLX5_RES_SRQ    = 3,
+       MLX5_RES_XSRQ   = 4,
 };
 
 struct mlx5_core_rsc_common {
@@ -651,13 +658,6 @@ extern struct workqueue_struct *mlx5_core_wq;
        .struct_offset_bytes = offsetof(struct ib_unpacked_ ## header, field),      \
        .struct_size_bytes   = sizeof((struct ib_unpacked_ ## header *)0)->field
 
-struct ib_field {
-       size_t struct_offset_bytes;
-       size_t struct_size_bytes;
-       int    offset_bits;
-       int    size_bits;
-};
-
 static inline struct mlx5_core_dev *pci2mlx5_core_dev(struct pci_dev *pdev)
 {
        return pci_get_drvdata(pdev);
index 68d73f82e009e60654952f237c0f9fed01bd67e5..231ab6bcea76356ecf6539513fc210fe1e208451 100644 (file)
@@ -66,6 +66,11 @@ enum {
        MLX5_MODIFY_TIR_BITMASK_TUNNELED_OFFLOAD_EN   = 0x3
 };
 
+enum {
+       MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE        = 0x0,
+       MLX5_SET_HCA_CAP_OP_MOD_ATOMIC                = 0x3,
+};
+
 enum {
        MLX5_CMD_OP_QUERY_HCA_CAP                 = 0x100,
        MLX5_CMD_OP_QUERY_ADAPTER                 = 0x101,
@@ -573,21 +578,24 @@ enum {
 struct mlx5_ifc_atomic_caps_bits {
        u8         reserved_0[0x40];
 
-       u8         atomic_req_endianness[0x1];
-       u8         reserved_1[0x1f];
+       u8         atomic_req_8B_endianess_mode[0x2];
+       u8         reserved_1[0x4];
+       u8         supported_atomic_req_8B_endianess_mode_1[0x1];
 
-       u8         reserved_2[0x20];
+       u8         reserved_2[0x19];
 
-       u8         reserved_3[0x10];
-       u8         atomic_operations[0x10];
+       u8         reserved_3[0x20];
 
        u8         reserved_4[0x10];
-       u8         atomic_size_qp[0x10];
+       u8         atomic_operations[0x10];
 
        u8         reserved_5[0x10];
+       u8         atomic_size_qp[0x10];
+
+       u8         reserved_6[0x10];
        u8         atomic_size_dc[0x10];
 
-       u8         reserved_6[0x720];
+       u8         reserved_7[0x720];
 };
 
 struct mlx5_ifc_odp_cap_bits {
@@ -850,7 +858,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
        u8         reserved_66[0x8];
        u8         log_uar_page_sz[0x10];
 
-       u8         reserved_67[0x40];
+       u8         reserved_67[0x20];
+       u8         device_frequency_mhz[0x20];
        u8         device_frequency_khz[0x20];
        u8         reserved_68[0x5f];
        u8         cqe_zip[0x1];
@@ -2215,19 +2224,25 @@ struct mlx5_ifc_nic_vport_context_bits {
 
        u8         mtu[0x10];
 
-       u8         reserved_3[0x640];
+       u8         system_image_guid[0x40];
+       u8         port_guid[0x40];
+       u8         node_guid[0x40];
+
+       u8         reserved_3[0x140];
+       u8         qkey_violation_counter[0x10];
+       u8         reserved_4[0x430];
 
        u8         promisc_uc[0x1];
        u8         promisc_mc[0x1];
        u8         promisc_all[0x1];
-       u8         reserved_4[0x2];
+       u8         reserved_5[0x2];
        u8         allowed_list_type[0x3];
-       u8         reserved_5[0xc];
+       u8         reserved_6[0xc];
        u8         allowed_list_size[0xc];
 
        struct mlx5_ifc_mac_address_layout_bits permanent_address;
 
-       u8         reserved_6[0x20];
+       u8         reserved_7[0x20];
 
        u8         current_uc_mac_address[0][0x40];
 };
@@ -4199,6 +4214,13 @@ struct mlx5_ifc_modify_tis_out_bits {
        u8         reserved_1[0x40];
 };
 
+struct mlx5_ifc_modify_tis_bitmask_bits {
+       u8         reserved_0[0x20];
+
+       u8         reserved_1[0x1f];
+       u8         prio[0x1];
+};
+
 struct mlx5_ifc_modify_tis_in_bits {
        u8         opcode[0x10];
        u8         reserved_0[0x10];
@@ -4211,7 +4233,7 @@ struct mlx5_ifc_modify_tis_in_bits {
 
        u8         reserved_3[0x20];
 
-       u8         modify_bitmask[0x40];
+       struct mlx5_ifc_modify_tis_bitmask_bits bitmask;
 
        u8         reserved_4[0x40];
 
index f079fb1a31f7f7953f0bb28f6f3a145279598dc7..5b8c89ffaa5830fdf05fbc40a1fce0b5d2ffa7f3 100644 (file)
@@ -85,7 +85,16 @@ enum mlx5_qp_state {
        MLX5_QP_STATE_ERR                       = 6,
        MLX5_QP_STATE_SQ_DRAINING               = 7,
        MLX5_QP_STATE_SUSPENDED                 = 9,
-       MLX5_QP_NUM_STATE
+       MLX5_QP_NUM_STATE,
+       MLX5_QP_STATE,
+       MLX5_QP_STATE_BAD,
+};
+
+enum {
+       MLX5_SQ_STATE_NA        = MLX5_SQC_STATE_ERR + 1,
+       MLX5_SQ_NUM_STATE       = MLX5_SQ_STATE_NA + 1,
+       MLX5_RQ_STATE_NA        = MLX5_RQC_STATE_ERR + 1,
+       MLX5_RQ_NUM_STATE       = MLX5_RQ_STATE_NA + 1,
 };
 
 enum {
@@ -130,6 +139,9 @@ enum {
        MLX5_QP_BIT_RWE                         = 1 << 14,
        MLX5_QP_BIT_RAE                         = 1 << 13,
        MLX5_QP_BIT_RIC                         = 1 <<  4,
+       MLX5_QP_BIT_CC_SLAVE_RECV               = 1 <<  2,
+       MLX5_QP_BIT_CC_SLAVE_SEND               = 1 <<  1,
+       MLX5_QP_BIT_CC_MASTER                   = 1 <<  0
 };
 
 enum {
@@ -248,8 +260,12 @@ struct mlx5_av {
        __be32  dqp_dct;
        u8      stat_rate_sl;
        u8      fl_mlid;
-       __be16  rlid;
-       u8      reserved0[10];
+       union {
+               __be16  rlid;
+               __be16  udp_sport;
+       };
+       u8      reserved0[4];
+       u8      rmac[6];
        u8      tclass;
        u8      hop_limit;
        __be32  grh_gid_fl;
@@ -456,11 +472,16 @@ struct mlx5_qp_path {
        u8                      static_rate;
        u8                      hop_limit;
        __be32                  tclass_flowlabel;
-       u8                      rgid[16];
-       u8                      rsvd1[4];
-       u8                      sl;
+       union {
+               u8              rgid[16];
+               u8              rip[16];
+       };
+       u8                      f_dscp_ecn_prio;
+       u8                      ecn_dscp;
+       __be16                  udp_sport;
+       u8                      dci_cfi_prio_sl;
        u8                      port;
-       u8                      rsvd2[6];
+       u8                      rmac[6];
 };
 
 struct mlx5_qp_context {
@@ -620,8 +641,7 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev,
                        struct mlx5_core_qp *qp,
                        struct mlx5_create_qp_mbox_in *in,
                        int inlen);
-int mlx5_core_qp_modify(struct mlx5_core_dev *dev, enum mlx5_qp_state cur_state,
-                       enum mlx5_qp_state new_state,
+int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 operation,
                        struct mlx5_modify_qp_mbox_in *in, int sqd_event,
                        struct mlx5_core_qp *qp);
 int mlx5_core_destroy_qp(struct mlx5_core_dev *dev,
@@ -639,6 +659,14 @@ void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp);
 int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 qpn,
                                u8 context, int error);
 #endif
+int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
+                               struct mlx5_core_qp *rq);
+void mlx5_core_destroy_rq_tracked(struct mlx5_core_dev *dev,
+                                 struct mlx5_core_qp *rq);
+int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
+                               struct mlx5_core_qp *sq);
+void mlx5_core_destroy_sq_tracked(struct mlx5_core_dev *dev,
+                                 struct mlx5_core_qp *sq);
 
 static inline const char *mlx5_qp_type_str(int type)
 {
diff --git a/include/linux/mlx5/transobj.h b/include/linux/mlx5/transobj.h
new file mode 100644 (file)
index 0000000..88441f5
--- /dev/null
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies, Ltd.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __TRANSOBJ_H__
+#define __TRANSOBJ_H__
+
+#include <linux/mlx5/driver.h>
+
+int mlx5_core_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn);
+void mlx5_core_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn);
+int mlx5_core_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen,
+                       u32 *rqn);
+int mlx5_core_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in, int inlen);
+void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn);
+int mlx5_core_query_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *out);
+int mlx5_core_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen,
+                       u32 *sqn);
+int mlx5_core_modify_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *in, int inlen);
+void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn);
+int mlx5_core_query_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *out);
+int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen,
+                        u32 *tirn);
+int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in,
+                        int inlen);
+void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn);
+int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen,
+                        u32 *tisn);
+int mlx5_core_modify_tis(struct mlx5_core_dev *dev, u32 tisn, u32 *in,
+                        int inlen);
+void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn);
+int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen,
+                        u32 *rmpn);
+int mlx5_core_modify_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen);
+int mlx5_core_destroy_rmp(struct mlx5_core_dev *dev, u32 rmpn);
+int mlx5_core_query_rmp(struct mlx5_core_dev *dev, u32 rmpn, u32 *out);
+int mlx5_core_arm_rmp(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm);
+int mlx5_core_create_xsrq(struct mlx5_core_dev *dev, u32 *in, int inlen,
+                         u32 *rmpn);
+int mlx5_core_destroy_xsrq(struct mlx5_core_dev *dev, u32 rmpn);
+int mlx5_core_query_xsrq(struct mlx5_core_dev *dev, u32 rmpn, u32 *out);
+int mlx5_core_arm_xsrq(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm);
+
+int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen,
+                        u32 *rqtn);
+int mlx5_core_modify_rqt(struct mlx5_core_dev *dev, u32 rqtn, u32 *in,
+                        int inlen);
+void mlx5_core_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn);
+
+#endif /* __TRANSOBJ_H__ */
index 638f2ca7a527c01f63fe2c3b3cd8013fbaf70c86..123771003e68586571690f9a5211e5afaa0143e3 100644 (file)
@@ -45,6 +45,11 @@ int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev,
                                     u16 vport, u8 *addr);
 int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *dev,
                                      u16 vport, u8 *addr);
+int mlx5_query_nic_vport_system_image_guid(struct mlx5_core_dev *mdev,
+                                          u64 *system_image_guid);
+int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid);
+int mlx5_query_nic_vport_qkey_viol_cntr(struct mlx5_core_dev *mdev,
+                                       u16 *qkey_viol_cntr);
 int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport,
                             u8 port_num, u16  vf_num, u16 gid_index,
                             union ib_gid *gid);
@@ -85,4 +90,7 @@ int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev,
                                u16 vlans[],
                                int list_size);
 
+int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev);
+int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev);
+
 #endif /* __MLX5_VPORT_H__ */
index f1cd22f2df1ac50438e7d70bb0df85c44580b8d3..516e149443397dcf712e4e62aca6fb3c78e542b6 100644 (file)
@@ -201,11 +201,13 @@ extern unsigned int kobjsize(const void *objp);
 #endif
 
 #ifdef CONFIG_STACK_GROWSUP
-#define VM_STACK_FLAGS (VM_GROWSUP | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)
+#define VM_STACK       VM_GROWSUP
 #else
-#define VM_STACK_FLAGS (VM_GROWSDOWN | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)
+#define VM_STACK       VM_GROWSDOWN
 #endif
 
+#define VM_STACK_FLAGS (VM_STACK | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)
+
 /*
  * Special vmas that are non-mergable, non-mlock()able.
  * Note: mm/huge_memory.c VM_NO_THP depends on this definition.
@@ -1341,8 +1343,7 @@ static inline int stack_guard_page_end(struct vm_area_struct *vma,
                !vma_growsup(vma->vm_next, addr);
 }
 
-extern struct task_struct *task_of_stack(struct task_struct *task,
-                               struct vm_area_struct *vma, bool in_group);
+int vma_is_stack_for_task(struct vm_area_struct *vma, struct task_struct *t);
 
 extern unsigned long move_page_tables(struct vm_area_struct *vma,
                unsigned long old_addr, struct vm_area_struct *new_vma,
index d3ebb9d21a5334d26e85bc865d318535f5864569..624b78b848b89fae506faa1f4f48ce86297ceace 100644 (file)
@@ -424,9 +424,9 @@ struct mm_struct {
        unsigned long total_vm;         /* Total pages mapped */
        unsigned long locked_vm;        /* Pages that have PG_mlocked set */
        unsigned long pinned_vm;        /* Refcount permanently increased */
-       unsigned long data_vm;          /* VM_WRITE & ~VM_SHARED/GROWSDOWN */
-       unsigned long exec_vm;          /* VM_EXEC & ~VM_WRITE */
-       unsigned long stack_vm;         /* VM_GROWSUP/DOWN */
+       unsigned long data_vm;          /* VM_WRITE & ~VM_SHARED & ~VM_STACK */
+       unsigned long exec_vm;          /* VM_EXEC & ~VM_WRITE & ~VM_STACK */
+       unsigned long stack_vm;         /* VM_STACK */
        unsigned long def_flags;
        unsigned long start_code, end_code, start_data, end_data;
        unsigned long start_brk, brk, start_stack;
index 33bb1b19273e3ad165382f4c7f0f3cd8f8c0b407..7b6c2cfee390e939e13b0f14c92224bcb3aafcf4 100644 (file)
@@ -682,6 +682,12 @@ typedef struct pglist_data {
         */
        unsigned long first_deferred_pfn;
 #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+       spinlock_t split_queue_lock;
+       struct list_head split_queue;
+       unsigned long split_queue_len;
+#endif
 } pg_data_t;
 
 #define node_present_pages(nid)        (NODE_DATA(nid)->node_present_pages)
index 4560d8f1545d2cec7e6da19da460fff26151c105..2bb0c308570672e7105b14f85a4215b0f9500207 100644 (file)
@@ -324,6 +324,12 @@ struct module_layout {
 #define __module_layout_align
 #endif
 
+struct mod_kallsyms {
+       Elf_Sym *symtab;
+       unsigned int num_symtab;
+       char *strtab;
+};
+
 struct module {
        enum module_state state;
 
@@ -405,15 +411,10 @@ struct module {
 #endif
 
 #ifdef CONFIG_KALLSYMS
-       /*
-        * We keep the symbol and string tables for kallsyms.
-        * The core_* fields below are temporary, loader-only (they
-        * could really be discarded after module init).
-        */
-       Elf_Sym *symtab, *core_symtab;
-       unsigned int num_symtab, core_num_syms;
-       char *strtab, *core_strtab;
-
+       /* Protected by RCU and/or module_mutex: use rcu_dereference() */
+       struct mod_kallsyms *kallsyms;
+       struct mod_kallsyms core_kallsyms;
+       
        /* Section attributes */
        struct module_sect_attrs *sect_attrs;
 
index 5ac140dcb789499e51b951fa45fb34378d636407..289c2314d76668b8357728382bb33d6828617458 100644 (file)
@@ -512,7 +512,6 @@ static inline void napi_enable(struct napi_struct *n)
        clear_bit(NAPI_STATE_NPSVC, &n->state);
 }
 
-#ifdef CONFIG_SMP
 /**
  *     napi_synchronize - wait until NAPI is not running
  *     @n: napi context
@@ -523,12 +522,12 @@ static inline void napi_enable(struct napi_struct *n)
  */
 static inline void napi_synchronize(const struct napi_struct *n)
 {
-       while (test_bit(NAPI_STATE_SCHED, &n->state))
-               msleep(1);
+       if (IS_ENABLED(CONFIG_SMP))
+               while (test_bit(NAPI_STATE_SCHED, &n->state))
+                       msleep(1);
+       else
+               barrier();
 }
-#else
-# define napi_synchronize(n)   barrier()
-#endif
 
 enum netdev_queue_state_t {
        __QUEUE_STATE_DRV_XOFF,
index dd10626a615fb160587ecf09f860de36ba583c19..dc6e39696b64c8e925601f598b9cdab5684f1970 100644 (file)
@@ -929,7 +929,7 @@ static inline int of_get_available_child_count(const struct device_node *np)
        return num;
 }
 
-#ifdef CONFIG_OF
+#if defined(CONFIG_OF) && !defined(MODULE)
 #define _OF_DECLARE(table, name, compat, fn, fn_type)                  \
        static const struct of_device_id __of_table_##name              \
                __used __section(__##table##_of_table)                  \
index 4d08b6c33557250edda8e949ff64f5f2fd1ffdc1..92395a0a7dc5c436cdd43317a798b6d15eba5a5e 100644 (file)
@@ -361,6 +361,9 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start,
                               unsigned int nr_pages, struct page **pages);
 unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
                        int tag, unsigned int nr_pages, struct page **pages);
+unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start,
+                       int tag, unsigned int nr_entries,
+                       struct page **entries, pgoff_t *indices);
 
 struct page *grab_cache_page_write_begin(struct address_space *mapping,
                        pgoff_t index, unsigned flags);
index f9828a48f16addab4737683f3c8345ab87e52a0a..b35a61a481fa0460b3b32591e6dd52f2fa9077e7 100644 (file)
@@ -634,9 +634,6 @@ struct perf_event_context {
        int                             nr_cgroups;      /* cgroup evts */
        void                            *task_ctx_data; /* pmu specific data */
        struct rcu_head                 rcu_head;
-
-       struct delayed_work             orphans_remove;
-       bool                            orphans_remove_sched;
 };
 
 /*
@@ -729,7 +726,7 @@ extern int perf_event_init_task(struct task_struct *child);
 extern void perf_event_exit_task(struct task_struct *child);
 extern void perf_event_free_task(struct task_struct *task);
 extern void perf_event_delayed_put(struct task_struct *task);
-extern struct perf_event *perf_event_get(unsigned int fd);
+extern struct file *perf_event_get(unsigned int fd);
 extern const struct perf_event_attr *perf_event_attrs(struct perf_event *event);
 extern void perf_event_print_debug(void);
 extern void perf_pmu_disable(struct pmu *pmu);
@@ -1044,7 +1041,7 @@ extern void perf_swevent_put_recursion_context(int rctx);
 extern u64 perf_swevent_set_period(struct perf_event *event);
 extern void perf_event_enable(struct perf_event *event);
 extern void perf_event_disable(struct perf_event *event);
-extern int __perf_event_disable(void *info);
+extern void perf_event_disable_local(struct perf_event *event);
 extern void perf_event_task_tick(void);
 #else /* !CONFIG_PERF_EVENTS: */
 static inline void *
@@ -1070,7 +1067,7 @@ static inline int perf_event_init_task(struct task_struct *child) { return 0; }
 static inline void perf_event_exit_task(struct task_struct *child)     { }
 static inline void perf_event_free_task(struct task_struct *task)      { }
 static inline void perf_event_delayed_put(struct task_struct *task)    { }
-static inline struct perf_event *perf_event_get(unsigned int fd)       { return ERR_PTR(-EINVAL); }
+static inline struct file *perf_event_get(unsigned int fd)     { return ERR_PTR(-EINVAL); }
 static inline const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
 {
        return ERR_PTR(-EINVAL);
index 0703b5360d31b5fa599a541bce9881941a170281..37448ab5fb5c2eddc80b97125ee80ecff4bc1b58 100644 (file)
@@ -29,7 +29,7 @@ static inline pfn_t pfn_to_pfn_t(unsigned long pfn)
        return __pfn_to_pfn_t(pfn, 0);
 }
 
-extern pfn_t phys_to_pfn_t(dma_addr_t addr, unsigned long flags);
+extern pfn_t phys_to_pfn_t(phys_addr_t addr, unsigned long flags);
 
 static inline bool pfn_t_has_page(pfn_t pfn)
 {
@@ -48,7 +48,7 @@ static inline struct page *pfn_t_to_page(pfn_t pfn)
        return NULL;
 }
 
-static inline dma_addr_t pfn_t_to_phys(pfn_t pfn)
+static inline phys_addr_t pfn_t_to_phys(pfn_t pfn)
 {
        return PFN_PHYS(pfn_t_to_pfn(pfn));
 }
index eb8b8ac6df3c844e2bd84903e0a50ff07f1575fe..24f5470d394460c779ebdbd385d8f14766f14001 100644 (file)
@@ -42,6 +42,7 @@ struct pipe_buffer {
  *     @fasync_readers: reader side fasync
  *     @fasync_writers: writer side fasync
  *     @bufs: the circular array of pipe buffers
+ *     @user: the user who created this pipe
  **/
 struct pipe_inode_info {
        struct mutex mutex;
@@ -57,6 +58,7 @@ struct pipe_inode_info {
        struct fasync_struct *fasync_readers;
        struct fasync_struct *fasync_writers;
        struct pipe_buffer *bufs;
+       struct user_struct *user;
 };
 
 /*
@@ -123,6 +125,8 @@ void pipe_unlock(struct pipe_inode_info *);
 void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *);
 
 extern unsigned int pipe_max_size, pipe_min_size;
+extern unsigned long pipe_user_pages_hard;
+extern unsigned long pipe_user_pages_soft;
 int pipe_proc_fn(struct ctl_table *, int, void __user *, size_t *, loff_t *);
 
 
diff --git a/include/linux/platform_data/sdhci-pic32.h b/include/linux/platform_data/sdhci-pic32.h
new file mode 100644 (file)
index 0000000..7e0efe6
--- /dev/null
@@ -0,0 +1,22 @@
+/*
+ * Purna Chandra Mandal, purna.mandal@microchip.com
+ * Copyright (C) 2015 Microchip Technology Inc.  All rights reserved.
+ *
+ *  This program is free software; you can distribute it and/or modify it
+ *  under the terms of the GNU General Public License (Version 2) as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ */
+#ifndef __PIC32_SDHCI_PDATA_H__
+#define __PIC32_SDHCI_PDATA_H__
+
+struct pic32_sdhci_platform_data {
+       /* read & write fifo threshold */
+       int (*setup_dma)(u32 rfifo, u32 wfifo);
+};
+
+#endif
index acfea8ce4a07be8b6291c76ca0f53728b06ae5fa..7c3d11a6b4ad5d8d35b68a33f87801865adada8d 100644 (file)
@@ -53,12 +53,18 @@ static inline void arch_clear_pmem(void __pmem *addr, size_t size)
 {
        BUG();
 }
+
+static inline void arch_wb_cache_pmem(void __pmem *addr, size_t size)
+{
+       BUG();
+}
 #endif
 
 /*
  * Architectures that define ARCH_HAS_PMEM_API must provide
  * implementations for arch_memcpy_to_pmem(), arch_wmb_pmem(),
- * arch_copy_from_iter_pmem(), arch_clear_pmem() and arch_has_wmb_pmem().
+ * arch_copy_from_iter_pmem(), arch_clear_pmem(), arch_wb_cache_pmem()
+ * and arch_has_wmb_pmem().
  */
 static inline void memcpy_from_pmem(void *dst, void __pmem const *src, size_t size)
 {
@@ -178,4 +184,18 @@ static inline void clear_pmem(void __pmem *addr, size_t size)
        else
                default_clear_pmem(addr, size);
 }
+
+/**
+ * wb_cache_pmem - write back processor cache for PMEM memory range
+ * @addr:      virtual start address
+ * @size:      number of bytes to write back
+ *
+ * Write back the processor cache range starting at 'addr' for 'size' bytes.
+ * This function requires explicit ordering with a wmb_pmem() call.
+ */
+static inline void wb_cache_pmem(void __pmem *addr, size_t size)
+{
+       if (arch_has_pmem_api())
+               arch_wb_cache_pmem(addr, size);
+}
 #endif /* __PMEM_H__ */
index 57e7d87d2d4c15de691d8fa3075f779ddbda5172..f54be708220760f9a367f4a29ca1cbe7cfb7fba2 100644 (file)
 #define RADIX_TREE_EXCEPTIONAL_ENTRY   2
 #define RADIX_TREE_EXCEPTIONAL_SHIFT   2
 
+#define RADIX_DAX_MASK 0xf
+#define RADIX_DAX_SHIFT        4
+#define RADIX_DAX_PTE  (0x4 | RADIX_TREE_EXCEPTIONAL_ENTRY)
+#define RADIX_DAX_PMD  (0x8 | RADIX_TREE_EXCEPTIONAL_ENTRY)
+#define RADIX_DAX_TYPE(entry) ((unsigned long)entry & RADIX_DAX_MASK)
+#define RADIX_DAX_SECTOR(entry) (((unsigned long)entry >> RADIX_DAX_SHIFT))
+#define RADIX_DAX_ENTRY(sector, pmd) ((void *)((unsigned long)sector << \
+               RADIX_DAX_SHIFT | (pmd ? RADIX_DAX_PMD : RADIX_DAX_PTE)))
+
 static inline int radix_tree_is_indirect_ptr(void *ptr)
 {
        return (int)((unsigned long)ptr & RADIX_TREE_INDIRECT_PTR);
@@ -369,13 +378,29 @@ radix_tree_iter_init(struct radix_tree_iter *iter, unsigned long start)
 void **radix_tree_next_chunk(struct radix_tree_root *root,
                             struct radix_tree_iter *iter, unsigned flags);
 
+/**
+ * radix_tree_iter_retry - retry this chunk of the iteration
+ * @iter:      iterator state
+ *
+ * If we iterate over a tree protected only by the RCU lock, a race
+ * against deletion or creation may result in seeing a slot for which
+ * radix_tree_deref_retry() returns true.  If so, call this function
+ * and continue the iteration.
+ */
+static inline __must_check
+void **radix_tree_iter_retry(struct radix_tree_iter *iter)
+{
+       iter->next_index = iter->index;
+       return NULL;
+}
+
 /**
  * radix_tree_chunk_size - get current chunk size
  *
  * @iter:      pointer to radix tree iterator
  * Returns:    current chunk size
  */
-static __always_inline unsigned
+static __always_inline long
 radix_tree_chunk_size(struct radix_tree_iter *iter)
 {
        return iter->next_index - iter->index;
@@ -409,9 +434,9 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags)
                        return slot + offset + 1;
                }
        } else {
-               unsigned size = radix_tree_chunk_size(iter) - 1;
+               long size = radix_tree_chunk_size(iter);
 
-               while (size--) {
+               while (--size > 0) {
                        slot++;
                        iter->index++;
                        if (likely(*slot))
index a7a06d1dcf9cb17ee65d1befe1ef6ac62d882d16..a0118d5929a9c9ed17df3b75a8b599472ff1f205 100644 (file)
@@ -152,6 +152,8 @@ void raid6_dual_recov(int disks, size_t bytes, int faila, int failb,
 
 # define jiffies       raid6_jiffies()
 # define printk        printf
+# define pr_err(format, ...) fprintf(stderr, format, ## __VA_ARGS__)
+# define pr_info(format, ...) fprintf(stdout, format, ## __VA_ARGS__)
 # define GFP_KERNEL    0
 # define __get_free_pages(x, y)        ((unsigned long)mmap(NULL, PAGE_SIZE << (y), \
                                                     PROT_READ|PROT_WRITE,   \
index bdf597c4f0be82965911266ed1668d06510492cb..a07f42bedda32687c94ed574f5a606b19912b042 100644 (file)
@@ -109,20 +109,6 @@ static inline void put_anon_vma(struct anon_vma *anon_vma)
                __put_anon_vma(anon_vma);
 }
 
-static inline void vma_lock_anon_vma(struct vm_area_struct *vma)
-{
-       struct anon_vma *anon_vma = vma->anon_vma;
-       if (anon_vma)
-               down_write(&anon_vma->root->rwsem);
-}
-
-static inline void vma_unlock_anon_vma(struct vm_area_struct *vma)
-{
-       struct anon_vma *anon_vma = vma->anon_vma;
-       if (anon_vma)
-               up_write(&anon_vma->root->rwsem);
-}
-
 static inline void anon_vma_lock_write(struct anon_vma *anon_vma)
 {
        down_write(&anon_vma->root->rwsem);
index f1e81e128592cbcf5b3c3e61949bcbe75b34c74f..a10494a94cc30f24d65ab866771833883c6f886d 100644 (file)
@@ -835,6 +835,7 @@ struct user_struct {
 #endif
        unsigned long locked_shm; /* How many pages of mlocked shm ? */
        unsigned long unix_inflight;    /* How many files in flight in unix sockets */
+       atomic_long_t pipe_bufs;  /* how many pages are allocated in pipe buffers */
 
 #ifdef CONFIG_KEYS
        struct key *uid_keyring;        /* UID specific keyring */
index a43f41cb3c430166afc7fda3f09571007fe74acf..4d4780c00d345e9e5fd50f5394b89d93c57c966b 100644 (file)
@@ -15,10 +15,7 @@ struct shmem_inode_info {
        unsigned int            seals;          /* shmem seals */
        unsigned long           flags;
        unsigned long           alloced;        /* data pages alloced to file */
-       union {
-               unsigned long   swapped;        /* subtotal assigned to swap */
-               char            *symlink;       /* unswappable short symlink */
-       };
+       unsigned long           swapped;        /* subtotal assigned to swap */
        struct shared_policy    policy;         /* NUMA memory alloc policy */
        struct list_head        swaplist;       /* chain of maybes on swap */
        struct simple_xattrs    xattrs;         /* list of xattrs */
index f869807a0d0e2ca93629a7d25092f268dbc8f520..5322fea6fe4c7995ae3ad0cfad15a1818356576f 100644 (file)
@@ -51,6 +51,7 @@
 /* RPC/RDMA parameters and stats */
 extern unsigned int svcrdma_ord;
 extern unsigned int svcrdma_max_requests;
+extern unsigned int svcrdma_max_bc_requests;
 extern unsigned int svcrdma_max_req_size;
 
 extern atomic_t rdma_stat_recv;
@@ -69,6 +70,7 @@ extern atomic_t rdma_stat_sq_prod;
  * completes.
  */
 struct svc_rdma_op_ctxt {
+       struct list_head free;
        struct svc_rdma_op_ctxt *read_hdr;
        struct svc_rdma_fastreg_mr *frmr;
        int hdr_count;
@@ -112,6 +114,7 @@ struct svc_rdma_fastreg_mr {
        struct list_head frmr_list;
 };
 struct svc_rdma_req_map {
+       struct list_head free;
        unsigned long count;
        union {
                struct kvec sge[RPCSVC_MAXPAGES];
@@ -132,28 +135,32 @@ struct svcxprt_rdma {
        int                  sc_max_sge;
        int                  sc_max_sge_rd;     /* max sge for read target */
 
-       int                  sc_sq_depth;       /* Depth of SQ */
        atomic_t             sc_sq_count;       /* Number of SQ WR on queue */
-
-       int                  sc_max_requests;   /* Depth of RQ */
+       unsigned int         sc_sq_depth;       /* Depth of SQ */
+       unsigned int         sc_rq_depth;       /* Depth of RQ */
+       u32                  sc_max_requests;   /* Forward credits */
+       u32                  sc_max_bc_requests;/* Backward credits */
        int                  sc_max_req_size;   /* Size of each RQ WR buf */
 
        struct ib_pd         *sc_pd;
 
        atomic_t             sc_dma_used;
-       atomic_t             sc_ctxt_used;
+       spinlock_t           sc_ctxt_lock;
+       struct list_head     sc_ctxts;
+       int                  sc_ctxt_used;
+       spinlock_t           sc_map_lock;
+       struct list_head     sc_maps;
+
        struct list_head     sc_rq_dto_q;
        spinlock_t           sc_rq_dto_lock;
        struct ib_qp         *sc_qp;
        struct ib_cq         *sc_rq_cq;
        struct ib_cq         *sc_sq_cq;
-       struct ib_mr         *sc_phys_mr;       /* MR for server memory */
        int                  (*sc_reader)(struct svcxprt_rdma *,
                                          struct svc_rqst *,
                                          struct svc_rdma_op_ctxt *,
                                          int *, u32 *, u32, u32, u64, bool);
        u32                  sc_dev_caps;       /* distilled device caps */
-       u32                  sc_dma_lkey;       /* local dma key */
        unsigned int         sc_frmr_pg_list_len;
        struct list_head     sc_frmr_q;
        spinlock_t           sc_frmr_q_lock;
@@ -179,8 +186,18 @@ struct svcxprt_rdma {
 #define RPCRDMA_MAX_REQUESTS    32
 #define RPCRDMA_MAX_REQ_SIZE    4096
 
+/* Typical ULP usage of BC requests is NFSv4.1 backchannel. Our
+ * current NFSv4.1 implementation supports one backchannel slot.
+ */
+#define RPCRDMA_MAX_BC_REQUESTS        2
+
 #define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD
 
+/* svc_rdma_backchannel.c */
+extern int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt,
+                                   struct rpcrdma_msg *rmsgp,
+                                   struct xdr_buf *rcvbuf);
+
 /* svc_rdma_marshal.c */
 extern int svc_rdma_xdr_decode_req(struct rpcrdma_msg **, struct svc_rqst *);
 extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *,
@@ -206,6 +223,8 @@ extern int rdma_read_chunk_frmr(struct svcxprt_rdma *, struct svc_rqst *,
                                u32, u32, u64, bool);
 
 /* svc_rdma_sendto.c */
+extern int svc_rdma_map_xdr(struct svcxprt_rdma *, struct xdr_buf *,
+                           struct svc_rdma_req_map *);
 extern int svc_rdma_sendto(struct svc_rqst *);
 extern struct rpcrdma_read_chunk *
        svc_rdma_get_read_chunk(struct rpcrdma_msg *);
@@ -214,13 +233,14 @@ extern struct rpcrdma_read_chunk *
 extern int svc_rdma_send(struct svcxprt_rdma *, struct ib_send_wr *);
 extern void svc_rdma_send_error(struct svcxprt_rdma *, struct rpcrdma_msg *,
                                enum rpcrdma_errcode);
-extern int svc_rdma_post_recv(struct svcxprt_rdma *);
+extern int svc_rdma_post_recv(struct svcxprt_rdma *, gfp_t);
 extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *);
 extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *);
 extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int);
 extern void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt);
-extern struct svc_rdma_req_map *svc_rdma_get_req_map(void);
-extern void svc_rdma_put_req_map(struct svc_rdma_req_map *);
+extern struct svc_rdma_req_map *svc_rdma_get_req_map(struct svcxprt_rdma *);
+extern void svc_rdma_put_req_map(struct svcxprt_rdma *,
+                                struct svc_rdma_req_map *);
 extern struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *);
 extern void svc_rdma_put_frmr(struct svcxprt_rdma *,
                              struct svc_rdma_fastreg_mr *);
@@ -234,6 +254,7 @@ extern struct svc_xprt_class svc_rdma_bc_class;
 #endif
 
 /* svc_rdma.c */
+extern struct workqueue_struct *svc_rdma_wq;
 extern int svc_rdma_init(void);
 extern void svc_rdma_cleanup(void);
 
index e7a018eaf3a255db2198049f543ea3111004852a..017fced60242ecdf5eaf9daeb0b035e18333d6e3 100644 (file)
@@ -1,10 +1,13 @@
 #ifndef __LINUX_SWIOTLB_H
 #define __LINUX_SWIOTLB_H
 
+#include <linux/dma-direction.h>
+#include <linux/init.h>
 #include <linux/types.h>
 
 struct device;
 struct dma_attrs;
+struct page;
 struct scatterlist;
 
 extern int swiotlb_force;
index 613c29bd6baf4763e57530b794df843d9cea61ee..e13a1ace50e902ad0ae38a81f5563d67eaa4a00f 100644 (file)
@@ -43,6 +43,9 @@
 /* Default weight of a bound cooling device */
 #define THERMAL_WEIGHT_DEFAULT 0
 
+/* use value, which < 0K, to indicate an invalid/uninitialized temperature */
+#define THERMAL_TEMP_INVALID   -274000
+
 /* Unit conversion macros */
 #define DECI_KELVIN_TO_CELSIUS(t)      ({                      \
        long _t = (t);                                          \
@@ -167,6 +170,7 @@ struct thermal_attr {
  * @forced_passive:    If > 0, temperature at which to switch on all ACPI
  *                     processor cooling devices.  Currently only used by the
  *                     step-wise governor.
+ * @need_update:       if equals 1, thermal_zone_device_update needs to be invoked.
  * @ops:       operations this &thermal_zone_device supports
  * @tzp:       thermal zone parameters
  * @governor:  pointer to the governor for this thermal zone
@@ -194,6 +198,7 @@ struct thermal_zone_device {
        int emul_temperature;
        int passive;
        unsigned int forced_passive;
+       atomic_t need_update;
        struct thermal_zone_device_ops *ops;
        struct thermal_zone_params *tzp;
        struct thermal_governor *governor;
index 2fd8708ea88893cf57e03f0ae23688ac42c9d24c..d9fb4b043f56dd45b406d3b68c59da3c860c1f43 100644 (file)
@@ -649,6 +649,7 @@ extern long vt_compat_ioctl(struct tty_struct *tty,
 /* tty_mutex.c */
 /* functions for preparation of BKL removal */
 extern void __lockfunc tty_lock(struct tty_struct *tty);
+extern int  tty_lock_interruptible(struct tty_struct *tty);
 extern void __lockfunc tty_unlock(struct tty_struct *tty);
 extern void __lockfunc tty_lock_slave(struct tty_struct *tty);
 extern void __lockfunc tty_unlock_slave(struct tty_struct *tty);
index 0e32bc71245ef46b90aa21112e4d2bef42cc5950..ca73c503b92a758ad5ce9b6d022c53c0758951c5 100644 (file)
@@ -311,6 +311,7 @@ enum {
 
        __WQ_DRAINING           = 1 << 16, /* internal: workqueue is draining */
        __WQ_ORDERED            = 1 << 17, /* internal: workqueue is ordered */
+       __WQ_LEGACY             = 1 << 18, /* internal: create*_workqueue() */
 
        WQ_MAX_ACTIVE           = 512,    /* I like 512, better ideas? */
        WQ_MAX_UNBOUND_PER_CPU  = 4,      /* 4 * #cpus for unbound wq */
@@ -411,12 +412,12 @@ __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active,
        alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, ##args)
 
 #define create_workqueue(name)                                         \
-       alloc_workqueue("%s", WQ_MEM_RECLAIM, 1, (name))
+       alloc_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, 1, (name))
 #define create_freezable_workqueue(name)                               \
-       alloc_workqueue("%s", WQ_FREEZABLE | WQ_UNBOUND | WQ_MEM_RECLAIM, \
-                       1, (name))
+       alloc_workqueue("%s", __WQ_LEGACY | WQ_FREEZABLE | WQ_UNBOUND | \
+                       WQ_MEM_RECLAIM, 1, (name))
 #define create_singlethread_workqueue(name)                            \
-       alloc_ordered_workqueue("%s", WQ_MEM_RECLAIM, name)
+       alloc_ordered_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, name)
 
 extern void destroy_workqueue(struct workqueue_struct *wq);
 
index ef03ae56b1c1cc18d9541697b353a01842b50210..8a0f55b6c2ba80e25c67caf89e2050fa58ae322f 100644 (file)
@@ -533,7 +533,8 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory,
                const unsigned int requested_sizes[]);
 int vb2_core_prepare_buf(struct vb2_queue *q, unsigned int index, void *pb);
 int vb2_core_qbuf(struct vb2_queue *q, unsigned int index, void *pb);
-int vb2_core_dqbuf(struct vb2_queue *q, void *pb, bool nonblocking);
+int vb2_core_dqbuf(struct vb2_queue *q, unsigned int *pindex, void *pb,
+                  bool nonblocking);
 
 int vb2_core_streamon(struct vb2_queue *q, unsigned int type);
 int vb2_core_streamoff(struct vb2_queue *q, unsigned int type);
index 52899291f40144c0dab97e135b0c9762ade5c8d1..5ee3c689c86370b4b325309d337611c9b51af5d1 100644 (file)
@@ -252,6 +252,12 @@ struct l2cap_conn_rsp {
 #define L2CAP_PSM_3DSP         0x0021
 #define L2CAP_PSM_IPSP         0x0023 /* 6LoWPAN */
 
+#define L2CAP_PSM_DYN_START    0x1001
+#define L2CAP_PSM_DYN_END      0xffff
+#define L2CAP_PSM_AUTO_END     0x10ff
+#define L2CAP_PSM_LE_DYN_START  0x0080
+#define L2CAP_PSM_LE_DYN_END   0x00ff
+
 /* channel identifier */
 #define L2CAP_CID_SIGNALING    0x0001
 #define L2CAP_CID_CONN_LESS    0x0002
index 6816f0fa5693dc275e1e4997375f29b5d99f7b64..30a56ab2ccfb05d7bc9a527ebdc09da19b3d5f7b 100644 (file)
@@ -44,6 +44,24 @@ static inline bool skb_valid_dst(const struct sk_buff *skb)
        return dst && !(dst->flags & DST_METADATA);
 }
 
+static inline int skb_metadata_dst_cmp(const struct sk_buff *skb_a,
+                                      const struct sk_buff *skb_b)
+{
+       const struct metadata_dst *a, *b;
+
+       if (!(skb_a->_skb_refdst | skb_b->_skb_refdst))
+               return 0;
+
+       a = (const struct metadata_dst *) skb_dst(skb_a);
+       b = (const struct metadata_dst *) skb_dst(skb_b);
+
+       if (!a != !b || a->u.tun_info.options_len != b->u.tun_info.options_len)
+               return 1;
+
+       return memcmp(&a->u.tun_info, &b->u.tun_info,
+                     sizeof(a->u.tun_info) + a->u.tun_info.options_len);
+}
+
 struct metadata_dst *metadata_dst_alloc(u8 optslen, gfp_t flags);
 struct metadata_dst __percpu *metadata_dst_alloc_percpu(u8 optslen, gfp_t flags);
 
index 877f682989b892fb1d70256bda6b33f03e34a0f5..295d291269e2c88ed4930041597a28f7c9a7a2f7 100644 (file)
@@ -64,8 +64,16 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr)
 
 void ip6_route_input(struct sk_buff *skb);
 
-struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
-                                  struct flowi6 *fl6);
+struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
+                                        struct flowi6 *fl6, int flags);
+
+static inline struct dst_entry *ip6_route_output(struct net *net,
+                                                const struct sock *sk,
+                                                struct flowi6 *fl6)
+{
+       return ip6_route_output_flags(net, sk, fl6, 0);
+}
+
 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
                                   int flags);
 
index 788ef58a66b9b78807d164bce85060cc760ee336..62e17d1319ff7423dbcf176815f04cecfcdf3371 100644 (file)
@@ -79,12 +79,10 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
             const struct nf_conntrack_l3proto *l3proto,
             const struct nf_conntrack_l4proto *proto);
 
-#ifdef CONFIG_LOCKDEP
-# define CONNTRACK_LOCKS 8
-#else
-# define CONNTRACK_LOCKS 1024
-#endif
+#define CONNTRACK_LOCKS 1024
+
 extern spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS];
+void nf_conntrack_lock(spinlock_t *lock);
 
 extern spinlock_t nf_conntrack_expect_lock;
 
index 20e72129be1ce0063eeafcbaadcee1f37e0c614c..205630bb5010b8ac76b84651b302e488fc1c76ff 100644 (file)
@@ -756,7 +756,6 @@ struct sctp_transport {
 
        /* Reference counting. */
        atomic_t refcnt;
-       __u32    dead:1,
                /* RTO-Pending : A flag used to track if one of the DATA
                 *              chunks sent to this address is currently being
                 *              used to compute a RTT. If this flag is 0,
@@ -766,7 +765,7 @@ struct sctp_transport {
                 *              calculation completes (i.e. the DATA chunk
                 *              is SACK'd) clear this flag.
                 */
-                rto_pending:1,
+       __u32   rto_pending:1,
 
                /*
                 * hb_sent : a flag that signals that we have a pending
@@ -955,7 +954,7 @@ void sctp_transport_route(struct sctp_transport *, union sctp_addr *,
 void sctp_transport_pmtu(struct sctp_transport *, struct sock *sk);
 void sctp_transport_free(struct sctp_transport *);
 void sctp_transport_reset_timers(struct sctp_transport *);
-void sctp_transport_hold(struct sctp_transport *);
+int sctp_transport_hold(struct sctp_transport *);
 void sctp_transport_put(struct sctp_transport *);
 void sctp_transport_update_rto(struct sctp_transport *, __u32);
 void sctp_transport_raise_cwnd(struct sctp_transport *, __u32, __u32);
index b9e7b3d863a09db498e00004da4da8a781e90b9f..f5ea148853e2f76257aca80a4aef65a30d587538 100644 (file)
@@ -1035,18 +1035,6 @@ struct proto {
        struct list_head        node;
 #ifdef SOCK_REFCNT_DEBUG
        atomic_t                socks;
-#endif
-#ifdef CONFIG_MEMCG_KMEM
-       /*
-        * cgroup specific init/deinit functions. Called once for all
-        * protocols that implement it, from cgroups populate function.
-        * This function has to setup any files the protocol want to
-        * appear in the kmem cgroup filesystem.
-        */
-       int                     (*init_cgroup)(struct mem_cgroup *memcg,
-                                              struct cgroup_subsys *ss);
-       void                    (*destroy_cgroup)(struct mem_cgroup *memcg);
-       struct cg_proto         *(*proto_cgroup)(struct mem_cgroup *memcg);
 #endif
        int                     (*diag_destroy)(struct sock *sk, int err);
 };
index 7dda3d7adba8e905b3d14db7f3361d9a96493b46..aecd30308d500b53a1b46902f2b3d254bcbc39b6 100644 (file)
@@ -16,7 +16,7 @@ struct sock_reuseport {
 };
 
 extern int reuseport_alloc(struct sock *sk);
-extern int reuseport_add_sock(struct sock *sk, const struct sock *sk2);
+extern int reuseport_add_sock(struct sock *sk, struct sock *sk2);
 extern void reuseport_detach_sock(struct sock *sk);
 extern struct sock *reuseport_select_sock(struct sock *sk,
                                          u32 hash,
index 8ea19977ea530bffb57bebed734b977c95b603c5..f6f8f032c73e81326790239cf021d2e50414ce1b 100644 (file)
@@ -216,7 +216,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 /* TCP thin-stream limits */
 #define TCP_THIN_LINEAR_RETRIES 6       /* After 6 linear retries, do exp. backoff */
 
-/* TCP initial congestion window as per draft-hkchu-tcpm-initcwnd-01 */
+/* TCP initial congestion window as per rfc6928 */
 #define TCP_INIT_CWND          10
 
 /* Bit Flags for sysctl_tcp_fastopen */
index 11528591d0d714f3ea1004566cb799952a9ca5bd..c34c9002460c567a8e7e3e8cd9cb6acb751c97d8 100644 (file)
@@ -83,6 +83,8 @@ struct rdma_dev_addr {
        int bound_dev_if;
        enum rdma_transport_type transport;
        struct net *net;
+       enum rdma_network_type network;
+       int hoplimit;
 };
 
 /**
@@ -91,8 +93,8 @@ struct rdma_dev_addr {
  *
  * The dev_addr->net field must be initialized.
  */
-int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
-                     u16 *vlan_id);
+int rdma_translate_ip(const struct sockaddr *addr,
+                     struct rdma_dev_addr *dev_addr, u16 *vlan_id);
 
 /**
  * rdma_resolve_ip - Resolve source and destination IP addresses to
@@ -117,6 +119,10 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
                                     struct rdma_dev_addr *addr, void *context),
                    void *context);
 
+int rdma_resolve_ip_route(struct sockaddr *src_addr,
+                         const struct sockaddr *dst_addr,
+                         struct rdma_dev_addr *addr);
+
 void rdma_addr_cancel(struct rdma_dev_addr *addr);
 
 int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
@@ -125,8 +131,10 @@ int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
 int rdma_addr_size(struct sockaddr *addr);
 
 int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id);
-int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgid,
-                              u8 *smac, u16 *vlan_id, int if_index);
+int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
+                                const union ib_gid *dgid,
+                                u8 *smac, u16 *vlan_id, int *if_index,
+                                int *hoplimit);
 
 static inline u16 ib_addr_get_pkey(struct rdma_dev_addr *dev_addr)
 {
index 269a27cf0a46f37c7fd9fdc3fc24df2d5070c2d2..e30f19bd4a41ef6900cda863c99f3b238a30cd47 100644 (file)
@@ -60,6 +60,7 @@ int ib_get_cached_gid(struct ib_device    *device,
  *   a specified GID value occurs.
  * @device: The device to query.
  * @gid: The GID value to search for.
+ * @gid_type: The GID type to search for.
  * @ndev: In RoCE, the net device of the device. NULL means ignore.
  * @port_num: The port number of the device where the GID value was found.
  * @index: The index into the cached GID table where the GID was found.  This
@@ -70,6 +71,7 @@ int ib_get_cached_gid(struct ib_device    *device,
  */
 int ib_find_cached_gid(struct ib_device *device,
                       const union ib_gid *gid,
+                      enum ib_gid_type gid_type,
                       struct net_device *ndev,
                       u8               *port_num,
                       u16              *index);
@@ -79,6 +81,7 @@ int ib_find_cached_gid(struct ib_device *device,
  * GID value occurs
  * @device: The device to query.
  * @gid: The GID value to search for.
+ * @gid_type: The GID type to search for.
  * @port_num: The port number of the device where the GID value sould be
  *   searched.
  * @ndev: In RoCE, the net device of the device. Null means ignore.
@@ -90,6 +93,7 @@ int ib_find_cached_gid(struct ib_device *device,
  */
 int ib_find_cached_gid_by_port(struct ib_device *device,
                               const union ib_gid *gid,
+                              enum ib_gid_type gid_type,
                               u8               port_num,
                               struct net_device *ndev,
                               u16              *index);
index ec9b44dd3d806b20d80920a0cb37487ea2d12c24..0ff049bd9ad413c47b92aaaf116a43d1738e4b32 100644 (file)
@@ -438,6 +438,7 @@ typedef void (*ib_mad_snoop_handler)(struct ib_mad_agent *mad_agent,
 /**
  * ib_mad_recv_handler - callback handler for a received MAD.
  * @mad_agent: MAD agent requesting the received MAD.
+ * @send_buf: Send buffer if found, else NULL
  * @mad_recv_wc: Received work completion information on the received MAD.
  *
  * MADs received in response to a send request operation will be handed to
@@ -447,6 +448,7 @@ typedef void (*ib_mad_snoop_handler)(struct ib_mad_agent *mad_agent,
  * modify the data referenced by @mad_recv_wc.
  */
 typedef void (*ib_mad_recv_handler)(struct ib_mad_agent *mad_agent,
+                                   struct ib_mad_send_buf *send_buf,
                                    struct ib_mad_recv_wc *mad_recv_wc);
 
 /**
index e99d8f9a4551d9889501c645e04139fd0f32a3f8..0f3daae44bf9bf5b440c9d15d6a4d90544d29b88 100644 (file)
@@ -41,6 +41,8 @@ enum {
        IB_ETH_BYTES  = 14,
        IB_VLAN_BYTES = 4,
        IB_GRH_BYTES  = 40,
+       IB_IP4_BYTES  = 20,
+       IB_UDP_BYTES  = 8,
        IB_BTH_BYTES  = 12,
        IB_DETH_BYTES = 8
 };
@@ -223,6 +225,27 @@ struct ib_unpacked_eth {
        __be16  type;
 };
 
+struct ib_unpacked_ip4 {
+       u8      ver;
+       u8      hdr_len;
+       u8      tos;
+       __be16  tot_len;
+       __be16  id;
+       __be16  frag_off;
+       u8      ttl;
+       u8      protocol;
+       __sum16 check;
+       __be32  saddr;
+       __be32  daddr;
+};
+
+struct ib_unpacked_udp {
+       __be16  sport;
+       __be16  dport;
+       __be16  length;
+       __be16  csum;
+};
+
 struct ib_unpacked_vlan {
        __be16  tag;
        __be16  type;
@@ -237,6 +260,10 @@ struct ib_ud_header {
        struct ib_unpacked_vlan vlan;
        int                     grh_present;
        struct ib_unpacked_grh  grh;
+       int                     ipv4_present;
+       struct ib_unpacked_ip4  ip4;
+       int                     udp_present;
+       struct ib_unpacked_udp  udp;
        struct ib_unpacked_bth  bth;
        struct ib_unpacked_deth deth;
        int                     immediate_present;
@@ -253,13 +280,17 @@ void ib_unpack(const struct ib_field        *desc,
               void                         *buf,
               void                         *structure);
 
-void ib_ud_header_init(int                 payload_bytes,
-                      int                  lrh_present,
-                      int                  eth_present,
-                      int                  vlan_present,
-                      int                  grh_present,
-                      int                  immediate_present,
-                      struct ib_ud_header *header);
+__sum16 ib_ud_ip4_csum(struct ib_ud_header *header);
+
+int ib_ud_header_init(int                  payload_bytes,
+                     int                   lrh_present,
+                     int                   eth_present,
+                     int                   vlan_present,
+                     int                   grh_present,
+                     int                   ip_version,
+                     int                   udp_present,
+                     int                   immediate_present,
+                     struct ib_ud_header *header);
 
 int ib_ud_header_pack(struct ib_ud_header *header,
                      void                *buf);
index a5889f18807b62f6f2d46f01a4650b5876647051..2f8a65c1fca7b77bc03c20e3b886036448645ad1 100644 (file)
@@ -42,6 +42,7 @@
  */
 #define IB_PMA_CLASS_CAP_ALLPORTSELECT  cpu_to_be16(1 << 8)
 #define IB_PMA_CLASS_CAP_EXT_WIDTH      cpu_to_be16(1 << 9)
+#define IB_PMA_CLASS_CAP_EXT_WIDTH_NOIETF cpu_to_be16(1 << 10)
 #define IB_PMA_CLASS_CAP_XMIT_WAIT      cpu_to_be16(1 << 12)
 
 #define IB_PMA_CLASS_PORT_INFO          cpu_to_be16(0x0001)
index 301969552d0a51e34dcd872daa303a6339721916..cdc1c81aa275bda38f97f1e71c3c774dfbc8c983 100644 (file)
@@ -160,6 +160,7 @@ struct ib_sa_path_rec {
        int          ifindex;
        /* ignored in IB */
        struct net  *net;
+       enum ib_gid_type gid_type;
 };
 
 static inline struct net_device *ib_get_ndev_from_path(struct ib_sa_path_rec *rec)
@@ -402,6 +403,8 @@ int ib_sa_get_mcmember_rec(struct ib_device *device, u8 port_num,
  */
 int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
                             struct ib_sa_mcmember_rec *rec,
+                            struct net_device *ndev,
+                            enum ib_gid_type gid_type,
                             struct ib_ah_attr *ah_attr);
 
 /**
index 120da1d7f57eb578c327507ad2affa5b89488dbb..284b00c8fea4a4f67943aa7979d1b51728e5732e 100644 (file)
 #include <linux/scatterlist.h>
 #include <linux/workqueue.h>
 #include <linux/socket.h>
+#include <linux/irq_poll.h>
 #include <uapi/linux/if_ether.h>
+#include <net/ipv6.h>
+#include <net/ip.h>
+#include <linux/string.h>
+#include <linux/slab.h>
 
 #include <linux/atomic.h>
 #include <linux/mmu_notifier.h>
 #include <asm/uaccess.h>
 
 extern struct workqueue_struct *ib_wq;
+extern struct workqueue_struct *ib_comp_wq;
 
 union ib_gid {
        u8      raw[16];
@@ -67,7 +73,17 @@ union ib_gid {
 
 extern union ib_gid zgid;
 
+enum ib_gid_type {
+       /* If link layer is Ethernet, this is RoCE V1 */
+       IB_GID_TYPE_IB        = 0,
+       IB_GID_TYPE_ROCE      = 0,
+       IB_GID_TYPE_ROCE_UDP_ENCAP = 1,
+       IB_GID_TYPE_SIZE
+};
+
+#define ROCE_V2_UDP_DPORT      4791
 struct ib_gid_attr {
+       enum ib_gid_type        gid_type;
        struct net_device       *ndev;
 };
 
@@ -98,6 +114,35 @@ enum rdma_protocol_type {
 __attribute_const__ enum rdma_transport_type
 rdma_node_get_transport(enum rdma_node_type node_type);
 
+enum rdma_network_type {
+       RDMA_NETWORK_IB,
+       RDMA_NETWORK_ROCE_V1 = RDMA_NETWORK_IB,
+       RDMA_NETWORK_IPV4,
+       RDMA_NETWORK_IPV6
+};
+
+static inline enum ib_gid_type ib_network_to_gid_type(enum rdma_network_type network_type)
+{
+       if (network_type == RDMA_NETWORK_IPV4 ||
+           network_type == RDMA_NETWORK_IPV6)
+               return IB_GID_TYPE_ROCE_UDP_ENCAP;
+
+       /* IB_GID_TYPE_IB same as RDMA_NETWORK_ROCE_V1 */
+       return IB_GID_TYPE_IB;
+}
+
+static inline enum rdma_network_type ib_gid_to_network_type(enum ib_gid_type gid_type,
+                                                           union ib_gid *gid)
+{
+       if (gid_type == IB_GID_TYPE_IB)
+               return RDMA_NETWORK_IB;
+
+       if (ipv6_addr_v4mapped((struct in6_addr *)gid))
+               return RDMA_NETWORK_IPV4;
+       else
+               return RDMA_NETWORK_IPV6;
+}
+
 enum rdma_link_layer {
        IB_LINK_LAYER_UNSPECIFIED,
        IB_LINK_LAYER_INFINIBAND,
@@ -105,24 +150,32 @@ enum rdma_link_layer {
 };
 
 enum ib_device_cap_flags {
-       IB_DEVICE_RESIZE_MAX_WR         = 1,
-       IB_DEVICE_BAD_PKEY_CNTR         = (1<<1),
-       IB_DEVICE_BAD_QKEY_CNTR         = (1<<2),
-       IB_DEVICE_RAW_MULTI             = (1<<3),
-       IB_DEVICE_AUTO_PATH_MIG         = (1<<4),
-       IB_DEVICE_CHANGE_PHY_PORT       = (1<<5),
-       IB_DEVICE_UD_AV_PORT_ENFORCE    = (1<<6),
-       IB_DEVICE_CURR_QP_STATE_MOD     = (1<<7),
-       IB_DEVICE_SHUTDOWN_PORT         = (1<<8),
-       IB_DEVICE_INIT_TYPE             = (1<<9),
-       IB_DEVICE_PORT_ACTIVE_EVENT     = (1<<10),
-       IB_DEVICE_SYS_IMAGE_GUID        = (1<<11),
-       IB_DEVICE_RC_RNR_NAK_GEN        = (1<<12),
-       IB_DEVICE_SRQ_RESIZE            = (1<<13),
-       IB_DEVICE_N_NOTIFY_CQ           = (1<<14),
-       IB_DEVICE_LOCAL_DMA_LKEY        = (1<<15),
-       IB_DEVICE_RESERVED              = (1<<16), /* old SEND_W_INV */
-       IB_DEVICE_MEM_WINDOW            = (1<<17),
+       IB_DEVICE_RESIZE_MAX_WR                 = (1 << 0),
+       IB_DEVICE_BAD_PKEY_CNTR                 = (1 << 1),
+       IB_DEVICE_BAD_QKEY_CNTR                 = (1 << 2),
+       IB_DEVICE_RAW_MULTI                     = (1 << 3),
+       IB_DEVICE_AUTO_PATH_MIG                 = (1 << 4),
+       IB_DEVICE_CHANGE_PHY_PORT               = (1 << 5),
+       IB_DEVICE_UD_AV_PORT_ENFORCE            = (1 << 6),
+       IB_DEVICE_CURR_QP_STATE_MOD             = (1 << 7),
+       IB_DEVICE_SHUTDOWN_PORT                 = (1 << 8),
+       IB_DEVICE_INIT_TYPE                     = (1 << 9),
+       IB_DEVICE_PORT_ACTIVE_EVENT             = (1 << 10),
+       IB_DEVICE_SYS_IMAGE_GUID                = (1 << 11),
+       IB_DEVICE_RC_RNR_NAK_GEN                = (1 << 12),
+       IB_DEVICE_SRQ_RESIZE                    = (1 << 13),
+       IB_DEVICE_N_NOTIFY_CQ                   = (1 << 14),
+
+       /*
+        * This device supports a per-device lkey or stag that can be
+        * used without performing a memory registration for the local
+        * memory.  Note that ULPs should never check this flag, but
+        * instead of use the local_dma_lkey flag in the ib_pd structure,
+        * which will always contain a usable lkey.
+        */
+       IB_DEVICE_LOCAL_DMA_LKEY                = (1 << 15),
+       IB_DEVICE_RESERVED /* old SEND_W_INV */ = (1 << 16),
+       IB_DEVICE_MEM_WINDOW                    = (1 << 17),
        /*
         * Devices should set IB_DEVICE_UD_IP_SUM if they support
         * insertion of UDP and TCP checksum on outgoing UD IPoIB
@@ -130,18 +183,35 @@ enum ib_device_cap_flags {
         * incoming messages.  Setting this flag implies that the
         * IPoIB driver may set NETIF_F_IP_CSUM for datagram mode.
         */
-       IB_DEVICE_UD_IP_CSUM            = (1<<18),
-       IB_DEVICE_UD_TSO                = (1<<19),
-       IB_DEVICE_XRC                   = (1<<20),
-       IB_DEVICE_MEM_MGT_EXTENSIONS    = (1<<21),
-       IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1<<22),
-       IB_DEVICE_MEM_WINDOW_TYPE_2A    = (1<<23),
-       IB_DEVICE_MEM_WINDOW_TYPE_2B    = (1<<24),
-       IB_DEVICE_RC_IP_CSUM            = (1<<25),
-       IB_DEVICE_RAW_IP_CSUM           = (1<<26),
-       IB_DEVICE_MANAGED_FLOW_STEERING = (1<<29),
-       IB_DEVICE_SIGNATURE_HANDOVER    = (1<<30),
-       IB_DEVICE_ON_DEMAND_PAGING      = (1<<31),
+       IB_DEVICE_UD_IP_CSUM                    = (1 << 18),
+       IB_DEVICE_UD_TSO                        = (1 << 19),
+       IB_DEVICE_XRC                           = (1 << 20),
+
+       /*
+        * This device supports the IB "base memory management extension",
+        * which includes support for fast registrations (IB_WR_REG_MR,
+        * IB_WR_LOCAL_INV and IB_WR_SEND_WITH_INV verbs).  This flag should
+        * also be set by any iWarp device which must support FRs to comply
+        * to the iWarp verbs spec.  iWarp devices also support the
+        * IB_WR_RDMA_READ_WITH_INV verb for RDMA READs that invalidate the
+        * stag.
+        */
+       IB_DEVICE_MEM_MGT_EXTENSIONS            = (1 << 21),
+       IB_DEVICE_BLOCK_MULTICAST_LOOPBACK      = (1 << 22),
+       IB_DEVICE_MEM_WINDOW_TYPE_2A            = (1 << 23),
+       IB_DEVICE_MEM_WINDOW_TYPE_2B            = (1 << 24),
+       IB_DEVICE_RC_IP_CSUM                    = (1 << 25),
+       IB_DEVICE_RAW_IP_CSUM                   = (1 << 26),
+       /*
+        * Devices should set IB_DEVICE_CROSS_CHANNEL if they
+        * support execution of WQEs that involve synchronization
+        * of I/O operations with single completion queue managed
+        * by hardware.
+        */
+       IB_DEVICE_CROSS_CHANNEL         = (1 << 27),
+       IB_DEVICE_MANAGED_FLOW_STEERING         = (1 << 29),
+       IB_DEVICE_SIGNATURE_HANDOVER            = (1 << 30),
+       IB_DEVICE_ON_DEMAND_PAGING              = (1 << 31),
 };
 
 enum ib_signature_prot_cap {
@@ -184,6 +254,7 @@ struct ib_odp_caps {
 
 enum ib_cq_creation_flags {
        IB_CQ_FLAGS_TIMESTAMP_COMPLETION   = 1 << 0,
+       IB_CQ_FLAGS_IGNORE_OVERRUN         = 1 << 1,
 };
 
 struct ib_cq_init_attr {
@@ -393,6 +464,7 @@ union rdma_protocol_stats {
 #define RDMA_CORE_CAP_PROT_IB           0x00100000
 #define RDMA_CORE_CAP_PROT_ROCE         0x00200000
 #define RDMA_CORE_CAP_PROT_IWARP        0x00400000
+#define RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP 0x00800000
 
 #define RDMA_CORE_PORT_IBA_IB          (RDMA_CORE_CAP_PROT_IB  \
                                        | RDMA_CORE_CAP_IB_MAD \
@@ -405,6 +477,12 @@ union rdma_protocol_stats {
                                        | RDMA_CORE_CAP_IB_CM   \
                                        | RDMA_CORE_CAP_AF_IB   \
                                        | RDMA_CORE_CAP_ETH_AH)
+#define RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP                      \
+                                       (RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP \
+                                       | RDMA_CORE_CAP_IB_MAD  \
+                                       | RDMA_CORE_CAP_IB_CM   \
+                                       | RDMA_CORE_CAP_AF_IB   \
+                                       | RDMA_CORE_CAP_ETH_AH)
 #define RDMA_CORE_PORT_IWARP           (RDMA_CORE_CAP_PROT_IWARP \
                                        | RDMA_CORE_CAP_IW_CM)
 #define RDMA_CORE_PORT_INTEL_OPA       (RDMA_CORE_PORT_IBA_IB  \
@@ -519,6 +597,17 @@ struct ib_grh {
        union ib_gid    dgid;
 };
 
+union rdma_network_hdr {
+       struct ib_grh ibgrh;
+       struct {
+               /* The IB spec states that if it's IPv4, the header
+                * is located in the last 20 bytes of the header.
+                */
+               u8              reserved[20];
+               struct iphdr    roce4grh;
+       };
+};
+
 enum {
        IB_MULTICAST_QPN = 0xffffff
 };
@@ -734,7 +823,6 @@ enum ib_wc_opcode {
        IB_WC_RDMA_READ,
        IB_WC_COMP_SWAP,
        IB_WC_FETCH_ADD,
-       IB_WC_BIND_MW,
        IB_WC_LSO,
        IB_WC_LOCAL_INV,
        IB_WC_REG_MR,
@@ -755,10 +843,14 @@ enum ib_wc_flags {
        IB_WC_IP_CSUM_OK        = (1<<3),
        IB_WC_WITH_SMAC         = (1<<4),
        IB_WC_WITH_VLAN         = (1<<5),
+       IB_WC_WITH_NETWORK_HDR_TYPE     = (1<<6),
 };
 
 struct ib_wc {
-       u64                     wr_id;
+       union {
+               u64             wr_id;
+               struct ib_cqe   *wr_cqe;
+       };
        enum ib_wc_status       status;
        enum ib_wc_opcode       opcode;
        u32                     vendor_err;
@@ -777,6 +869,7 @@ struct ib_wc {
        u8                      port_num;       /* valid only for DR SMPs on switches */
        u8                      smac[ETH_ALEN];
        u16                     vlan_id;
+       u8                      network_hdr_type;
 };
 
 enum ib_cq_notify_flags {
@@ -866,6 +959,9 @@ enum ib_qp_type {
 enum ib_qp_create_flags {
        IB_QP_CREATE_IPOIB_UD_LSO               = 1 << 0,
        IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK   = 1 << 1,
+       IB_QP_CREATE_CROSS_CHANNEL              = 1 << 2,
+       IB_QP_CREATE_MANAGED_SEND               = 1 << 3,
+       IB_QP_CREATE_MANAGED_RECV               = 1 << 4,
        IB_QP_CREATE_NETIF_QP                   = 1 << 5,
        IB_QP_CREATE_SIGNATURE_EN               = 1 << 6,
        IB_QP_CREATE_USE_GFP_NOIO               = 1 << 7,
@@ -1027,7 +1123,6 @@ enum ib_wr_opcode {
        IB_WR_REG_MR,
        IB_WR_MASKED_ATOMIC_CMP_AND_SWP,
        IB_WR_MASKED_ATOMIC_FETCH_AND_ADD,
-       IB_WR_BIND_MW,
        IB_WR_REG_SIG_MR,
        /* reserve values for low level drivers' internal use.
         * These values will not be used at all in the ib core layer.
@@ -1062,26 +1157,16 @@ struct ib_sge {
        u32     lkey;
 };
 
-/**
- * struct ib_mw_bind_info - Parameters for a memory window bind operation.
- * @mr: A memory region to bind the memory window to.
- * @addr: The address where the memory window should begin.
- * @length: The length of the memory window, in bytes.
- * @mw_access_flags: Access flags from enum ib_access_flags for the window.
- *
- * This struct contains the shared parameters for type 1 and type 2
- * memory window bind operations.
- */
-struct ib_mw_bind_info {
-       struct ib_mr   *mr;
-       u64             addr;
-       u64             length;
-       int             mw_access_flags;
+struct ib_cqe {
+       void (*done)(struct ib_cq *cq, struct ib_wc *wc);
 };
 
 struct ib_send_wr {
        struct ib_send_wr      *next;
-       u64                     wr_id;
+       union {
+               u64             wr_id;
+               struct ib_cqe   *wr_cqe;
+       };
        struct ib_sge          *sg_list;
        int                     num_sge;
        enum ib_wr_opcode       opcode;
@@ -1147,19 +1232,6 @@ static inline struct ib_reg_wr *reg_wr(struct ib_send_wr *wr)
        return container_of(wr, struct ib_reg_wr, wr);
 }
 
-struct ib_bind_mw_wr {
-       struct ib_send_wr       wr;
-       struct ib_mw            *mw;
-       /* The new rkey for the memory window. */
-       u32                     rkey;
-       struct ib_mw_bind_info  bind_info;
-};
-
-static inline struct ib_bind_mw_wr *bind_mw_wr(struct ib_send_wr *wr)
-{
-       return container_of(wr, struct ib_bind_mw_wr, wr);
-}
-
 struct ib_sig_handover_wr {
        struct ib_send_wr       wr;
        struct ib_sig_attrs    *sig_attrs;
@@ -1175,7 +1247,10 @@ static inline struct ib_sig_handover_wr *sig_handover_wr(struct ib_send_wr *wr)
 
 struct ib_recv_wr {
        struct ib_recv_wr      *next;
-       u64                     wr_id;
+       union {
+               u64             wr_id;
+               struct ib_cqe   *wr_cqe;
+       };
        struct ib_sge          *sg_list;
        int                     num_sge;
 };
@@ -1190,20 +1265,10 @@ enum ib_access_flags {
        IB_ACCESS_ON_DEMAND     = (1<<6),
 };
 
-struct ib_phys_buf {
-       u64      addr;
-       u64      size;
-};
-
-struct ib_mr_attr {
-       struct ib_pd    *pd;
-       u64             device_virt_addr;
-       u64             size;
-       int             mr_access_flags;
-       u32             lkey;
-       u32             rkey;
-};
-
+/*
+ * XXX: these are apparently used for ->rereg_user_mr, no idea why they
+ * are hidden here instead of a uapi header!
+ */
 enum ib_mr_rereg_flags {
        IB_MR_REREG_TRANS       = 1,
        IB_MR_REREG_PD          = (1<<1),
@@ -1211,18 +1276,6 @@ enum ib_mr_rereg_flags {
        IB_MR_REREG_SUPPORTED   = ((IB_MR_REREG_ACCESS << 1) - 1)
 };
 
-/**
- * struct ib_mw_bind - Parameters for a type 1 memory window bind operation.
- * @wr_id:      Work request id.
- * @send_flags: Flags from ib_send_flags enum.
- * @bind_info:  More parameters of the bind operation.
- */
-struct ib_mw_bind {
-       u64                    wr_id;
-       int                    send_flags;
-       struct ib_mw_bind_info bind_info;
-};
-
 struct ib_fmr_attr {
        int     max_pages;
        int     max_maps;
@@ -1307,6 +1360,12 @@ struct ib_ah {
 
 typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context);
 
+enum ib_poll_context {
+       IB_POLL_DIRECT,         /* caller context, no hw completions */
+       IB_POLL_SOFTIRQ,        /* poll from softirq context */
+       IB_POLL_WORKQUEUE,      /* poll from workqueue */
+};
+
 struct ib_cq {
        struct ib_device       *device;
        struct ib_uobject      *uobject;
@@ -1315,6 +1374,12 @@ struct ib_cq {
        void                   *cq_context;
        int                     cqe;
        atomic_t                usecnt; /* count number of work queues */
+       enum ib_poll_context    poll_ctx;
+       struct ib_wc            *wc;
+       union {
+               struct irq_poll         iop;
+               struct work_struct      work;
+       };
 };
 
 struct ib_srq {
@@ -1363,7 +1428,6 @@ struct ib_mr {
        u64                iova;
        u32                length;
        unsigned int       page_size;
-       atomic_t           usecnt; /* count number of MWs */
 };
 
 struct ib_mw {
@@ -1724,11 +1788,6 @@ struct ib_device {
                                                      int wc_cnt);
        struct ib_mr *             (*get_dma_mr)(struct ib_pd *pd,
                                                 int mr_access_flags);
-       struct ib_mr *             (*reg_phys_mr)(struct ib_pd *pd,
-                                                 struct ib_phys_buf *phys_buf_array,
-                                                 int num_phys_buf,
-                                                 int mr_access_flags,
-                                                 u64 *iova_start);
        struct ib_mr *             (*reg_user_mr)(struct ib_pd *pd,
                                                  u64 start, u64 length,
                                                  u64 virt_addr,
@@ -1741,8 +1800,6 @@ struct ib_device {
                                                    int mr_access_flags,
                                                    struct ib_pd *pd,
                                                    struct ib_udata *udata);
-       int                        (*query_mr)(struct ib_mr *mr,
-                                              struct ib_mr_attr *mr_attr);
        int                        (*dereg_mr)(struct ib_mr *mr);
        struct ib_mr *             (*alloc_mr)(struct ib_pd *pd,
                                               enum ib_mr_type mr_type,
@@ -1750,18 +1807,8 @@ struct ib_device {
        int                        (*map_mr_sg)(struct ib_mr *mr,
                                                struct scatterlist *sg,
                                                int sg_nents);
-       int                        (*rereg_phys_mr)(struct ib_mr *mr,
-                                                   int mr_rereg_mask,
-                                                   struct ib_pd *pd,
-                                                   struct ib_phys_buf *phys_buf_array,
-                                                   int num_phys_buf,
-                                                   int mr_access_flags,
-                                                   u64 *iova_start);
        struct ib_mw *             (*alloc_mw)(struct ib_pd *pd,
                                               enum ib_mw_type type);
-       int                        (*bind_mw)(struct ib_qp *qp,
-                                             struct ib_mw *mw,
-                                             struct ib_mw_bind *mw_bind);
        int                        (*dealloc_mw)(struct ib_mw *mw);
        struct ib_fmr *            (*alloc_fmr)(struct ib_pd *pd,
                                                int mr_access_flags,
@@ -1823,6 +1870,7 @@ struct ib_device {
        u16                          is_switch:1;
        u8                           node_type;
        u8                           phys_port_cnt;
+       struct ib_device_attr        attrs;
 
        /**
         * The following mandatory functions are used only at device
@@ -1888,6 +1936,31 @@ static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len
        return copy_to_user(udata->outbuf, src, len) ? -EFAULT : 0;
 }
 
+static inline bool ib_is_udata_cleared(struct ib_udata *udata,
+                                      size_t offset,
+                                      size_t len)
+{
+       const void __user *p = udata->inbuf + offset;
+       bool ret = false;
+       u8 *buf;
+
+       if (len > USHRT_MAX)
+               return false;
+
+       buf = kmalloc(len, GFP_KERNEL);
+       if (!buf)
+               return false;
+
+       if (copy_from_user(buf, p, len))
+               goto free;
+
+       ret = !memchr_inv(buf, 0, len);
+
+free:
+       kfree(buf);
+       return ret;
+}
+
 /**
  * ib_modify_qp_is_ok - Check that the supplied attribute mask
  * contains all required attributes and no attributes not allowed for
@@ -1912,9 +1985,6 @@ int ib_register_event_handler  (struct ib_event_handler *event_handler);
 int ib_unregister_event_handler(struct ib_event_handler *event_handler);
 void ib_dispatch_event(struct ib_event *event);
 
-int ib_query_device(struct ib_device *device,
-                   struct ib_device_attr *device_attr);
-
 int ib_query_port(struct ib_device *device,
                  u8 port_num, struct ib_port_attr *port_attr);
 
@@ -1967,6 +2037,17 @@ static inline bool rdma_protocol_ib(const struct ib_device *device, u8 port_num)
 }
 
 static inline bool rdma_protocol_roce(const struct ib_device *device, u8 port_num)
+{
+       return device->port_immutable[port_num].core_cap_flags &
+               (RDMA_CORE_CAP_PROT_ROCE | RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP);
+}
+
+static inline bool rdma_protocol_roce_udp_encap(const struct ib_device *device, u8 port_num)
+{
+       return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP;
+}
+
+static inline bool rdma_protocol_roce_eth_encap(const struct ib_device *device, u8 port_num)
 {
        return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_ROCE;
 }
@@ -1978,8 +2059,8 @@ static inline bool rdma_protocol_iwarp(const struct ib_device *device, u8 port_n
 
 static inline bool rdma_ib_or_roce(const struct ib_device *device, u8 port_num)
 {
-       return device->port_immutable[port_num].core_cap_flags &
-               (RDMA_CORE_CAP_PROT_IB | RDMA_CORE_CAP_PROT_ROCE);
+       return rdma_protocol_ib(device, port_num) ||
+               rdma_protocol_roce(device, port_num);
 }
 
 /**
@@ -2220,7 +2301,8 @@ int ib_modify_port(struct ib_device *device,
                   struct ib_port_modify *port_modify);
 
 int ib_find_gid(struct ib_device *device, union ib_gid *gid,
-               struct net_device *ndev, u8 *port_num, u16 *index);
+               enum ib_gid_type gid_type, struct net_device *ndev,
+               u8 *port_num, u16 *index);
 
 int ib_find_pkey(struct ib_device *device,
                 u8 port_num, u16 pkey, u16 *index);
@@ -2454,6 +2536,11 @@ static inline int ib_post_recv(struct ib_qp *qp,
        return qp->device->post_recv(qp, recv_wr, bad_recv_wr);
 }
 
+struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
+               int nr_cqe, int comp_vector, enum ib_poll_context poll_ctx);
+void ib_free_cq(struct ib_cq *cq);
+int ib_process_cq_direct(struct ib_cq *cq, int budget);
+
 /**
  * ib_create_cq - Creates a CQ on the specified device.
  * @device: The device on which to create the CQ.
@@ -2838,13 +2925,6 @@ static inline void ib_dma_free_coherent(struct ib_device *dev,
                dma_free_coherent(dev->dma_device, size, cpu_addr, dma_handle);
 }
 
-/**
- * ib_query_mr - Retrieves information about a specific memory region.
- * @mr: The memory region to retrieve information about.
- * @mr_attr: The attributes of the specified memory region.
- */
-int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr);
-
 /**
  * ib_dereg_mr - Deregisters a memory region and removes it from the
  *   HCA translation table.
@@ -2881,42 +2961,6 @@ static inline u32 ib_inc_rkey(u32 rkey)
        return ((rkey + 1) & mask) | (rkey & ~mask);
 }
 
-/**
- * ib_alloc_mw - Allocates a memory window.
- * @pd: The protection domain associated with the memory window.
- * @type: The type of the memory window (1 or 2).
- */
-struct ib_mw *ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
-
-/**
- * ib_bind_mw - Posts a work request to the send queue of the specified
- *   QP, which binds the memory window to the given address range and
- *   remote access attributes.
- * @qp: QP to post the bind work request on.
- * @mw: The memory window to bind.
- * @mw_bind: Specifies information about the memory window, including
- *   its address range, remote access rights, and associated memory region.
- *
- * If there is no immediate error, the function will update the rkey member
- * of the mw parameter to its new value. The bind operation can still fail
- * asynchronously.
- */
-static inline int ib_bind_mw(struct ib_qp *qp,
-                            struct ib_mw *mw,
-                            struct ib_mw_bind *mw_bind)
-{
-       /* XXX reference counting in corresponding MR? */
-       return mw->device->bind_mw ?
-               mw->device->bind_mw(qp, mw, mw_bind) :
-               -ENOSYS;
-}
-
-/**
- * ib_dealloc_mw - Deallocates a memory window.
- * @mw: The memory window to deallocate.
- */
-int ib_dealloc_mw(struct ib_mw *mw);
-
 /**
  * ib_alloc_fmr - Allocates a unmapped fast memory region.
  * @pd: The protection domain associated with the unmapped region.
diff --git a/include/scsi/iser.h b/include/scsi/iser.h
new file mode 100644 (file)
index 0000000..2e678fa
--- /dev/null
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *     - Redistributions of source code must retain the above
+ *       copyright notice, this list of conditions and the following
+ *       disclaimer.
+ *
+ *     - Redistributions in binary form must reproduce the above
+ *       copyright notice, this list of conditions and the following
+ *       disclaimer in the documentation and/or other materials
+ *       provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ISCSI_ISER_H
+#define ISCSI_ISER_H
+
+#define ISER_ZBVA_NOT_SUP              0x80
+#define ISER_SEND_W_INV_NOT_SUP                0x40
+#define ISERT_ZBVA_NOT_USED            0x80
+#define ISERT_SEND_W_INV_NOT_USED      0x40
+
+#define ISCSI_CTRL     0x10
+#define ISER_HELLO     0x20
+#define ISER_HELLORPLY 0x30
+
+#define ISER_VER       0x10
+#define ISER_WSV       0x08
+#define ISER_RSV       0x04
+
+/**
+ * struct iser_cm_hdr - iSER CM header (from iSER Annex A12)
+ *
+ * @flags:        flags support (zbva, send_w_inv)
+ * @rsvd:         reserved
+ */
+struct iser_cm_hdr {
+       u8      flags;
+       u8      rsvd[3];
+} __packed;
+
+/**
+ * struct iser_ctrl - iSER header of iSCSI control PDU
+ *
+ * @flags:        opcode and read/write valid bits
+ * @rsvd:         reserved
+ * @write_stag:   write rkey
+ * @write_va:     write virtual address
+ * @reaf_stag:    read rkey
+ * @read_va:      read virtual address
+ */
+struct iser_ctrl {
+       u8      flags;
+       u8      rsvd[3];
+       __be32  write_stag;
+       __be64  write_va;
+       __be32  read_stag;
+       __be64  read_va;
+} __packed;
+
+#endif /* ISCSI_ISER_H */
index fdabbb4ddba92f801e2952fafaeb997c7fa03a33..f730b91e472f19f97b15ec8982443fe099dd5db4 100644 (file)
@@ -167,6 +167,10 @@ int snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream,
 int snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream, int count);
 int snd_rawmidi_transmit(struct snd_rawmidi_substream *substream,
                         unsigned char *buffer, int count);
+int __snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream,
+                             unsigned char *buffer, int count);
+int __snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream,
+                              int count);
 
 /* main midi functions */
 
index 7990469a44ce3df7909bdd447cd743019f900f37..c4d76ff056c6efd3431a4ed7aa3c6f6888c69e80 100644 (file)
@@ -104,6 +104,7 @@ struct snd_timer_instance {
                           int event,
                           struct timespec * tstamp,
                           unsigned long resolution);
+       void (*disconnect)(struct snd_timer_instance *timeri);
        void *callback_data;
        unsigned long ticks;            /* auto-load ticks when expired */
        unsigned long cticks;           /* current ticks */
index 594b4b29a2241ba0493e22e09b0b66ea2d6c3cc4..4e4b2fa786097c3638e78a6df76995e598a4cecf 100644 (file)
@@ -43,7 +43,7 @@ struct extent_status;
        { EXT4_GET_BLOCKS_METADATA_NOFAIL,      "METADATA_NOFAIL" },    \
        { EXT4_GET_BLOCKS_NO_NORMALIZE,         "NO_NORMALIZE" },       \
        { EXT4_GET_BLOCKS_KEEP_SIZE,            "KEEP_SIZE" },          \
-       { EXT4_GET_BLOCKS_NO_LOCK,              "NO_LOCK" })
+       { EXT4_GET_BLOCKS_ZERO,                 "ZERO" })
 
 #define show_mflags(flags) __print_flags(flags, "",    \
        { EXT4_MAP_NEW,         "N" },                  \
index 98feb1b82896504bfc0a40157c84a22ca137e259..d6dfa05ba322c9465d3027c0c992a97eff6a504e 100644 (file)
@@ -17,7 +17,7 @@ TRACE_EVENT(fence_annotate_wait_on,
 
        TP_STRUCT__entry(
                __string(driver, fence->ops->get_driver_name(fence))
-               __string(timeline, fence->ops->get_driver_name(fence))
+               __string(timeline, fence->ops->get_timeline_name(fence))
                __field(unsigned int, context)
                __field(unsigned int, seqno)
 
index 0f803d2783e3834194cb3f5cd3dd4da5c017304c..47c6212d8f3cecb07d62cb9ea81f6ea65ae0c35f 100644 (file)
@@ -46,10 +46,10 @@ SCAN_STATUS
 
 TRACE_EVENT(mm_khugepaged_scan_pmd,
 
-       TP_PROTO(struct mm_struct *mm, unsigned long pfn, bool writable,
+       TP_PROTO(struct mm_struct *mm, struct page *page, bool writable,
                 bool referenced, int none_or_zero, int status),
 
-       TP_ARGS(mm, pfn, writable, referenced, none_or_zero, status),
+       TP_ARGS(mm, page, writable, referenced, none_or_zero, status),
 
        TP_STRUCT__entry(
                __field(struct mm_struct *, mm)
@@ -62,7 +62,7 @@ TRACE_EVENT(mm_khugepaged_scan_pmd,
 
        TP_fast_assign(
                __entry->mm = mm;
-               __entry->pfn = pfn;
+               __entry->pfn = page ? page_to_pfn(page) : -1;
                __entry->writable = writable;
                __entry->referenced = referenced;
                __entry->none_or_zero = none_or_zero;
@@ -104,10 +104,10 @@ TRACE_EVENT(mm_collapse_huge_page,
 
 TRACE_EVENT(mm_collapse_huge_page_isolate,
 
-       TP_PROTO(unsigned long pfn, int none_or_zero,
+       TP_PROTO(struct page *page, int none_or_zero,
                 bool referenced, bool  writable, int status),
 
-       TP_ARGS(pfn, none_or_zero, referenced, writable, status),
+       TP_ARGS(page, none_or_zero, referenced, writable, status),
 
        TP_STRUCT__entry(
                __field(unsigned long, pfn)
@@ -118,7 +118,7 @@ TRACE_EVENT(mm_collapse_huge_page_isolate,
        ),
 
        TP_fast_assign(
-               __entry->pfn = pfn;
+               __entry->pfn = page ? page_to_pfn(page) : -1;
                __entry->none_or_zero = none_or_zero;
                __entry->referenced = referenced;
                __entry->writable = writable;
index ff8f6c091a1504e3d18937244a07b6432c880fb2..f95f25e786ef0de423a3a036a1685ad53da496af 100644 (file)
@@ -15,7 +15,7 @@ struct softirq_action;
                         softirq_name(NET_TX)           \
                         softirq_name(NET_RX)           \
                         softirq_name(BLOCK)            \
-                        softirq_name(BLOCK_IOPOLL)     \
+                        softirq_name(IRQ_POLL)         \
                         softirq_name(TASKLET)          \
                         softirq_name(SCHED)            \
                         softirq_name(HRTIMER)          \
index 4cc989ad685164689072f90f1ef2c40f633c4088..f95e1c43c3fbc06fc2de769cd5360a5559d549b3 100644 (file)
@@ -48,6 +48,8 @@ struct drm_etnaviv_timespec {
 #define ETNAVIV_PARAM_GPU_FEATURES_2                0x05
 #define ETNAVIV_PARAM_GPU_FEATURES_3                0x06
 #define ETNAVIV_PARAM_GPU_FEATURES_4                0x07
+#define ETNAVIV_PARAM_GPU_FEATURES_5                0x08
+#define ETNAVIV_PARAM_GPU_FEATURES_6                0x09
 
 #define ETNAVIV_PARAM_GPU_STREAM_COUNT              0x10
 #define ETNAVIV_PARAM_GPU_REGISTER_MAX              0x11
@@ -59,6 +61,7 @@ struct drm_etnaviv_timespec {
 #define ETNAVIV_PARAM_GPU_BUFFER_SIZE               0x17
 #define ETNAVIV_PARAM_GPU_INSTRUCTION_COUNT         0x18
 #define ETNAVIV_PARAM_GPU_NUM_CONSTANTS             0x19
+#define ETNAVIV_PARAM_GPU_NUM_VARYINGS              0x1a
 
 #define ETNA_MAX_PIPES 4
 
index b8a5b3b86ad63ef0825e9a9aea1784bcc68fe425..149bec83a907515dccea4425acf3b71d8d18410f 100644 (file)
@@ -2,8 +2,11 @@
 #define _UAPI_LINUX_FS_H
 
 /*
- * This file has definitions for some important file table
- * structures etc.
+ * This file has definitions for some important file table structures
+ * and constants and structures used by various generic file system
+ * ioctl's.  Please do not make any changes in this file before
+ * sending patches for review to linux-fsdevel@vger.kernel.org and
+ * linux-api@vger.kernel.org.
  */
 
 #include <linux/limits.h>
@@ -146,6 +149,37 @@ struct inodes_stat_t {
 #define MS_MGC_VAL 0xC0ED0000
 #define MS_MGC_MSK 0xffff0000
 
+/*
+ * Structure for FS_IOC_FSGETXATTR[A] and FS_IOC_FSSETXATTR.
+ */
+struct fsxattr {
+       __u32           fsx_xflags;     /* xflags field value (get/set) */
+       __u32           fsx_extsize;    /* extsize field value (get/set)*/
+       __u32           fsx_nextents;   /* nextents field value (get)   */
+       __u32           fsx_projid;     /* project identifier (get/set) */
+       unsigned char   fsx_pad[12];
+};
+
+/*
+ * Flags for the fsx_xflags field
+ */
+#define FS_XFLAG_REALTIME      0x00000001      /* data in realtime volume */
+#define FS_XFLAG_PREALLOC      0x00000002      /* preallocated file extents */
+#define FS_XFLAG_IMMUTABLE     0x00000008      /* file cannot be modified */
+#define FS_XFLAG_APPEND                0x00000010      /* all writes append */
+#define FS_XFLAG_SYNC          0x00000020      /* all writes synchronous */
+#define FS_XFLAG_NOATIME       0x00000040      /* do not update access time */
+#define FS_XFLAG_NODUMP                0x00000080      /* do not include in backups */
+#define FS_XFLAG_RTINHERIT     0x00000100      /* create with rt bit set */
+#define FS_XFLAG_PROJINHERIT   0x00000200      /* create with parents projid */
+#define FS_XFLAG_NOSYMLINKS    0x00000400      /* disallow symlink creation */
+#define FS_XFLAG_EXTSIZE       0x00000800      /* extent size allocator hint */
+#define FS_XFLAG_EXTSZINHERIT  0x00001000      /* inherit inode extent size */
+#define FS_XFLAG_NODEFRAG      0x00002000      /* do not defragment */
+#define FS_XFLAG_FILESTREAM    0x00004000      /* use filestream allocator */
+#define FS_XFLAG_DAX           0x00008000      /* use DAX for IO */
+#define FS_XFLAG_HASATTR       0x80000000      /* no DIFLAG for this   */
+
 /* the read-only stuff doesn't really belong here, but any other place is
    probably as bad and I don't want to create yet another include file. */
 
@@ -188,7 +222,6 @@ struct inodes_stat_t {
 #define BLKSECDISCARD _IO(0x12,125)
 #define BLKROTATIONAL _IO(0x12,126)
 #define BLKZEROOUT _IO(0x12,127)
-#define BLKDAXSET _IO(0x12,128)
 #define BLKDAXGET _IO(0x12,129)
 
 #define BMAP_IOCTL 1           /* obsolete - kept for compatibility */
@@ -210,9 +243,28 @@ struct inodes_stat_t {
 #define FS_IOC32_SETFLAGS              _IOW('f', 2, int)
 #define FS_IOC32_GETVERSION            _IOR('v', 1, int)
 #define FS_IOC32_SETVERSION            _IOW('v', 2, int)
+#define FS_IOC_FSGETXATTR              _IOR ('X', 31, struct fsxattr)
+#define FS_IOC_FSSETXATTR              _IOW ('X', 32, struct fsxattr)
 
 /*
  * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS)
+ *
+ * Note: for historical reasons, these flags were originally used and
+ * defined for use by ext2/ext3, and then other file systems started
+ * using these flags so they wouldn't need to write their own version
+ * of chattr/lsattr (which was shipped as part of e2fsprogs).  You
+ * should think twice before trying to use these flags in new
+ * contexts, or trying to assign these flags, since they are used both
+ * as the UAPI and the on-disk encoding for ext2/3/4.  Also, we are
+ * almost out of 32-bit flags.  :-)
+ *
+ * We have recently hoisted FS_IOC_FSGETXATTR / FS_IOC_FSSETXATTR from
+ * XFS to the generic FS level interface.  This uses a structure that
+ * has padding and hence has more room to grow, so it may be more
+ * appropriate for many new use cases.
+ *
+ * Please do not change these flags or interfaces before checking with
+ * linux-fsdevel@vger.kernel.org and linux-api@vger.kernel.org.
  */
 #define        FS_SECRM_FL                     0x00000001 /* Secure deletion */
 #define        FS_UNRM_FL                      0x00000002 /* Undelete */
@@ -226,8 +278,8 @@ struct inodes_stat_t {
 #define FS_DIRTY_FL                    0x00000100
 #define FS_COMPRBLK_FL                 0x00000200 /* One or more compressed clusters */
 #define FS_NOCOMP_FL                   0x00000400 /* Don't compress */
-#define FS_ECOMPR_FL                   0x00000800 /* Compression error */
 /* End compression flags --- maybe not all used */
+#define FS_ENCRYPT_FL                  0x00000800 /* Encrypted file */
 #define FS_BTREE_FL                    0x00001000 /* btree format dir */
 #define FS_INDEX_FL                    0x00001000 /* hash-indexed directory */
 #define FS_IMAGIC_FL                   0x00002000 /* AFS directory */
@@ -235,9 +287,12 @@ struct inodes_stat_t {
 #define FS_NOTAIL_FL                   0x00008000 /* file tail should not be merged */
 #define FS_DIRSYNC_FL                  0x00010000 /* dirsync behaviour (directories only) */
 #define FS_TOPDIR_FL                   0x00020000 /* Top of directory hierarchies*/
+#define FS_HUGE_FILE_FL                        0x00040000 /* Reserved for ext4 */
 #define FS_EXTENT_FL                   0x00080000 /* Extents */
-#define FS_DIRECTIO_FL                 0x00100000 /* Use direct i/o */
+#define FS_EA_INODE_FL                 0x00200000 /* Inode used for large EA */
+#define FS_EOFBLOCKS_FL                        0x00400000 /* Reserved for ext4 */
 #define FS_NOCOW_FL                    0x00800000 /* Do not cow file */
+#define FS_INLINE_DATA_FL              0x10000000 /* Reserved for ext4 */
 #define FS_PROJINHERIT_FL              0x20000000 /* Create with parents projid */
 #define FS_RESERVED_FL                 0x80000000 /* reserved for ext2 lib */
 
index f4617cf07069213262f5c1c8bfb097ca7989b8ad..781c1399c6a3b0fa0b81c9567dd3096276a17d65 100644 (file)
@@ -795,7 +795,7 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, umode_t, mode,
 
        ro = mnt_want_write(mnt);       /* we'll drop it in any case */
        error = 0;
-       mutex_lock(&d_inode(root)->i_mutex);
+       inode_lock(d_inode(root));
        path.dentry = lookup_one_len(name->name, root, strlen(name->name));
        if (IS_ERR(path.dentry)) {
                error = PTR_ERR(path.dentry);
@@ -841,7 +841,7 @@ out_putfd:
                put_unused_fd(fd);
                fd = error;
        }
-       mutex_unlock(&d_inode(root)->i_mutex);
+       inode_unlock(d_inode(root));
        if (!ro)
                mnt_drop_write(mnt);
 out_putname:
@@ -866,7 +866,7 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name)
        err = mnt_want_write(mnt);
        if (err)
                goto out_name;
-       mutex_lock_nested(&d_inode(mnt->mnt_root)->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(d_inode(mnt->mnt_root), I_MUTEX_PARENT);
        dentry = lookup_one_len(name->name, mnt->mnt_root,
                                strlen(name->name));
        if (IS_ERR(dentry)) {
@@ -884,7 +884,7 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name)
        dput(dentry);
 
 out_unlock:
-       mutex_unlock(&d_inode(mnt->mnt_root)->i_mutex);
+       inode_unlock(d_inode(mnt->mnt_root));
        if (inode)
                iput(inode);
        mnt_drop_write(mnt);
index b471e5a3863ddbca70f2bf4dee22f40df0345fbe..cddd5b5fde514a205ed24a46b7a273643ab62c7c 100644 (file)
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -1493,7 +1493,7 @@ out_rcu_wakeup:
        wake_up_sem_queue_do(&tasks);
 out_free:
        if (sem_io != fast_sem_io)
-               ipc_free(sem_io, sizeof(ushort)*nsems);
+               ipc_free(sem_io);
        return err;
 }
 
index 0f401d94b7c657d5e7126fe78f149c94ffea8e24..798cad18dd878f2ee81f03e34431dbc001e2c2a9 100644 (file)
@@ -414,17 +414,12 @@ void *ipc_alloc(int size)
 /**
  * ipc_free - free ipc space
  * @ptr: pointer returned by ipc_alloc
- * @size: size of block
  *
- * Free a block created with ipc_alloc(). The caller must know the size
- * used in the allocation call.
+ * Free a block created with ipc_alloc().
  */
-void ipc_free(void *ptr, int size)
+void ipc_free(void *ptr)
 {
-       if (size > PAGE_SIZE)
-               vfree(ptr);
-       else
-               kfree(ptr);
+       kvfree(ptr);
 }
 
 /**
index 3a8a5a0eca6252f04cf188fa6921291f9ec6fc31..51f7ca58ac6742989c61c0e0fc8d73bc7eab6bf7 100644 (file)
@@ -118,7 +118,7 @@ int ipcperms(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp, short flg);
  * both function can sleep
  */
 void *ipc_alloc(int size);
-void ipc_free(void *ptr, int size);
+void ipc_free(void *ptr);
 
 /*
  * For allocation that need to be freed by RCU.
index 27c6046c2c3d459c1f2345b191e647b94fa7f02e..f84f8d06e1f6d1010b9e61065c71596f320e28d9 100644 (file)
@@ -95,7 +95,7 @@ struct audit_fsnotify_mark *audit_alloc_mark(struct audit_krule *krule, char *pa
        if (IS_ERR(dentry))
                return (void *)dentry; /* returning an error */
        inode = path.dentry->d_inode;
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        audit_mark = kzalloc(sizeof(*audit_mark), GFP_KERNEL);
        if (unlikely(!audit_mark)) {
index 656c7e93ac0d30d3e42a8f7e0dfc7dd071360d78..9f194aad0adc0600ce257581315824f540eb9c0b 100644 (file)
@@ -364,7 +364,7 @@ static int audit_get_nd(struct audit_watch *watch, struct path *parent)
        struct dentry *d = kern_path_locked(watch->path, parent);
        if (IS_ERR(d))
                return PTR_ERR(d);
-       mutex_unlock(&d_backing_inode(parent->dentry)->i_mutex);
+       inode_unlock(d_backing_inode(parent->dentry));
        if (d_is_positive(d)) {
                /* update watch filter fields */
                watch->dev = d_backing_inode(d)->i_sb->s_dev;
index b0799bced518691bd847a16973be74184bcdcbaf..89ebbc4d1164fea26bdcb62561bd15838be6a2db 100644 (file)
@@ -291,10 +291,13 @@ static void *perf_event_fd_array_get_ptr(struct bpf_map *map, int fd)
 {
        struct perf_event *event;
        const struct perf_event_attr *attr;
+       struct file *file;
 
-       event = perf_event_get(fd);
-       if (IS_ERR(event))
-               return event;
+       file = perf_event_get(fd);
+       if (IS_ERR(file))
+               return file;
+
+       event = file->private_data;
 
        attr = perf_event_attrs(event);
        if (IS_ERR(attr))
@@ -304,24 +307,22 @@ static void *perf_event_fd_array_get_ptr(struct bpf_map *map, int fd)
                goto err;
 
        if (attr->type == PERF_TYPE_RAW)
-               return event;
+               return file;
 
        if (attr->type == PERF_TYPE_HARDWARE)
-               return event;
+               return file;
 
        if (attr->type == PERF_TYPE_SOFTWARE &&
            attr->config == PERF_COUNT_SW_BPF_OUTPUT)
-               return event;
+               return file;
 err:
-       perf_event_release_kernel(event);
+       fput(file);
        return ERR_PTR(-EINVAL);
 }
 
 static void perf_event_fd_array_put_ptr(void *ptr)
 {
-       struct perf_event *event = ptr;
-
-       perf_event_release_kernel(event);
+       fput((struct file *)ptr);
 }
 
 static const struct bpf_map_ops perf_event_array_ops = {
index c0957416b32edd7f945201839c3058128bd718f5..5946460b24250a264f75c442099bb52ca06b0269 100644 (file)
@@ -49,8 +49,6 @@
 
 #include <asm/irq_regs.h>
 
-static struct workqueue_struct *perf_wq;
-
 typedef int (*remote_function_f)(void *);
 
 struct remote_function_call {
@@ -126,44 +124,181 @@ static int cpu_function_call(int cpu, remote_function_f func, void *info)
        return data.ret;
 }
 
-static void event_function_call(struct perf_event *event,
-                               int (*active)(void *),
-                               void (*inactive)(void *),
-                               void *data)
+static inline struct perf_cpu_context *
+__get_cpu_context(struct perf_event_context *ctx)
+{
+       return this_cpu_ptr(ctx->pmu->pmu_cpu_context);
+}
+
+static void perf_ctx_lock(struct perf_cpu_context *cpuctx,
+                         struct perf_event_context *ctx)
 {
+       raw_spin_lock(&cpuctx->ctx.lock);
+       if (ctx)
+               raw_spin_lock(&ctx->lock);
+}
+
+static void perf_ctx_unlock(struct perf_cpu_context *cpuctx,
+                           struct perf_event_context *ctx)
+{
+       if (ctx)
+               raw_spin_unlock(&ctx->lock);
+       raw_spin_unlock(&cpuctx->ctx.lock);
+}
+
+#define TASK_TOMBSTONE ((void *)-1L)
+
+static bool is_kernel_event(struct perf_event *event)
+{
+       return READ_ONCE(event->owner) == TASK_TOMBSTONE;
+}
+
+/*
+ * On task ctx scheduling...
+ *
+ * When !ctx->nr_events a task context will not be scheduled. This means
+ * we can disable the scheduler hooks (for performance) without leaving
+ * pending task ctx state.
+ *
+ * This however results in two special cases:
+ *
+ *  - removing the last event from a task ctx; this is relatively straight
+ *    forward and is done in __perf_remove_from_context.
+ *
+ *  - adding the first event to a task ctx; this is tricky because we cannot
+ *    rely on ctx->is_active and therefore cannot use event_function_call().
+ *    See perf_install_in_context().
+ *
+ * This is because we need a ctx->lock serialized variable (ctx->is_active)
+ * to reliably determine if a particular task/context is scheduled in. The
+ * task_curr() use in task_function_call() is racy in that a remote context
+ * switch is not a single atomic operation.
+ *
+ * As is, the situation is 'safe' because we set rq->curr before we do the
+ * actual context switch. This means that task_curr() will fail early, but
+ * we'll continue spinning on ctx->is_active until we've passed
+ * perf_event_task_sched_out().
+ *
+ * Without this ctx->lock serialized variable we could have race where we find
+ * the task (and hence the context) would not be active while in fact they are.
+ *
+ * If ctx->nr_events, then ctx->is_active and cpuctx->task_ctx are set.
+ */
+
+typedef void (*event_f)(struct perf_event *, struct perf_cpu_context *,
+                       struct perf_event_context *, void *);
+
+struct event_function_struct {
+       struct perf_event *event;
+       event_f func;
+       void *data;
+};
+
+static int event_function(void *info)
+{
+       struct event_function_struct *efs = info;
+       struct perf_event *event = efs->event;
        struct perf_event_context *ctx = event->ctx;
-       struct task_struct *task = ctx->task;
+       struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
+       struct perf_event_context *task_ctx = cpuctx->task_ctx;
+       int ret = 0;
+
+       WARN_ON_ONCE(!irqs_disabled());
+
+       perf_ctx_lock(cpuctx, task_ctx);
+       /*
+        * Since we do the IPI call without holding ctx->lock things can have
+        * changed, double check we hit the task we set out to hit.
+        */
+       if (ctx->task) {
+               if (ctx->task != current) {
+                       ret = -EAGAIN;
+                       goto unlock;
+               }
+
+               /*
+                * We only use event_function_call() on established contexts,
+                * and event_function() is only ever called when active (or
+                * rather, we'll have bailed in task_function_call() or the
+                * above ctx->task != current test), therefore we must have
+                * ctx->is_active here.
+                */
+               WARN_ON_ONCE(!ctx->is_active);
+               /*
+                * And since we have ctx->is_active, cpuctx->task_ctx must
+                * match.
+                */
+               WARN_ON_ONCE(task_ctx != ctx);
+       } else {
+               WARN_ON_ONCE(&cpuctx->ctx != ctx);
+       }
+
+       efs->func(event, cpuctx, ctx, efs->data);
+unlock:
+       perf_ctx_unlock(cpuctx, task_ctx);
+
+       return ret;
+}
+
+static void event_function_local(struct perf_event *event, event_f func, void *data)
+{
+       struct event_function_struct efs = {
+               .event = event,
+               .func = func,
+               .data = data,
+       };
+
+       int ret = event_function(&efs);
+       WARN_ON_ONCE(ret);
+}
+
+static void event_function_call(struct perf_event *event, event_f func, void *data)
+{
+       struct perf_event_context *ctx = event->ctx;
+       struct task_struct *task = READ_ONCE(ctx->task); /* verified in event_function */
+       struct event_function_struct efs = {
+               .event = event,
+               .func = func,
+               .data = data,
+       };
+
+       if (!event->parent) {
+               /*
+                * If this is a !child event, we must hold ctx::mutex to
+                * stabilize the the event->ctx relation. See
+                * perf_event_ctx_lock().
+                */
+               lockdep_assert_held(&ctx->mutex);
+       }
 
        if (!task) {
-               cpu_function_call(event->cpu, active, data);
+               cpu_function_call(event->cpu, event_function, &efs);
                return;
        }
 
 again:
-       if (!task_function_call(task, active, data))
+       if (task == TASK_TOMBSTONE)
+               return;
+
+       if (!task_function_call(task, event_function, &efs))
                return;
 
        raw_spin_lock_irq(&ctx->lock);
-       if (ctx->is_active) {
-               /*
-                * Reload the task pointer, it might have been changed by
-                * a concurrent perf_event_context_sched_out().
-                */
-               task = ctx->task;
-               raw_spin_unlock_irq(&ctx->lock);
-               goto again;
+       /*
+        * Reload the task pointer, it might have been changed by
+        * a concurrent perf_event_context_sched_out().
+        */
+       task = ctx->task;
+       if (task != TASK_TOMBSTONE) {
+               if (ctx->is_active) {
+                       raw_spin_unlock_irq(&ctx->lock);
+                       goto again;
+               }
+               func(event, NULL, ctx, data);
        }
-       inactive(data);
        raw_spin_unlock_irq(&ctx->lock);
 }
 
-#define EVENT_OWNER_KERNEL ((void *) -1)
-
-static bool is_kernel_event(struct perf_event *event)
-{
-       return event->owner == EVENT_OWNER_KERNEL;
-}
-
 #define PERF_FLAG_ALL (PERF_FLAG_FD_NO_GROUP |\
                       PERF_FLAG_FD_OUTPUT  |\
                       PERF_FLAG_PID_CGROUP |\
@@ -368,28 +503,6 @@ static inline u64 perf_event_clock(struct perf_event *event)
        return event->clock();
 }
 
-static inline struct perf_cpu_context *
-__get_cpu_context(struct perf_event_context *ctx)
-{
-       return this_cpu_ptr(ctx->pmu->pmu_cpu_context);
-}
-
-static void perf_ctx_lock(struct perf_cpu_context *cpuctx,
-                         struct perf_event_context *ctx)
-{
-       raw_spin_lock(&cpuctx->ctx.lock);
-       if (ctx)
-               raw_spin_lock(&ctx->lock);
-}
-
-static void perf_ctx_unlock(struct perf_cpu_context *cpuctx,
-                           struct perf_event_context *ctx)
-{
-       if (ctx)
-               raw_spin_unlock(&ctx->lock);
-       raw_spin_unlock(&cpuctx->ctx.lock);
-}
-
 #ifdef CONFIG_CGROUP_PERF
 
 static inline bool
@@ -579,13 +692,7 @@ static inline void perf_cgroup_sched_out(struct task_struct *task,
         * we are holding the rcu lock
         */
        cgrp1 = perf_cgroup_from_task(task, NULL);
-
-       /*
-        * next is NULL when called from perf_event_enable_on_exec()
-        * that will systematically cause a cgroup_switch()
-        */
-       if (next)
-               cgrp2 = perf_cgroup_from_task(next, NULL);
+       cgrp2 = perf_cgroup_from_task(next, NULL);
 
        /*
         * only schedule out current cgroup events if we know
@@ -611,8 +718,6 @@ static inline void perf_cgroup_sched_in(struct task_struct *prev,
         * we are holding the rcu lock
         */
        cgrp1 = perf_cgroup_from_task(task, NULL);
-
-       /* prev can never be NULL */
        cgrp2 = perf_cgroup_from_task(prev, NULL);
 
        /*
@@ -917,7 +1022,7 @@ static void put_ctx(struct perf_event_context *ctx)
        if (atomic_dec_and_test(&ctx->refcount)) {
                if (ctx->parent_ctx)
                        put_ctx(ctx->parent_ctx);
-               if (ctx->task)
+               if (ctx->task && ctx->task != TASK_TOMBSTONE)
                        put_task_struct(ctx->task);
                call_rcu(&ctx->rcu_head, free_ctx);
        }
@@ -934,9 +1039,8 @@ static void put_ctx(struct perf_event_context *ctx)
  * perf_event_context::mutex nests and those are:
  *
  *  - perf_event_exit_task_context()   [ child , 0 ]
- *      __perf_event_exit_task()
- *        sync_child_event()
- *          put_event()                        [ parent, 1 ]
+ *      perf_event_exit_event()
+ *        put_event()                  [ parent, 1 ]
  *
  *  - perf_event_init_context()                [ parent, 0 ]
  *      inherit_task_group()
@@ -979,8 +1083,8 @@ static void put_ctx(struct perf_event_context *ctx)
  * Lock order:
  *     task_struct::perf_event_mutex
  *       perf_event_context::mutex
- *         perf_event_context::lock
  *         perf_event::child_mutex;
+ *           perf_event_context::lock
  *         perf_event::mmap_mutex
  *         mmap_sem
  */
@@ -1078,6 +1182,7 @@ static u64 primary_event_id(struct perf_event *event)
 
 /*
  * Get the perf_event_context for a task and lock it.
+ *
  * This has to cope with with the fact that until it is locked,
  * the context could get moved to another task.
  */
@@ -1118,9 +1223,12 @@ retry:
                        goto retry;
                }
 
-               if (!atomic_inc_not_zero(&ctx->refcount)) {
+               if (ctx->task == TASK_TOMBSTONE ||
+                   !atomic_inc_not_zero(&ctx->refcount)) {
                        raw_spin_unlock(&ctx->lock);
                        ctx = NULL;
+               } else {
+                       WARN_ON_ONCE(ctx->task != task);
                }
        }
        rcu_read_unlock();
@@ -1246,6 +1354,8 @@ ctx_group_list(struct perf_event *event, struct perf_event_context *ctx)
 static void
 list_add_event(struct perf_event *event, struct perf_event_context *ctx)
 {
+       lockdep_assert_held(&ctx->lock);
+
        WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT);
        event->attach_state |= PERF_ATTACH_CONTEXT;
 
@@ -1448,11 +1558,14 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
 
        if (is_cgroup_event(event)) {
                ctx->nr_cgroups--;
+               /*
+                * Because cgroup events are always per-cpu events, this will
+                * always be called from the right CPU.
+                */
                cpuctx = __get_cpu_context(ctx);
                /*
-                * if there are no more cgroup events
-                * then cler cgrp to avoid stale pointer
-                * in update_cgrp_time_from_cpuctx()
+                * If there are no more cgroup events then clear cgrp to avoid
+                * stale pointer in update_cgrp_time_from_cpuctx().
                 */
                if (!ctx->nr_cgroups)
                        cpuctx->cgrp = NULL;
@@ -1530,45 +1643,11 @@ out:
                perf_event__header_size(tmp);
 }
 
-/*
- * User event without the task.
- */
 static bool is_orphaned_event(struct perf_event *event)
 {
-       return event && !is_kernel_event(event) && !event->owner;
+       return event->state == PERF_EVENT_STATE_EXIT;
 }
 
-/*
- * Event has a parent but parent's task finished and it's
- * alive only because of children holding refference.
- */
-static bool is_orphaned_child(struct perf_event *event)
-{
-       return is_orphaned_event(event->parent);
-}
-
-static void orphans_remove_work(struct work_struct *work);
-
-static void schedule_orphans_remove(struct perf_event_context *ctx)
-{
-       if (!ctx->task || ctx->orphans_remove_sched || !perf_wq)
-               return;
-
-       if (queue_delayed_work(perf_wq, &ctx->orphans_remove, 1)) {
-               get_ctx(ctx);
-               ctx->orphans_remove_sched = true;
-       }
-}
-
-static int __init perf_workqueue_init(void)
-{
-       perf_wq = create_singlethread_workqueue("perf");
-       WARN(!perf_wq, "failed to create perf workqueue\n");
-       return perf_wq ? 0 : -1;
-}
-
-core_initcall(perf_workqueue_init);
-
 static inline int pmu_filter_match(struct perf_event *event)
 {
        struct pmu *pmu = event->pmu;
@@ -1629,9 +1708,6 @@ event_sched_out(struct perf_event *event,
        if (event->attr.exclusive || !cpuctx->active_oncpu)
                cpuctx->exclusive = 0;
 
-       if (is_orphaned_child(event))
-               schedule_orphans_remove(ctx);
-
        perf_pmu_enable(event->pmu);
 }
 
@@ -1655,21 +1731,8 @@ group_sched_out(struct perf_event *group_event,
                cpuctx->exclusive = 0;
 }
 
-struct remove_event {
-       struct perf_event *event;
-       bool detach_group;
-};
-
-static void ___perf_remove_from_context(void *info)
-{
-       struct remove_event *re = info;
-       struct perf_event *event = re->event;
-       struct perf_event_context *ctx = event->ctx;
-
-       if (re->detach_group)
-               perf_group_detach(event);
-       list_del_event(event, ctx);
-}
+#define DETACH_GROUP   0x01UL
+#define DETACH_STATE   0x02UL
 
 /*
  * Cross CPU call to remove a performance event
@@ -1677,33 +1740,33 @@ static void ___perf_remove_from_context(void *info)
  * We disable the event on the hardware level first. After that we
  * remove it from the context list.
  */
-static int __perf_remove_from_context(void *info)
+static void
+__perf_remove_from_context(struct perf_event *event,
+                          struct perf_cpu_context *cpuctx,
+                          struct perf_event_context *ctx,
+                          void *info)
 {
-       struct remove_event *re = info;
-       struct perf_event *event = re->event;
-       struct perf_event_context *ctx = event->ctx;
-       struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
+       unsigned long flags = (unsigned long)info;
 
-       raw_spin_lock(&ctx->lock);
        event_sched_out(event, cpuctx, ctx);
-       if (re->detach_group)
+       if (flags & DETACH_GROUP)
                perf_group_detach(event);
        list_del_event(event, ctx);
-       if (!ctx->nr_events && cpuctx->task_ctx == ctx) {
+       if (flags & DETACH_STATE)
+               event->state = PERF_EVENT_STATE_EXIT;
+
+       if (!ctx->nr_events && ctx->is_active) {
                ctx->is_active = 0;
-               cpuctx->task_ctx = NULL;
+               if (ctx->task) {
+                       WARN_ON_ONCE(cpuctx->task_ctx != ctx);
+                       cpuctx->task_ctx = NULL;
+               }
        }
-       raw_spin_unlock(&ctx->lock);
-
-       return 0;
 }
 
 /*
  * Remove the event from a task's (or a CPU's) list of events.
  *
- * CPU events are removed with a smp call. For task events we only
- * call when the task is on a CPU.
- *
  * If event->ctx is a cloned context, callers must make sure that
  * every task struct that event->ctx->task could possibly point to
  * remains valid.  This is OK when called from perf_release since
@@ -1711,73 +1774,32 @@ static int __perf_remove_from_context(void *info)
  * When called from perf_event_exit_task, it's OK because the
  * context has been detached from its task.
  */
-static void perf_remove_from_context(struct perf_event *event, bool detach_group)
+static void perf_remove_from_context(struct perf_event *event, unsigned long flags)
 {
-       struct perf_event_context *ctx = event->ctx;
-       struct remove_event re = {
-               .event = event,
-               .detach_group = detach_group,
-       };
+       lockdep_assert_held(&event->ctx->mutex);
 
-       lockdep_assert_held(&ctx->mutex);
-
-       event_function_call(event, __perf_remove_from_context,
-                           ___perf_remove_from_context, &re);
+       event_function_call(event, __perf_remove_from_context, (void *)flags);
 }
 
 /*
  * Cross CPU call to disable a performance event
  */
-int __perf_event_disable(void *info)
-{
-       struct perf_event *event = info;
-       struct perf_event_context *ctx = event->ctx;
-       struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
-
-       /*
-        * If this is a per-task event, need to check whether this
-        * event's task is the current task on this cpu.
-        *
-        * Can trigger due to concurrent perf_event_context_sched_out()
-        * flipping contexts around.
-        */
-       if (ctx->task && cpuctx->task_ctx != ctx)
-               return -EINVAL;
-
-       raw_spin_lock(&ctx->lock);
-
-       /*
-        * If the event is on, turn it off.
-        * If it is in error state, leave it in error state.
-        */
-       if (event->state >= PERF_EVENT_STATE_INACTIVE) {
-               update_context_time(ctx);
-               update_cgrp_time_from_event(event);
-               update_group_times(event);
-               if (event == event->group_leader)
-                       group_sched_out(event, cpuctx, ctx);
-               else
-                       event_sched_out(event, cpuctx, ctx);
-               event->state = PERF_EVENT_STATE_OFF;
-       }
-
-       raw_spin_unlock(&ctx->lock);
-
-       return 0;
-}
-
-void ___perf_event_disable(void *info)
+static void __perf_event_disable(struct perf_event *event,
+                                struct perf_cpu_context *cpuctx,
+                                struct perf_event_context *ctx,
+                                void *info)
 {
-       struct perf_event *event = info;
+       if (event->state < PERF_EVENT_STATE_INACTIVE)
+               return;
 
-       /*
-        * Since we have the lock this context can't be scheduled
-        * in, so we can change the state safely.
-        */
-       if (event->state == PERF_EVENT_STATE_INACTIVE) {
-               update_group_times(event);
-               event->state = PERF_EVENT_STATE_OFF;
-       }
+       update_context_time(ctx);
+       update_cgrp_time_from_event(event);
+       update_group_times(event);
+       if (event == event->group_leader)
+               group_sched_out(event, cpuctx, ctx);
+       else
+               event_sched_out(event, cpuctx, ctx);
+       event->state = PERF_EVENT_STATE_OFF;
 }
 
 /*
@@ -1788,7 +1810,8 @@ void ___perf_event_disable(void *info)
  * remains valid.  This condition is satisifed when called through
  * perf_event_for_each_child or perf_event_for_each because they
  * hold the top-level event's child_mutex, so any descendant that
- * goes to exit will block in sync_child_event.
+ * goes to exit will block in perf_event_exit_event().
+ *
  * When called from perf_pending_event it's OK because event->ctx
  * is the current context on this CPU and preemption is disabled,
  * hence we can't get into perf_event_task_sched_out for this context.
@@ -1804,8 +1827,12 @@ static void _perf_event_disable(struct perf_event *event)
        }
        raw_spin_unlock_irq(&ctx->lock);
 
-       event_function_call(event, __perf_event_disable,
-                           ___perf_event_disable, event);
+       event_function_call(event, __perf_event_disable, NULL);
+}
+
+void perf_event_disable_local(struct perf_event *event)
+{
+       event_function_local(event, __perf_event_disable, NULL);
 }
 
 /*
@@ -1918,9 +1945,6 @@ event_sched_in(struct perf_event *event,
        if (event->attr.exclusive)
                cpuctx->exclusive = 1;
 
-       if (is_orphaned_child(event))
-               schedule_orphans_remove(ctx);
-
 out:
        perf_pmu_enable(event->pmu);
 
@@ -2039,7 +2063,8 @@ static void add_event_to_ctx(struct perf_event *event,
        event->tstamp_stopped = tstamp;
 }
 
-static void task_ctx_sched_out(struct perf_event_context *ctx);
+static void task_ctx_sched_out(struct perf_cpu_context *cpuctx,
+                              struct perf_event_context *ctx);
 static void
 ctx_sched_in(struct perf_event_context *ctx,
             struct perf_cpu_context *cpuctx,
@@ -2058,16 +2083,15 @@ static void perf_event_sched_in(struct perf_cpu_context *cpuctx,
                ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE, task);
 }
 
-static void ___perf_install_in_context(void *info)
+static void ctx_resched(struct perf_cpu_context *cpuctx,
+                       struct perf_event_context *task_ctx)
 {
-       struct perf_event *event = info;
-       struct perf_event_context *ctx = event->ctx;
-
-       /*
-        * Since the task isn't running, its safe to add the event, us holding
-        * the ctx->lock ensures the task won't get scheduled in.
-        */
-       add_event_to_ctx(event, ctx);
+       perf_pmu_disable(cpuctx->ctx.pmu);
+       if (task_ctx)
+               task_ctx_sched_out(cpuctx, task_ctx);
+       cpu_ctx_sched_out(cpuctx, EVENT_ALL);
+       perf_event_sched_in(cpuctx, task_ctx, current);
+       perf_pmu_enable(cpuctx->ctx.pmu);
 }
 
 /*
@@ -2077,55 +2101,31 @@ static void ___perf_install_in_context(void *info)
  */
 static int  __perf_install_in_context(void *info)
 {
-       struct perf_event *event = info;
-       struct perf_event_context *ctx = event->ctx;
+       struct perf_event_context *ctx = info;
        struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
        struct perf_event_context *task_ctx = cpuctx->task_ctx;
-       struct task_struct *task = current;
-
-       perf_ctx_lock(cpuctx, task_ctx);
-       perf_pmu_disable(cpuctx->ctx.pmu);
 
-       /*
-        * If there was an active task_ctx schedule it out.
-        */
-       if (task_ctx)
-               task_ctx_sched_out(task_ctx);
-
-       /*
-        * If the context we're installing events in is not the
-        * active task_ctx, flip them.
-        */
-       if (ctx->task && task_ctx != ctx) {
-               if (task_ctx)
-                       raw_spin_unlock(&task_ctx->lock);
+       raw_spin_lock(&cpuctx->ctx.lock);
+       if (ctx->task) {
                raw_spin_lock(&ctx->lock);
+               /*
+                * If we hit the 'wrong' task, we've since scheduled and
+                * everything should be sorted, nothing to do!
+                */
                task_ctx = ctx;
-       }
+               if (ctx->task != current)
+                       goto unlock;
 
-       if (task_ctx) {
-               cpuctx->task_ctx = task_ctx;
-               task = task_ctx->task;
+               /*
+                * If task_ctx is set, it had better be to us.
+                */
+               WARN_ON_ONCE(cpuctx->task_ctx != ctx && cpuctx->task_ctx);
+       } else if (task_ctx) {
+               raw_spin_lock(&task_ctx->lock);
        }
 
-       cpu_ctx_sched_out(cpuctx, EVENT_ALL);
-
-       update_context_time(ctx);
-       /*
-        * update cgrp time only if current cgrp
-        * matches event->cgrp. Must be done before
-        * calling add_event_to_ctx()
-        */
-       update_cgrp_time_from_event(event);
-
-       add_event_to_ctx(event, ctx);
-
-       /*
-        * Schedule everything back in
-        */
-       perf_event_sched_in(cpuctx, task_ctx, task);
-
-       perf_pmu_enable(cpuctx->ctx.pmu);
+       ctx_resched(cpuctx, task_ctx);
+unlock:
        perf_ctx_unlock(cpuctx, task_ctx);
 
        return 0;
@@ -2133,27 +2133,54 @@ static int  __perf_install_in_context(void *info)
 
 /*
  * Attach a performance event to a context
- *
- * First we add the event to the list with the hardware enable bit
- * in event->hw_config cleared.
- *
- * If the event is attached to a task which is on a CPU we use a smp
- * call to enable it in the task context. The task might have been
- * scheduled away, but we check this in the smp call again.
  */
 static void
 perf_install_in_context(struct perf_event_context *ctx,
                        struct perf_event *event,
                        int cpu)
 {
+       struct task_struct *task = NULL;
+
        lockdep_assert_held(&ctx->mutex);
 
        event->ctx = ctx;
        if (event->cpu != -1)
                event->cpu = cpu;
 
-       event_function_call(event, __perf_install_in_context,
-                           ___perf_install_in_context, event);
+       /*
+        * Installing events is tricky because we cannot rely on ctx->is_active
+        * to be set in case this is the nr_events 0 -> 1 transition.
+        *
+        * So what we do is we add the event to the list here, which will allow
+        * a future context switch to DTRT and then send a racy IPI. If the IPI
+        * fails to hit the right task, this means a context switch must have
+        * happened and that will have taken care of business.
+        */
+       raw_spin_lock_irq(&ctx->lock);
+       task = ctx->task;
+       /*
+        * Worse, we cannot even rely on the ctx actually existing anymore. If
+        * between find_get_context() and perf_install_in_context() the task
+        * went through perf_event_exit_task() its dead and we should not be
+        * adding new events.
+        */
+       if (task == TASK_TOMBSTONE) {
+               raw_spin_unlock_irq(&ctx->lock);
+               return;
+       }
+       update_context_time(ctx);
+       /*
+        * Update cgrp time only if current cgrp matches event->cgrp.
+        * Must be done before calling add_event_to_ctx().
+        */
+       update_cgrp_time_from_event(event);
+       add_event_to_ctx(event, ctx);
+       raw_spin_unlock_irq(&ctx->lock);
+
+       if (task)
+               task_function_call(task, __perf_install_in_context, ctx);
+       else
+               cpu_function_call(cpu, __perf_install_in_context, ctx);
 }
 
 /*
@@ -2180,43 +2207,30 @@ static void __perf_event_mark_enabled(struct perf_event *event)
 /*
  * Cross CPU call to enable a performance event
  */
-static int __perf_event_enable(void *info)
+static void __perf_event_enable(struct perf_event *event,
+                               struct perf_cpu_context *cpuctx,
+                               struct perf_event_context *ctx,
+                               void *info)
 {
-       struct perf_event *event = info;
-       struct perf_event_context *ctx = event->ctx;
        struct perf_event *leader = event->group_leader;
-       struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
-       int err;
+       struct perf_event_context *task_ctx;
 
-       /*
-        * There's a time window between 'ctx->is_active' check
-        * in perf_event_enable function and this place having:
-        *   - IRQs on
-        *   - ctx->lock unlocked
-        *
-        * where the task could be killed and 'ctx' deactivated
-        * by perf_event_exit_task.
-        */
-       if (!ctx->is_active)
-               return -EINVAL;
+       if (event->state >= PERF_EVENT_STATE_INACTIVE ||
+           event->state <= PERF_EVENT_STATE_ERROR)
+               return;
 
-       raw_spin_lock(&ctx->lock);
        update_context_time(ctx);
-
-       if (event->state >= PERF_EVENT_STATE_INACTIVE)
-               goto unlock;
-
-       /*
-        * set current task's cgroup time reference point
-        */
-       perf_cgroup_set_timestamp(current, ctx);
-
        __perf_event_mark_enabled(event);
 
+       if (!ctx->is_active)
+               return;
+
        if (!event_filter_match(event)) {
-               if (is_cgroup_event(event))
+               if (is_cgroup_event(event)) {
+                       perf_cgroup_set_timestamp(current, ctx); // XXX ?
                        perf_cgroup_defer_enabled(event);
-               goto unlock;
+               }
+               return;
        }
 
        /*
@@ -2224,41 +2238,13 @@ static int __perf_event_enable(void *info)
         * then don't put it on unless the group is on.
         */
        if (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE)
-               goto unlock;
-
-       if (!group_can_go_on(event, cpuctx, 1)) {
-               err = -EEXIST;
-       } else {
-               if (event == leader)
-                       err = group_sched_in(event, cpuctx, ctx);
-               else
-                       err = event_sched_in(event, cpuctx, ctx);
-       }
-
-       if (err) {
-               /*
-                * If this event can't go on and it's part of a
-                * group, then the whole group has to come off.
-                */
-               if (leader != event) {
-                       group_sched_out(leader, cpuctx, ctx);
-                       perf_mux_hrtimer_restart(cpuctx);
-               }
-               if (leader->attr.pinned) {
-                       update_group_times(leader);
-                       leader->state = PERF_EVENT_STATE_ERROR;
-               }
-       }
-
-unlock:
-       raw_spin_unlock(&ctx->lock);
+               return;
 
-       return 0;
-}
+       task_ctx = cpuctx->task_ctx;
+       if (ctx->task)
+               WARN_ON_ONCE(task_ctx != ctx);
 
-void ___perf_event_enable(void *info)
-{
-       __perf_event_mark_enabled((struct perf_event *)info);
+       ctx_resched(cpuctx, task_ctx);
 }
 
 /*
@@ -2275,7 +2261,8 @@ static void _perf_event_enable(struct perf_event *event)
        struct perf_event_context *ctx = event->ctx;
 
        raw_spin_lock_irq(&ctx->lock);
-       if (event->state >= PERF_EVENT_STATE_INACTIVE) {
+       if (event->state >= PERF_EVENT_STATE_INACTIVE ||
+           event->state <  PERF_EVENT_STATE_ERROR) {
                raw_spin_unlock_irq(&ctx->lock);
                return;
        }
@@ -2291,8 +2278,7 @@ static void _perf_event_enable(struct perf_event *event)
                event->state = PERF_EVENT_STATE_OFF;
        raw_spin_unlock_irq(&ctx->lock);
 
-       event_function_call(event, __perf_event_enable,
-                           ___perf_event_enable, event);
+       event_function_call(event, __perf_event_enable, NULL);
 }
 
 /*
@@ -2342,12 +2328,27 @@ static void ctx_sched_out(struct perf_event_context *ctx,
                          struct perf_cpu_context *cpuctx,
                          enum event_type_t event_type)
 {
-       struct perf_event *event;
        int is_active = ctx->is_active;
+       struct perf_event *event;
 
-       ctx->is_active &= ~event_type;
-       if (likely(!ctx->nr_events))
+       lockdep_assert_held(&ctx->lock);
+
+       if (likely(!ctx->nr_events)) {
+               /*
+                * See __perf_remove_from_context().
+                */
+               WARN_ON_ONCE(ctx->is_active);
+               if (ctx->task)
+                       WARN_ON_ONCE(cpuctx->task_ctx);
                return;
+       }
+
+       ctx->is_active &= ~event_type;
+       if (ctx->task) {
+               WARN_ON_ONCE(cpuctx->task_ctx != ctx);
+               if (!ctx->is_active)
+                       cpuctx->task_ctx = NULL;
+       }
 
        update_context_time(ctx);
        update_cgrp_time_from_cpuctx(cpuctx);
@@ -2518,17 +2519,21 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
                raw_spin_lock(&ctx->lock);
                raw_spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING);
                if (context_equiv(ctx, next_ctx)) {
-                       /*
-                        * XXX do we need a memory barrier of sorts
-                        * wrt to rcu_dereference() of perf_event_ctxp
-                        */
-                       task->perf_event_ctxp[ctxn] = next_ctx;
-                       next->perf_event_ctxp[ctxn] = ctx;
-                       ctx->task = next;
-                       next_ctx->task = task;
+                       WRITE_ONCE(ctx->task, next);
+                       WRITE_ONCE(next_ctx->task, task);
 
                        swap(ctx->task_ctx_data, next_ctx->task_ctx_data);
 
+                       /*
+                        * RCU_INIT_POINTER here is safe because we've not
+                        * modified the ctx and the above modification of
+                        * ctx->task and ctx->task_ctx_data are immaterial
+                        * since those values are always verified under
+                        * ctx->lock which we're now holding.
+                        */
+                       RCU_INIT_POINTER(task->perf_event_ctxp[ctxn], next_ctx);
+                       RCU_INIT_POINTER(next->perf_event_ctxp[ctxn], ctx);
+
                        do_switch = 0;
 
                        perf_event_sync_stat(ctx, next_ctx);
@@ -2541,8 +2546,7 @@ unlock:
 
        if (do_switch) {
                raw_spin_lock(&ctx->lock);
-               ctx_sched_out(ctx, cpuctx, EVENT_ALL);
-               cpuctx->task_ctx = NULL;
+               task_ctx_sched_out(cpuctx, ctx);
                raw_spin_unlock(&ctx->lock);
        }
 }
@@ -2637,10 +2641,9 @@ void __perf_event_task_sched_out(struct task_struct *task,
                perf_cgroup_sched_out(task, next);
 }
 
-static void task_ctx_sched_out(struct perf_event_context *ctx)
+static void task_ctx_sched_out(struct perf_cpu_context *cpuctx,
+                              struct perf_event_context *ctx)
 {
-       struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
-
        if (!cpuctx->task_ctx)
                return;
 
@@ -2648,7 +2651,6 @@ static void task_ctx_sched_out(struct perf_event_context *ctx)
                return;
 
        ctx_sched_out(ctx, cpuctx, EVENT_ALL);
-       cpuctx->task_ctx = NULL;
 }
 
 /*
@@ -2725,13 +2727,22 @@ ctx_sched_in(struct perf_event_context *ctx,
             enum event_type_t event_type,
             struct task_struct *task)
 {
-       u64 now;
        int is_active = ctx->is_active;
+       u64 now;
+
+       lockdep_assert_held(&ctx->lock);
 
-       ctx->is_active |= event_type;
        if (likely(!ctx->nr_events))
                return;
 
+       ctx->is_active |= event_type;
+       if (ctx->task) {
+               if (!is_active)
+                       cpuctx->task_ctx = ctx;
+               else
+                       WARN_ON_ONCE(cpuctx->task_ctx != ctx);
+       }
+
        now = perf_clock();
        ctx->timestamp = now;
        perf_cgroup_set_timestamp(task, ctx);
@@ -2773,12 +2784,7 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
         * cpu flexible, task flexible.
         */
        cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
-
-       if (ctx->nr_events)
-               cpuctx->task_ctx = ctx;
-
-       perf_event_sched_in(cpuctx, cpuctx->task_ctx, task);
-
+       perf_event_sched_in(cpuctx, ctx, task);
        perf_pmu_enable(ctx->pmu);
        perf_ctx_unlock(cpuctx, ctx);
 }
@@ -2800,6 +2806,16 @@ void __perf_event_task_sched_in(struct task_struct *prev,
        struct perf_event_context *ctx;
        int ctxn;
 
+       /*
+        * If cgroup events exist on this CPU, then we need to check if we have
+        * to switch in PMU state; cgroup event are system-wide mode only.
+        *
+        * Since cgroup events are CPU events, we must schedule these in before
+        * we schedule in the task events.
+        */
+       if (atomic_read(this_cpu_ptr(&perf_cgroup_events)))
+               perf_cgroup_sched_in(prev, task);
+
        for_each_task_context_nr(ctxn) {
                ctx = task->perf_event_ctxp[ctxn];
                if (likely(!ctx))
@@ -2807,13 +2823,6 @@ void __perf_event_task_sched_in(struct task_struct *prev,
 
                perf_event_context_sched_in(ctx, task);
        }
-       /*
-        * if cgroup events exist on this CPU, then we need
-        * to check if we have to switch in PMU state.
-        * cgroup event are system-wide mode only
-        */
-       if (atomic_read(this_cpu_ptr(&perf_cgroup_events)))
-               perf_cgroup_sched_in(prev, task);
 
        if (atomic_read(&nr_switch_events))
                perf_event_switch(task, prev, true);
@@ -3099,46 +3108,30 @@ static int event_enable_on_exec(struct perf_event *event,
 static void perf_event_enable_on_exec(int ctxn)
 {
        struct perf_event_context *ctx, *clone_ctx = NULL;
+       struct perf_cpu_context *cpuctx;
        struct perf_event *event;
        unsigned long flags;
        int enabled = 0;
-       int ret;
 
        local_irq_save(flags);
        ctx = current->perf_event_ctxp[ctxn];
        if (!ctx || !ctx->nr_events)
                goto out;
 
-       /*
-        * We must ctxsw out cgroup events to avoid conflict
-        * when invoking perf_task_event_sched_in() later on
-        * in this function. Otherwise we end up trying to
-        * ctxswin cgroup events which are already scheduled
-        * in.
-        */
-       perf_cgroup_sched_out(current, NULL);
-
-       raw_spin_lock(&ctx->lock);
-       task_ctx_sched_out(ctx);
-
-       list_for_each_entry(event, &ctx->event_list, event_entry) {
-               ret = event_enable_on_exec(event, ctx);
-               if (ret)
-                       enabled = 1;
-       }
+       cpuctx = __get_cpu_context(ctx);
+       perf_ctx_lock(cpuctx, ctx);
+       list_for_each_entry(event, &ctx->event_list, event_entry)
+               enabled |= event_enable_on_exec(event, ctx);
 
        /*
-        * Unclone this context if we enabled any event.
+        * Unclone and reschedule this context if we enabled any event.
         */
-       if (enabled)
+       if (enabled) {
                clone_ctx = unclone_ctx(ctx);
+               ctx_resched(cpuctx, ctx);
+       }
+       perf_ctx_unlock(cpuctx, ctx);
 
-       raw_spin_unlock(&ctx->lock);
-
-       /*
-        * Also calls ctxswin for cgroup events, if any:
-        */
-       perf_event_context_sched_in(ctx, ctx->task);
 out:
        local_irq_restore(flags);
 
@@ -3334,7 +3327,6 @@ static void __perf_event_init_context(struct perf_event_context *ctx)
        INIT_LIST_HEAD(&ctx->flexible_groups);
        INIT_LIST_HEAD(&ctx->event_list);
        atomic_set(&ctx->refcount, 1);
-       INIT_DELAYED_WORK(&ctx->orphans_remove, orphans_remove_work);
 }
 
 static struct perf_event_context *
@@ -3521,11 +3513,13 @@ static void unaccount_event_cpu(struct perf_event *event, int cpu)
 
 static void unaccount_event(struct perf_event *event)
 {
+       bool dec = false;
+
        if (event->parent)
                return;
 
        if (event->attach_state & PERF_ATTACH_TASK)
-               static_key_slow_dec_deferred(&perf_sched_events);
+               dec = true;
        if (event->attr.mmap || event->attr.mmap_data)
                atomic_dec(&nr_mmap_events);
        if (event->attr.comm)
@@ -3535,12 +3529,15 @@ static void unaccount_event(struct perf_event *event)
        if (event->attr.freq)
                atomic_dec(&nr_freq_events);
        if (event->attr.context_switch) {
-               static_key_slow_dec_deferred(&perf_sched_events);
+               dec = true;
                atomic_dec(&nr_switch_events);
        }
        if (is_cgroup_event(event))
-               static_key_slow_dec_deferred(&perf_sched_events);
+               dec = true;
        if (has_branch_stack(event))
+               dec = true;
+
+       if (dec)
                static_key_slow_dec_deferred(&perf_sched_events);
 
        unaccount_event_cpu(event, event->cpu);
@@ -3556,7 +3553,7 @@ static void unaccount_event(struct perf_event *event)
  *  3) two matching events on the same context.
  *
  * The former two cases are handled in the allocation path (perf_event_alloc(),
- * __free_event()), the latter -- before the first perf_install_in_context().
+ * _free_event()), the latter -- before the first perf_install_in_context().
  */
 static int exclusive_event_init(struct perf_event *event)
 {
@@ -3631,29 +3628,6 @@ static bool exclusive_event_installable(struct perf_event *event,
        return true;
 }
 
-static void __free_event(struct perf_event *event)
-{
-       if (!event->parent) {
-               if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
-                       put_callchain_buffers();
-       }
-
-       perf_event_free_bpf_prog(event);
-
-       if (event->destroy)
-               event->destroy(event);
-
-       if (event->ctx)
-               put_ctx(event->ctx);
-
-       if (event->pmu) {
-               exclusive_event_destroy(event);
-               module_put(event->pmu->module);
-       }
-
-       call_rcu(&event->rcu_head, free_event_rcu);
-}
-
 static void _free_event(struct perf_event *event)
 {
        irq_work_sync(&event->pending);
@@ -3675,7 +3649,25 @@ static void _free_event(struct perf_event *event)
        if (is_cgroup_event(event))
                perf_detach_cgroup(event);
 
-       __free_event(event);
+       if (!event->parent) {
+               if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
+                       put_callchain_buffers();
+       }
+
+       perf_event_free_bpf_prog(event);
+
+       if (event->destroy)
+               event->destroy(event);
+
+       if (event->ctx)
+               put_ctx(event->ctx);
+
+       if (event->pmu) {
+               exclusive_event_destroy(event);
+               module_put(event->pmu->module);
+       }
+
+       call_rcu(&event->rcu_head, free_event_rcu);
 }
 
 /*
@@ -3702,14 +3694,13 @@ static void perf_remove_from_owner(struct perf_event *event)
        struct task_struct *owner;
 
        rcu_read_lock();
-       owner = ACCESS_ONCE(event->owner);
        /*
-        * Matches the smp_wmb() in perf_event_exit_task(). If we observe
-        * !owner it means the list deletion is complete and we can indeed
-        * free this event, otherwise we need to serialize on
+        * Matches the smp_store_release() in perf_event_exit_task(). If we
+        * observe !owner it means the list deletion is complete and we can
+        * indeed free this event, otherwise we need to serialize on
         * owner->perf_event_mutex.
         */
-       smp_read_barrier_depends();
+       owner = lockless_dereference(event->owner);
        if (owner) {
                /*
                 * Since delayed_put_task_struct() also drops the last
@@ -3737,8 +3728,10 @@ static void perf_remove_from_owner(struct perf_event *event)
                 * ensured they're done, and we can proceed with freeing the
                 * event.
                 */
-               if (event->owner)
+               if (event->owner) {
                        list_del_init(&event->owner_entry);
+                       smp_store_release(&event->owner, NULL);
+               }
                mutex_unlock(&owner->perf_event_mutex);
                put_task_struct(owner);
        }
@@ -3746,36 +3739,98 @@ static void perf_remove_from_owner(struct perf_event *event)
 
 static void put_event(struct perf_event *event)
 {
-       struct perf_event_context *ctx;
-
        if (!atomic_long_dec_and_test(&event->refcount))
                return;
 
+       _free_event(event);
+}
+
+/*
+ * Kill an event dead; while event:refcount will preserve the event
+ * object, it will not preserve its functionality. Once the last 'user'
+ * gives up the object, we'll destroy the thing.
+ */
+int perf_event_release_kernel(struct perf_event *event)
+{
+       struct perf_event_context *ctx;
+       struct perf_event *child, *tmp;
+
        if (!is_kernel_event(event))
                perf_remove_from_owner(event);
 
+       ctx = perf_event_ctx_lock(event);
+       WARN_ON_ONCE(ctx->parent_ctx);
+       perf_remove_from_context(event, DETACH_GROUP | DETACH_STATE);
+       perf_event_ctx_unlock(event, ctx);
+
        /*
-        * There are two ways this annotation is useful:
+        * At this point we must have event->state == PERF_EVENT_STATE_EXIT,
+        * either from the above perf_remove_from_context() or through
+        * perf_event_exit_event().
         *
-        *  1) there is a lock recursion from perf_event_exit_task
-        *     see the comment there.
+        * Therefore, anybody acquiring event->child_mutex after the below
+        * loop _must_ also see this, most importantly inherit_event() which
+        * will avoid placing more children on the list.
         *
-        *  2) there is a lock-inversion with mmap_sem through
-        *     perf_read_group(), which takes faults while
-        *     holding ctx->mutex, however this is called after
-        *     the last filedesc died, so there is no possibility
-        *     to trigger the AB-BA case.
+        * Thus this guarantees that we will in fact observe and kill _ALL_
+        * child events.
         */
-       ctx = perf_event_ctx_lock_nested(event, SINGLE_DEPTH_NESTING);
-       WARN_ON_ONCE(ctx->parent_ctx);
-       perf_remove_from_context(event, true);
-       perf_event_ctx_unlock(event, ctx);
+       WARN_ON_ONCE(event->state != PERF_EVENT_STATE_EXIT);
 
-       _free_event(event);
-}
+again:
+       mutex_lock(&event->child_mutex);
+       list_for_each_entry(child, &event->child_list, child_list) {
 
-int perf_event_release_kernel(struct perf_event *event)
-{
+               /*
+                * Cannot change, child events are not migrated, see the
+                * comment with perf_event_ctx_lock_nested().
+                */
+               ctx = lockless_dereference(child->ctx);
+               /*
+                * Since child_mutex nests inside ctx::mutex, we must jump
+                * through hoops. We start by grabbing a reference on the ctx.
+                *
+                * Since the event cannot get freed while we hold the
+                * child_mutex, the context must also exist and have a !0
+                * reference count.
+                */
+               get_ctx(ctx);
+
+               /*
+                * Now that we have a ctx ref, we can drop child_mutex, and
+                * acquire ctx::mutex without fear of it going away. Then we
+                * can re-acquire child_mutex.
+                */
+               mutex_unlock(&event->child_mutex);
+               mutex_lock(&ctx->mutex);
+               mutex_lock(&event->child_mutex);
+
+               /*
+                * Now that we hold ctx::mutex and child_mutex, revalidate our
+                * state, if child is still the first entry, it didn't get freed
+                * and we can continue doing so.
+                */
+               tmp = list_first_entry_or_null(&event->child_list,
+                                              struct perf_event, child_list);
+               if (tmp == child) {
+                       perf_remove_from_context(child, DETACH_GROUP);
+                       list_del(&child->child_list);
+                       free_event(child);
+                       /*
+                        * This matches the refcount bump in inherit_event();
+                        * this can't be the last reference.
+                        */
+                       put_event(event);
+               }
+
+               mutex_unlock(&event->child_mutex);
+               mutex_unlock(&ctx->mutex);
+               put_ctx(ctx);
+               goto again;
+       }
+       mutex_unlock(&event->child_mutex);
+
+       /* Must be the last reference */
        put_event(event);
        return 0;
 }
@@ -3786,46 +3841,10 @@ EXPORT_SYMBOL_GPL(perf_event_release_kernel);
  */
 static int perf_release(struct inode *inode, struct file *file)
 {
-       put_event(file->private_data);
+       perf_event_release_kernel(file->private_data);
        return 0;
 }
 
-/*
- * Remove all orphanes events from the context.
- */
-static void orphans_remove_work(struct work_struct *work)
-{
-       struct perf_event_context *ctx;
-       struct perf_event *event, *tmp;
-
-       ctx = container_of(work, struct perf_event_context,
-                          orphans_remove.work);
-
-       mutex_lock(&ctx->mutex);
-       list_for_each_entry_safe(event, tmp, &ctx->event_list, event_entry) {
-               struct perf_event *parent_event = event->parent;
-
-               if (!is_orphaned_child(event))
-                       continue;
-
-               perf_remove_from_context(event, true);
-
-               mutex_lock(&parent_event->child_mutex);
-               list_del_init(&event->child_list);
-               mutex_unlock(&parent_event->child_mutex);
-
-               free_event(event);
-               put_event(parent_event);
-       }
-
-       raw_spin_lock_irq(&ctx->lock);
-       ctx->orphans_remove_sched = false;
-       raw_spin_unlock_irq(&ctx->lock);
-       mutex_unlock(&ctx->mutex);
-
-       put_ctx(ctx);
-}
-
 u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
 {
        struct perf_event *child;
@@ -4054,7 +4073,7 @@ static void _perf_event_reset(struct perf_event *event)
 /*
  * Holding the top-level event's child_mutex means that any
  * descendant process that has inherited this event will block
- * in sync_child_event if it goes to exit, thus satisfying the
+ * in perf_event_exit_event() if it goes to exit, thus satisfying the
  * task existence requirements of perf_event_enable/disable.
  */
 static void perf_event_for_each_child(struct perf_event *event,
@@ -4086,36 +4105,14 @@ static void perf_event_for_each(struct perf_event *event,
                perf_event_for_each_child(sibling, func);
 }
 
-struct period_event {
-       struct perf_event *event;
-       u64 value;
-};
-
-static void ___perf_event_period(void *info)
-{
-       struct period_event *pe = info;
-       struct perf_event *event = pe->event;
-       u64 value = pe->value;
-
-       if (event->attr.freq) {
-               event->attr.sample_freq = value;
-       } else {
-               event->attr.sample_period = value;
-               event->hw.sample_period = value;
-       }
-
-       local64_set(&event->hw.period_left, 0);
-}
-
-static int __perf_event_period(void *info)
+static void __perf_event_period(struct perf_event *event,
+                               struct perf_cpu_context *cpuctx,
+                               struct perf_event_context *ctx,
+                               void *info)
 {
-       struct period_event *pe = info;
-       struct perf_event *event = pe->event;
-       struct perf_event_context *ctx = event->ctx;
-       u64 value = pe->value;
+       u64 value = *((u64 *)info);
        bool active;
 
-       raw_spin_lock(&ctx->lock);
        if (event->attr.freq) {
                event->attr.sample_freq = value;
        } else {
@@ -4135,14 +4132,10 @@ static int __perf_event_period(void *info)
                event->pmu->start(event, PERF_EF_RELOAD);
                perf_pmu_enable(ctx->pmu);
        }
-       raw_spin_unlock(&ctx->lock);
-
-       return 0;
 }
 
 static int perf_event_period(struct perf_event *event, u64 __user *arg)
 {
-       struct period_event pe = { .event = event, };
        u64 value;
 
        if (!is_sampling_event(event))
@@ -4157,10 +4150,7 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg)
        if (event->attr.freq && value > sysctl_perf_event_sample_rate)
                return -EINVAL;
 
-       pe.value = value;
-
-       event_function_call(event, __perf_event_period,
-                           ___perf_event_period, &pe);
+       event_function_call(event, __perf_event_period, &value);
 
        return 0;
 }
@@ -4872,9 +4862,9 @@ static int perf_fasync(int fd, struct file *filp, int on)
        struct perf_event *event = filp->private_data;
        int retval;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        retval = fasync_helper(fd, filp, on, &event->fasync);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        if (retval < 0)
                return retval;
@@ -4932,7 +4922,7 @@ static void perf_pending_event(struct irq_work *entry)
 
        if (event->pending_disable) {
                event->pending_disable = 0;
-               __perf_event_disable(event);
+               perf_event_disable_local(event);
        }
 
        if (event->pending_wakeup) {
@@ -7753,11 +7743,13 @@ static void account_event_cpu(struct perf_event *event, int cpu)
 
 static void account_event(struct perf_event *event)
 {
+       bool inc = false;
+
        if (event->parent)
                return;
 
        if (event->attach_state & PERF_ATTACH_TASK)
-               static_key_slow_inc(&perf_sched_events.key);
+               inc = true;
        if (event->attr.mmap || event->attr.mmap_data)
                atomic_inc(&nr_mmap_events);
        if (event->attr.comm)
@@ -7770,11 +7762,14 @@ static void account_event(struct perf_event *event)
        }
        if (event->attr.context_switch) {
                atomic_inc(&nr_switch_events);
-               static_key_slow_inc(&perf_sched_events.key);
+               inc = true;
        }
        if (has_branch_stack(event))
-               static_key_slow_inc(&perf_sched_events.key);
+               inc = true;
        if (is_cgroup_event(event))
+               inc = true;
+
+       if (inc)
                static_key_slow_inc(&perf_sched_events.key);
 
        account_event_cpu(event, event->cpu);
@@ -8422,11 +8417,11 @@ SYSCALL_DEFINE5(perf_event_open,
                 * See perf_event_ctx_lock() for comments on the details
                 * of swizzling perf_event::ctx.
                 */
-               perf_remove_from_context(group_leader, false);
+               perf_remove_from_context(group_leader, 0);
 
                list_for_each_entry(sibling, &group_leader->sibling_list,
                                    group_entry) {
-                       perf_remove_from_context(sibling, false);
+                       perf_remove_from_context(sibling, 0);
                        put_ctx(gctx);
                }
 
@@ -8479,6 +8474,8 @@ SYSCALL_DEFINE5(perf_event_open,
        perf_event__header_size(event);
        perf_event__id_header_size(event);
 
+       event->owner = current;
+
        perf_install_in_context(ctx, event, event->cpu);
        perf_unpin_context(ctx);
 
@@ -8488,8 +8485,6 @@ SYSCALL_DEFINE5(perf_event_open,
 
        put_online_cpus();
 
-       event->owner = current;
-
        mutex_lock(&current->perf_event_mutex);
        list_add_tail(&event->owner_entry, &current->perf_event_list);
        mutex_unlock(&current->perf_event_mutex);
@@ -8556,7 +8551,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
        }
 
        /* Mark owner so we could distinguish it from user events. */
-       event->owner = EVENT_OWNER_KERNEL;
+       event->owner = TASK_TOMBSTONE;
 
        account_event(event);
 
@@ -8606,7 +8601,7 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
        mutex_lock_double(&src_ctx->mutex, &dst_ctx->mutex);
        list_for_each_entry_safe(event, tmp, &src_ctx->event_list,
                                 event_entry) {
-               perf_remove_from_context(event, false);
+               perf_remove_from_context(event, 0);
                unaccount_event_cpu(event, src_cpu);
                put_ctx(src_ctx);
                list_add(&event->migrate_entry, &events);
@@ -8673,33 +8668,15 @@ static void sync_child_event(struct perf_event *child_event,
                     &parent_event->child_total_time_enabled);
        atomic64_add(child_event->total_time_running,
                     &parent_event->child_total_time_running);
-
-       /*
-        * Remove this event from the parent's list
-        */
-       WARN_ON_ONCE(parent_event->ctx->parent_ctx);
-       mutex_lock(&parent_event->child_mutex);
-       list_del_init(&child_event->child_list);
-       mutex_unlock(&parent_event->child_mutex);
-
-       /*
-        * Make sure user/parent get notified, that we just
-        * lost one event.
-        */
-       perf_event_wakeup(parent_event);
-
-       /*
-        * Release the parent event, if this was the last
-        * reference to it.
-        */
-       put_event(parent_event);
 }
 
 static void
-__perf_event_exit_task(struct perf_event *child_event,
-                        struct perf_event_context *child_ctx,
-                        struct task_struct *child)
+perf_event_exit_event(struct perf_event *child_event,
+                     struct perf_event_context *child_ctx,
+                     struct task_struct *child)
 {
+       struct perf_event *parent_event = child_event->parent;
+
        /*
         * Do not destroy the 'original' grouping; because of the context
         * switch optimization the original events could've ended up in a
@@ -8712,57 +8689,86 @@ __perf_event_exit_task(struct perf_event *child_event,
         * Do destroy all inherited groups, we don't care about those
         * and being thorough is better.
         */
-       perf_remove_from_context(child_event, !!child_event->parent);
+       raw_spin_lock_irq(&child_ctx->lock);
+       WARN_ON_ONCE(child_ctx->is_active);
+
+       if (parent_event)
+               perf_group_detach(child_event);
+       list_del_event(child_event, child_ctx);
+       child_event->state = PERF_EVENT_STATE_EXIT; /* see perf_event_release_kernel() */
+       raw_spin_unlock_irq(&child_ctx->lock);
 
        /*
-        * It can happen that the parent exits first, and has events
-        * that are still around due to the child reference. These
-        * events need to be zapped.
+        * Parent events are governed by their filedesc, retain them.
         */
-       if (child_event->parent) {
-               sync_child_event(child_event, child);
-               free_event(child_event);
-       } else {
-               child_event->state = PERF_EVENT_STATE_EXIT;
+       if (!parent_event) {
                perf_event_wakeup(child_event);
+               return;
        }
+       /*
+        * Child events can be cleaned up.
+        */
+
+       sync_child_event(child_event, child);
+
+       /*
+        * Remove this event from the parent's list
+        */
+       WARN_ON_ONCE(parent_event->ctx->parent_ctx);
+       mutex_lock(&parent_event->child_mutex);
+       list_del_init(&child_event->child_list);
+       mutex_unlock(&parent_event->child_mutex);
+
+       /*
+        * Kick perf_poll() for is_event_hup().
+        */
+       perf_event_wakeup(parent_event);
+       free_event(child_event);
+       put_event(parent_event);
 }
 
 static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
 {
-       struct perf_event *child_event, *next;
        struct perf_event_context *child_ctx, *clone_ctx = NULL;
-       unsigned long flags;
+       struct perf_event *child_event, *next;
+
+       WARN_ON_ONCE(child != current);
 
-       if (likely(!child->perf_event_ctxp[ctxn]))
+       child_ctx = perf_pin_task_context(child, ctxn);
+       if (!child_ctx)
                return;
 
-       local_irq_save(flags);
        /*
-        * We can't reschedule here because interrupts are disabled,
-        * and either child is current or it is a task that can't be
-        * scheduled, so we are now safe from rescheduling changing
-        * our context.
+        * In order to reduce the amount of tricky in ctx tear-down, we hold
+        * ctx::mutex over the entire thing. This serializes against almost
+        * everything that wants to access the ctx.
+        *
+        * The exception is sys_perf_event_open() /
+        * perf_event_create_kernel_count() which does find_get_context()
+        * without ctx::mutex (it cannot because of the move_group double mutex
+        * lock thing). See the comments in perf_install_in_context().
         */
-       child_ctx = rcu_dereference_raw(child->perf_event_ctxp[ctxn]);
+       mutex_lock(&child_ctx->mutex);
 
        /*
-        * Take the context lock here so that if find_get_context is
-        * reading child->perf_event_ctxp, we wait until it has
-        * incremented the context's refcount before we do put_ctx below.
+        * In a single ctx::lock section, de-schedule the events and detach the
+        * context from the task such that we cannot ever get it scheduled back
+        * in.
         */
-       raw_spin_lock(&child_ctx->lock);
-       task_ctx_sched_out(child_ctx);
-       child->perf_event_ctxp[ctxn] = NULL;
+       raw_spin_lock_irq(&child_ctx->lock);
+       task_ctx_sched_out(__get_cpu_context(child_ctx), child_ctx);
 
        /*
-        * If this context is a clone; unclone it so it can't get
-        * swapped to another process while we're removing all
-        * the events from it.
+        * Now that the context is inactive, destroy the task <-> ctx relation
+        * and mark the context dead.
         */
+       RCU_INIT_POINTER(child->perf_event_ctxp[ctxn], NULL);
+       put_ctx(child_ctx); /* cannot be last */
+       WRITE_ONCE(child_ctx->task, TASK_TOMBSTONE);
+       put_task_struct(current); /* cannot be last */
+
        clone_ctx = unclone_ctx(child_ctx);
-       update_context_time(child_ctx);
-       raw_spin_unlock_irqrestore(&child_ctx->lock, flags);
+       raw_spin_unlock_irq(&child_ctx->lock);
 
        if (clone_ctx)
                put_ctx(clone_ctx);
@@ -8774,20 +8780,8 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
         */
        perf_event_task(child, child_ctx, 0);
 
-       /*
-        * We can recurse on the same lock type through:
-        *
-        *   __perf_event_exit_task()
-        *     sync_child_event()
-        *       put_event()
-        *         mutex_lock(&ctx->mutex)
-        *
-        * But since its the parent context it won't be the same instance.
-        */
-       mutex_lock(&child_ctx->mutex);
-
        list_for_each_entry_safe(child_event, next, &child_ctx->event_list, event_entry)
-               __perf_event_exit_task(child_event, child_ctx, child);
+               perf_event_exit_event(child_event, child_ctx, child);
 
        mutex_unlock(&child_ctx->mutex);
 
@@ -8812,8 +8806,7 @@ void perf_event_exit_task(struct task_struct *child)
                 * the owner, closes a race against perf_release() where
                 * we need to serialize on the owner->perf_event_mutex.
                 */
-               smp_wmb();
-               event->owner = NULL;
+               smp_store_release(&event->owner, NULL);
        }
        mutex_unlock(&child->perf_event_mutex);
 
@@ -8896,21 +8889,20 @@ void perf_event_delayed_put(struct task_struct *task)
                WARN_ON_ONCE(task->perf_event_ctxp[ctxn]);
 }
 
-struct perf_event *perf_event_get(unsigned int fd)
+struct file *perf_event_get(unsigned int fd)
 {
-       int err;
-       struct fd f;
-       struct perf_event *event;
+       struct file *file;
 
-       err = perf_fget_light(fd, &f);
-       if (err)
-               return ERR_PTR(err);
+       file = fget_raw(fd);
+       if (!file)
+               return ERR_PTR(-EBADF);
 
-       event = f.file->private_data;
-       atomic_long_inc(&event->refcount);
-       fdput(f);
+       if (file->f_op != &perf_fops) {
+               fput(file);
+               return ERR_PTR(-EBADF);
+       }
 
-       return event;
+       return file;
 }
 
 const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
@@ -8953,8 +8945,16 @@ inherit_event(struct perf_event *parent_event,
        if (IS_ERR(child_event))
                return child_event;
 
+       /*
+        * is_orphaned_event() and list_add_tail(&parent_event->child_list)
+        * must be under the same lock in order to serialize against
+        * perf_event_release_kernel(), such that either we must observe
+        * is_orphaned_event() or they will observe us on the child_list.
+        */
+       mutex_lock(&parent_event->child_mutex);
        if (is_orphaned_event(parent_event) ||
            !atomic_long_inc_not_zero(&parent_event->refcount)) {
+               mutex_unlock(&parent_event->child_mutex);
                free_event(child_event);
                return NULL;
        }
@@ -9002,8 +9002,6 @@ inherit_event(struct perf_event *parent_event,
        /*
         * Link this into the parent event's child list
         */
-       WARN_ON_ONCE(parent_event->ctx->parent_ctx);
-       mutex_lock(&parent_event->child_mutex);
        list_add_tail(&child_event->child_list, &parent_event->child_list);
        mutex_unlock(&parent_event->child_mutex);
 
@@ -9221,13 +9219,14 @@ static void perf_event_init_cpu(int cpu)
 #if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
 static void __perf_event_exit_context(void *__info)
 {
-       struct remove_event re = { .detach_group = true };
        struct perf_event_context *ctx = __info;
+       struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
+       struct perf_event *event;
 
-       rcu_read_lock();
-       list_for_each_entry_rcu(re.event, &ctx->event_list, event_entry)
-               __perf_remove_from_context(&re);
-       rcu_read_unlock();
+       raw_spin_lock(&ctx->lock);
+       list_for_each_entry(event, &ctx->event_list, event_entry)
+               __perf_remove_from_context(event, cpuctx, ctx, (void *)DETACH_GROUP);
+       raw_spin_unlock(&ctx->lock);
 }
 
 static void perf_event_exit_cpu_context(int cpu)
index 92ce5f4ccc264e01a5551965d173e39e41392143..3f8cb1e14588fe3258a1c3aa49874e008f5a266a 100644 (file)
@@ -444,7 +444,7 @@ int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *att
         * current task.
         */
        if (irqs_disabled() && bp->ctx && bp->ctx->task == current)
-               __perf_event_disable(bp);
+               perf_event_disable_local(bp);
        else
                perf_event_disable(bp);
 
index adfdc0536117c1f10bf8f0dd8014798011e1f855..1faad2cfdb9e4572c91431e4b11941266e23a231 100644 (file)
@@ -459,6 +459,25 @@ static void rb_free_aux_page(struct ring_buffer *rb, int idx)
        __free_page(page);
 }
 
+static void __rb_free_aux(struct ring_buffer *rb)
+{
+       int pg;
+
+       if (rb->aux_priv) {
+               rb->free_aux(rb->aux_priv);
+               rb->free_aux = NULL;
+               rb->aux_priv = NULL;
+       }
+
+       if (rb->aux_nr_pages) {
+               for (pg = 0; pg < rb->aux_nr_pages; pg++)
+                       rb_free_aux_page(rb, pg);
+
+               kfree(rb->aux_pages);
+               rb->aux_nr_pages = 0;
+       }
+}
+
 int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event,
                 pgoff_t pgoff, int nr_pages, long watermark, int flags)
 {
@@ -547,30 +566,11 @@ out:
        if (!ret)
                rb->aux_pgoff = pgoff;
        else
-               rb_free_aux(rb);
+               __rb_free_aux(rb);
 
        return ret;
 }
 
-static void __rb_free_aux(struct ring_buffer *rb)
-{
-       int pg;
-
-       if (rb->aux_priv) {
-               rb->free_aux(rb->aux_priv);
-               rb->free_aux = NULL;
-               rb->aux_priv = NULL;
-       }
-
-       if (rb->aux_nr_pages) {
-               for (pg = 0; pg < rb->aux_nr_pages; pg++)
-                       rb_free_aux_page(rb, pg);
-
-               kfree(rb->aux_pages);
-               rb->aux_nr_pages = 0;
-       }
-}
-
 void rb_free_aux(struct ring_buffer *rb)
 {
        if (atomic_dec_and_test(&rb->aux_refcount))
index 0773f2b23b107dcc3be9e5caabdbaa0a25de01d0..5d6ce6413ef1d227b32c99a4330bee1289f9f571 100644 (file)
@@ -1191,7 +1191,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this,
        if (pi_state->owner != current)
                return -EINVAL;
 
-       raw_spin_lock(&pi_state->pi_mutex.wait_lock);
+       raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
        new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
 
        /*
@@ -1217,22 +1217,22 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this,
        else if (curval != uval)
                ret = -EINVAL;
        if (ret) {
-               raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
+               raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
                return ret;
        }
 
-       raw_spin_lock_irq(&pi_state->owner->pi_lock);
+       raw_spin_lock(&pi_state->owner->pi_lock);
        WARN_ON(list_empty(&pi_state->list));
        list_del_init(&pi_state->list);
-       raw_spin_unlock_irq(&pi_state->owner->pi_lock);
+       raw_spin_unlock(&pi_state->owner->pi_lock);
 
-       raw_spin_lock_irq(&new_owner->pi_lock);
+       raw_spin_lock(&new_owner->pi_lock);
        WARN_ON(!list_empty(&pi_state->list));
        list_add(&pi_state->list, &new_owner->pi_state_list);
        pi_state->owner = new_owner;
-       raw_spin_unlock_irq(&new_owner->pi_lock);
+       raw_spin_unlock(&new_owner->pi_lock);
 
-       raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
+       raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
 
        deboost = rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
 
@@ -2127,11 +2127,11 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
                 * we returned due to timeout or signal without taking the
                 * rt_mutex. Too late.
                 */
-               raw_spin_lock(&q->pi_state->pi_mutex.wait_lock);
+               raw_spin_lock_irq(&q->pi_state->pi_mutex.wait_lock);
                owner = rt_mutex_owner(&q->pi_state->pi_mutex);
                if (!owner)
                        owner = rt_mutex_next_owner(&q->pi_state->pi_mutex);
-               raw_spin_unlock(&q->pi_state->pi_mutex.wait_lock);
+               raw_spin_unlock_irq(&q->pi_state->pi_mutex.wait_lock);
                ret = fixup_pi_state_owner(uaddr, q, owner);
                goto out;
        }
index a302cf9a2126c8a911ff4da1a944c88c6d3b3f8a..57bff7857e879bf2301a92723ade0a968870e637 100644 (file)
@@ -138,7 +138,8 @@ irqreturn_t handle_irq_event_percpu(struct irq_desc *desc)
        unsigned int flags = 0, irq = desc->irq_data.irq;
        struct irqaction *action = desc->action;
 
-       do {
+       /* action might have become NULL since we dropped the lock */
+       while (action) {
                irqreturn_t res;
 
                trace_irq_handler_entry(irq, action);
@@ -173,7 +174,7 @@ irqreturn_t handle_irq_event_percpu(struct irq_desc *desc)
 
                retval |= res;
                action = action->next;
-       } while (action);
+       }
 
        add_interrupt_randomness(irq, flags);
 
index 6e655f7acd3bf79b68f62fc4fbdf6806cfd11884..3e56d2f03e24e6ee147e837727e6aac7a479f9aa 100644 (file)
@@ -575,10 +575,15 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
        unsigned int type = IRQ_TYPE_NONE;
        int virq;
 
-       if (fwspec->fwnode)
-               domain = irq_find_matching_fwnode(fwspec->fwnode, DOMAIN_BUS_ANY);
-       else
+       if (fwspec->fwnode) {
+               domain = irq_find_matching_fwnode(fwspec->fwnode,
+                                                 DOMAIN_BUS_WIRED);
+               if (!domain)
+                       domain = irq_find_matching_fwnode(fwspec->fwnode,
+                                                         DOMAIN_BUS_ANY);
+       } else {
                domain = irq_default_domain;
+       }
 
        if (!domain) {
                pr_warn("no irq domain found for %s !\n",
index 8251e75dd9c0bd67337754baf6f03fb2cf1f956f..3e746607abe5230bb9c650957582669c085d9a90 100644 (file)
@@ -99,13 +99,14 @@ static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
  * 2) Drop lock->wait_lock
  * 3) Try to unlock the lock with cmpxchg
  */
-static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
+static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
+                                       unsigned long flags)
        __releases(lock->wait_lock)
 {
        struct task_struct *owner = rt_mutex_owner(lock);
 
        clear_rt_mutex_waiters(lock);
-       raw_spin_unlock(&lock->wait_lock);
+       raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
        /*
         * If a new waiter comes in between the unlock and the cmpxchg
         * we have two situations:
@@ -147,11 +148,12 @@ static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
 /*
  * Simple slow path only version: lock->owner is protected by lock->wait_lock.
  */
-static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
+static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
+                                       unsigned long flags)
        __releases(lock->wait_lock)
 {
        lock->owner = NULL;
-       raw_spin_unlock(&lock->wait_lock);
+       raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
        return true;
 }
 #endif
@@ -433,7 +435,6 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
        int ret = 0, depth = 0;
        struct rt_mutex *lock;
        bool detect_deadlock;
-       unsigned long flags;
        bool requeue = true;
 
        detect_deadlock = rt_mutex_cond_detect_deadlock(orig_waiter, chwalk);
@@ -476,7 +477,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
        /*
         * [1] Task cannot go away as we did a get_task() before !
         */
-       raw_spin_lock_irqsave(&task->pi_lock, flags);
+       raw_spin_lock_irq(&task->pi_lock);
 
        /*
         * [2] Get the waiter on which @task is blocked on.
@@ -560,7 +561,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
         * operations.
         */
        if (!raw_spin_trylock(&lock->wait_lock)) {
-               raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+               raw_spin_unlock_irq(&task->pi_lock);
                cpu_relax();
                goto retry;
        }
@@ -591,7 +592,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
                /*
                 * No requeue[7] here. Just release @task [8]
                 */
-               raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+               raw_spin_unlock(&task->pi_lock);
                put_task_struct(task);
 
                /*
@@ -599,14 +600,14 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
                 * If there is no owner of the lock, end of chain.
                 */
                if (!rt_mutex_owner(lock)) {
-                       raw_spin_unlock(&lock->wait_lock);
+                       raw_spin_unlock_irq(&lock->wait_lock);
                        return 0;
                }
 
                /* [10] Grab the next task, i.e. owner of @lock */
                task = rt_mutex_owner(lock);
                get_task_struct(task);
-               raw_spin_lock_irqsave(&task->pi_lock, flags);
+               raw_spin_lock(&task->pi_lock);
 
                /*
                 * No requeue [11] here. We just do deadlock detection.
@@ -621,8 +622,8 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
                top_waiter = rt_mutex_top_waiter(lock);
 
                /* [13] Drop locks */
-               raw_spin_unlock_irqrestore(&task->pi_lock, flags);
-               raw_spin_unlock(&lock->wait_lock);
+               raw_spin_unlock(&task->pi_lock);
+               raw_spin_unlock_irq(&lock->wait_lock);
 
                /* If owner is not blocked, end of chain. */
                if (!next_lock)
@@ -643,7 +644,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
        rt_mutex_enqueue(lock, waiter);
 
        /* [8] Release the task */
-       raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+       raw_spin_unlock(&task->pi_lock);
        put_task_struct(task);
 
        /*
@@ -661,14 +662,14 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
                 */
                if (prerequeue_top_waiter != rt_mutex_top_waiter(lock))
                        wake_up_process(rt_mutex_top_waiter(lock)->task);
-               raw_spin_unlock(&lock->wait_lock);
+               raw_spin_unlock_irq(&lock->wait_lock);
                return 0;
        }
 
        /* [10] Grab the next task, i.e. the owner of @lock */
        task = rt_mutex_owner(lock);
        get_task_struct(task);
-       raw_spin_lock_irqsave(&task->pi_lock, flags);
+       raw_spin_lock(&task->pi_lock);
 
        /* [11] requeue the pi waiters if necessary */
        if (waiter == rt_mutex_top_waiter(lock)) {
@@ -722,8 +723,8 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
        top_waiter = rt_mutex_top_waiter(lock);
 
        /* [13] Drop the locks */
-       raw_spin_unlock_irqrestore(&task->pi_lock, flags);
-       raw_spin_unlock(&lock->wait_lock);
+       raw_spin_unlock(&task->pi_lock);
+       raw_spin_unlock_irq(&lock->wait_lock);
 
        /*
         * Make the actual exit decisions [12], based on the stored
@@ -746,7 +747,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
        goto again;
 
  out_unlock_pi:
-       raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+       raw_spin_unlock_irq(&task->pi_lock);
  out_put_task:
        put_task_struct(task);
 
@@ -756,7 +757,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
 /*
  * Try to take an rt-mutex
  *
- * Must be called with lock->wait_lock held.
+ * Must be called with lock->wait_lock held and interrupts disabled
  *
  * @lock:   The lock to be acquired.
  * @task:   The task which wants to acquire the lock
@@ -766,8 +767,6 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
 static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
                                struct rt_mutex_waiter *waiter)
 {
-       unsigned long flags;
-
        /*
         * Before testing whether we can acquire @lock, we set the
         * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all
@@ -852,7 +851,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
         * case, but conditionals are more expensive than a redundant
         * store.
         */
-       raw_spin_lock_irqsave(&task->pi_lock, flags);
+       raw_spin_lock(&task->pi_lock);
        task->pi_blocked_on = NULL;
        /*
         * Finish the lock acquisition. @task is the new owner. If
@@ -861,7 +860,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
         */
        if (rt_mutex_has_waiters(lock))
                rt_mutex_enqueue_pi(task, rt_mutex_top_waiter(lock));
-       raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+       raw_spin_unlock(&task->pi_lock);
 
 takeit:
        /* We got the lock. */
@@ -883,7 +882,7 @@ takeit:
  *
  * Prepare waiter and propagate pi chain
  *
- * This must be called with lock->wait_lock held.
+ * This must be called with lock->wait_lock held and interrupts disabled
  */
 static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
                                   struct rt_mutex_waiter *waiter,
@@ -894,7 +893,6 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
        struct rt_mutex_waiter *top_waiter = waiter;
        struct rt_mutex *next_lock;
        int chain_walk = 0, res;
-       unsigned long flags;
 
        /*
         * Early deadlock detection. We really don't want the task to
@@ -908,7 +906,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
        if (owner == task)
                return -EDEADLK;
 
-       raw_spin_lock_irqsave(&task->pi_lock, flags);
+       raw_spin_lock(&task->pi_lock);
        __rt_mutex_adjust_prio(task);
        waiter->task = task;
        waiter->lock = lock;
@@ -921,12 +919,12 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
 
        task->pi_blocked_on = waiter;
 
-       raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+       raw_spin_unlock(&task->pi_lock);
 
        if (!owner)
                return 0;
 
-       raw_spin_lock_irqsave(&owner->pi_lock, flags);
+       raw_spin_lock(&owner->pi_lock);
        if (waiter == rt_mutex_top_waiter(lock)) {
                rt_mutex_dequeue_pi(owner, top_waiter);
                rt_mutex_enqueue_pi(owner, waiter);
@@ -941,7 +939,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
        /* Store the lock on which owner is blocked or NULL */
        next_lock = task_blocked_on_lock(owner);
 
-       raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
+       raw_spin_unlock(&owner->pi_lock);
        /*
         * Even if full deadlock detection is on, if the owner is not
         * blocked itself, we can avoid finding this out in the chain
@@ -957,12 +955,12 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
         */
        get_task_struct(owner);
 
-       raw_spin_unlock(&lock->wait_lock);
+       raw_spin_unlock_irq(&lock->wait_lock);
 
        res = rt_mutex_adjust_prio_chain(owner, chwalk, lock,
                                         next_lock, waiter, task);
 
-       raw_spin_lock(&lock->wait_lock);
+       raw_spin_lock_irq(&lock->wait_lock);
 
        return res;
 }
@@ -971,15 +969,14 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
  * Remove the top waiter from the current tasks pi waiter tree and
  * queue it up.
  *
- * Called with lock->wait_lock held.
+ * Called with lock->wait_lock held and interrupts disabled.
  */
 static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
                                    struct rt_mutex *lock)
 {
        struct rt_mutex_waiter *waiter;
-       unsigned long flags;
 
-       raw_spin_lock_irqsave(&current->pi_lock, flags);
+       raw_spin_lock(&current->pi_lock);
 
        waiter = rt_mutex_top_waiter(lock);
 
@@ -1001,7 +998,7 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
         */
        lock->owner = (void *) RT_MUTEX_HAS_WAITERS;
 
-       raw_spin_unlock_irqrestore(&current->pi_lock, flags);
+       raw_spin_unlock(&current->pi_lock);
 
        wake_q_add(wake_q, waiter->task);
 }
@@ -1009,7 +1006,7 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
 /*
  * Remove a waiter from a lock and give up
  *
- * Must be called with lock->wait_lock held and
+ * Must be called with lock->wait_lock held and interrupts disabled. I must
  * have just failed to try_to_take_rt_mutex().
  */
 static void remove_waiter(struct rt_mutex *lock,
@@ -1018,12 +1015,11 @@ static void remove_waiter(struct rt_mutex *lock,
        bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
        struct task_struct *owner = rt_mutex_owner(lock);
        struct rt_mutex *next_lock;
-       unsigned long flags;
 
-       raw_spin_lock_irqsave(&current->pi_lock, flags);
+       raw_spin_lock(&current->pi_lock);
        rt_mutex_dequeue(lock, waiter);
        current->pi_blocked_on = NULL;
-       raw_spin_unlock_irqrestore(&current->pi_lock, flags);
+       raw_spin_unlock(&current->pi_lock);
 
        /*
         * Only update priority if the waiter was the highest priority
@@ -1032,7 +1028,7 @@ static void remove_waiter(struct rt_mutex *lock,
        if (!owner || !is_top_waiter)
                return;
 
-       raw_spin_lock_irqsave(&owner->pi_lock, flags);
+       raw_spin_lock(&owner->pi_lock);
 
        rt_mutex_dequeue_pi(owner, waiter);
 
@@ -1044,7 +1040,7 @@ static void remove_waiter(struct rt_mutex *lock,
        /* Store the lock on which owner is blocked or NULL */
        next_lock = task_blocked_on_lock(owner);
 
-       raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
+       raw_spin_unlock(&owner->pi_lock);
 
        /*
         * Don't walk the chain, if the owner task is not blocked
@@ -1056,12 +1052,12 @@ static void remove_waiter(struct rt_mutex *lock,
        /* gets dropped in rt_mutex_adjust_prio_chain()! */
        get_task_struct(owner);
 
-       raw_spin_unlock(&lock->wait_lock);
+       raw_spin_unlock_irq(&lock->wait_lock);
 
        rt_mutex_adjust_prio_chain(owner, RT_MUTEX_MIN_CHAINWALK, lock,
                                   next_lock, NULL, current);
 
-       raw_spin_lock(&lock->wait_lock);
+       raw_spin_lock_irq(&lock->wait_lock);
 }
 
 /*
@@ -1097,11 +1093,11 @@ void rt_mutex_adjust_pi(struct task_struct *task)
  * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop
  * @lock:               the rt_mutex to take
  * @state:              the state the task should block in (TASK_INTERRUPTIBLE
- *                      or TASK_UNINTERRUPTIBLE)
+ *                      or TASK_UNINTERRUPTIBLE)
  * @timeout:            the pre-initialized and started timer, or NULL for none
  * @waiter:             the pre-initialized rt_mutex_waiter
  *
- * lock->wait_lock must be held by the caller.
+ * Must be called with lock->wait_lock held and interrupts disabled
  */
 static int __sched
 __rt_mutex_slowlock(struct rt_mutex *lock, int state,
@@ -1129,13 +1125,13 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
                                break;
                }
 
-               raw_spin_unlock(&lock->wait_lock);
+               raw_spin_unlock_irq(&lock->wait_lock);
 
                debug_rt_mutex_print_deadlock(waiter);
 
                schedule();
 
-               raw_spin_lock(&lock->wait_lock);
+               raw_spin_lock_irq(&lock->wait_lock);
                set_current_state(state);
        }
 
@@ -1172,17 +1168,26 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
                  enum rtmutex_chainwalk chwalk)
 {
        struct rt_mutex_waiter waiter;
+       unsigned long flags;
        int ret = 0;
 
        debug_rt_mutex_init_waiter(&waiter);
        RB_CLEAR_NODE(&waiter.pi_tree_entry);
        RB_CLEAR_NODE(&waiter.tree_entry);
 
-       raw_spin_lock(&lock->wait_lock);
+       /*
+        * Technically we could use raw_spin_[un]lock_irq() here, but this can
+        * be called in early boot if the cmpxchg() fast path is disabled
+        * (debug, no architecture support). In this case we will acquire the
+        * rtmutex with lock->wait_lock held. But we cannot unconditionally
+        * enable interrupts in that early boot case. So we need to use the
+        * irqsave/restore variants.
+        */
+       raw_spin_lock_irqsave(&lock->wait_lock, flags);
 
        /* Try to acquire the lock again: */
        if (try_to_take_rt_mutex(lock, current, NULL)) {
-               raw_spin_unlock(&lock->wait_lock);
+               raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
                return 0;
        }
 
@@ -1211,7 +1216,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
         */
        fixup_rt_mutex_waiters(lock);
 
-       raw_spin_unlock(&lock->wait_lock);
+       raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
 
        /* Remove pending timer: */
        if (unlikely(timeout))
@@ -1227,6 +1232,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
  */
 static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
 {
+       unsigned long flags;
        int ret;
 
        /*
@@ -1238,10 +1244,10 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
                return 0;
 
        /*
-        * The mutex has currently no owner. Lock the wait lock and
-        * try to acquire the lock.
+        * The mutex has currently no owner. Lock the wait lock and try to
+        * acquire the lock. We use irqsave here to support early boot calls.
         */
-       raw_spin_lock(&lock->wait_lock);
+       raw_spin_lock_irqsave(&lock->wait_lock, flags);
 
        ret = try_to_take_rt_mutex(lock, current, NULL);
 
@@ -1251,7 +1257,7 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
         */
        fixup_rt_mutex_waiters(lock);
 
-       raw_spin_unlock(&lock->wait_lock);
+       raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
 
        return ret;
 }
@@ -1263,7 +1269,10 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
 static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
                                        struct wake_q_head *wake_q)
 {
-       raw_spin_lock(&lock->wait_lock);
+       unsigned long flags;
+
+       /* irqsave required to support early boot calls */
+       raw_spin_lock_irqsave(&lock->wait_lock, flags);
 
        debug_rt_mutex_unlock(lock);
 
@@ -1302,10 +1311,10 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
         */
        while (!rt_mutex_has_waiters(lock)) {
                /* Drops lock->wait_lock ! */
-               if (unlock_rt_mutex_safe(lock) == true)
+               if (unlock_rt_mutex_safe(lock, flags) == true)
                        return false;
                /* Relock the rtmutex and try again */
-               raw_spin_lock(&lock->wait_lock);
+               raw_spin_lock_irqsave(&lock->wait_lock, flags);
        }
 
        /*
@@ -1316,7 +1325,7 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
         */
        mark_wakeup_next_waiter(wake_q, lock);
 
-       raw_spin_unlock(&lock->wait_lock);
+       raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
 
        /* check PI boosting */
        return true;
@@ -1596,10 +1605,10 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
 {
        int ret;
 
-       raw_spin_lock(&lock->wait_lock);
+       raw_spin_lock_irq(&lock->wait_lock);
 
        if (try_to_take_rt_mutex(lock, task, NULL)) {
-               raw_spin_unlock(&lock->wait_lock);
+               raw_spin_unlock_irq(&lock->wait_lock);
                return 1;
        }
 
@@ -1620,7 +1629,7 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
        if (unlikely(ret))
                remove_waiter(lock, waiter);
 
-       raw_spin_unlock(&lock->wait_lock);
+       raw_spin_unlock_irq(&lock->wait_lock);
 
        debug_rt_mutex_print_deadlock(waiter);
 
@@ -1668,7 +1677,7 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
 {
        int ret;
 
-       raw_spin_lock(&lock->wait_lock);
+       raw_spin_lock_irq(&lock->wait_lock);
 
        set_current_state(TASK_INTERRUPTIBLE);
 
@@ -1684,7 +1693,7 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
         */
        fixup_rt_mutex_waiters(lock);
 
-       raw_spin_unlock(&lock->wait_lock);
+       raw_spin_unlock_irq(&lock->wait_lock);
 
        return ret;
 }
index e517a16cb426cc30b0251a43d22c5d2cb01bead9..70ee3775de24ebcf2e80ab2fdad21eaad24dd2fe 100644 (file)
@@ -150,7 +150,7 @@ void devm_memunmap(struct device *dev, void *addr)
 }
 EXPORT_SYMBOL(devm_memunmap);
 
-pfn_t phys_to_pfn_t(dma_addr_t addr, unsigned long flags)
+pfn_t phys_to_pfn_t(phys_addr_t addr, unsigned long flags)
 {
        return __pfn_to_pfn_t(addr >> PAGE_SHIFT, flags);
 }
@@ -183,7 +183,11 @@ EXPORT_SYMBOL(put_zone_device_page);
 
 static void pgmap_radix_release(struct resource *res)
 {
-       resource_size_t key;
+       resource_size_t key, align_start, align_size, align_end;
+
+       align_start = res->start & ~(SECTION_SIZE - 1);
+       align_size = ALIGN(resource_size(res), SECTION_SIZE);
+       align_end = align_start + align_size - 1;
 
        mutex_lock(&pgmap_lock);
        for (key = res->start; key <= res->end; key += SECTION_SIZE)
@@ -226,12 +230,11 @@ static void devm_memremap_pages_release(struct device *dev, void *data)
                percpu_ref_put(pgmap->ref);
        }
 
-       pgmap_radix_release(res);
-
        /* pages are dead and unused, undo the arch mapping */
        align_start = res->start & ~(SECTION_SIZE - 1);
        align_size = ALIGN(resource_size(res), SECTION_SIZE);
        arch_remove_memory(align_start, align_size);
+       pgmap_radix_release(res);
        dev_WARN_ONCE(dev, pgmap->altmap && pgmap->altmap->alloc,
                        "%s: failed to free all reserved pages\n", __func__);
 }
@@ -267,7 +270,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
 {
        int is_ram = region_intersects(res->start, resource_size(res),
                        "System RAM");
-       resource_size_t key, align_start, align_size;
+       resource_size_t key, align_start, align_size, align_end;
        struct dev_pagemap *pgmap;
        struct page_map *page_map;
        unsigned long pfn;
@@ -309,7 +312,10 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
 
        mutex_lock(&pgmap_lock);
        error = 0;
-       for (key = res->start; key <= res->end; key += SECTION_SIZE) {
+       align_start = res->start & ~(SECTION_SIZE - 1);
+       align_size = ALIGN(resource_size(res), SECTION_SIZE);
+       align_end = align_start + align_size - 1;
+       for (key = align_start; key <= align_end; key += SECTION_SIZE) {
                struct dev_pagemap *dup;
 
                rcu_read_lock();
@@ -336,8 +342,6 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
        if (nid < 0)
                nid = numa_mem_id();
 
-       align_start = res->start & ~(SECTION_SIZE - 1);
-       align_size = ALIGN(resource_size(res), SECTION_SIZE);
        error = arch_add_memory(nid, align_start, align_size, true);
        if (error)
                goto err_add_memory;
index 8358f4697c0c3aea77aba802833660d4082b7c13..9537da37ce87fe28cf11ee97d78bfa17ddc6beb3 100644 (file)
@@ -303,6 +303,9 @@ struct load_info {
        struct _ddebug *debug;
        unsigned int num_debug;
        bool sig_ok;
+#ifdef CONFIG_KALLSYMS
+       unsigned long mod_kallsyms_init_off;
+#endif
        struct {
                unsigned int sym, str, mod, vers, info, pcpu;
        } index;
@@ -2480,10 +2483,21 @@ static void layout_symtab(struct module *mod, struct load_info *info)
        strsect->sh_flags |= SHF_ALLOC;
        strsect->sh_entsize = get_offset(mod, &mod->init_layout.size, strsect,
                                         info->index.str) | INIT_OFFSET_MASK;
-       mod->init_layout.size = debug_align(mod->init_layout.size);
        pr_debug("\t%s\n", info->secstrings + strsect->sh_name);
+
+       /* We'll tack temporary mod_kallsyms on the end. */
+       mod->init_layout.size = ALIGN(mod->init_layout.size,
+                                     __alignof__(struct mod_kallsyms));
+       info->mod_kallsyms_init_off = mod->init_layout.size;
+       mod->init_layout.size += sizeof(struct mod_kallsyms);
+       mod->init_layout.size = debug_align(mod->init_layout.size);
 }
 
+/*
+ * We use the full symtab and strtab which layout_symtab arranged to
+ * be appended to the init section.  Later we switch to the cut-down
+ * core-only ones.
+ */
 static void add_kallsyms(struct module *mod, const struct load_info *info)
 {
        unsigned int i, ndst;
@@ -2492,29 +2506,34 @@ static void add_kallsyms(struct module *mod, const struct load_info *info)
        char *s;
        Elf_Shdr *symsec = &info->sechdrs[info->index.sym];
 
-       mod->symtab = (void *)symsec->sh_addr;
-       mod->num_symtab = symsec->sh_size / sizeof(Elf_Sym);
+       /* Set up to point into init section. */
+       mod->kallsyms = mod->init_layout.base + info->mod_kallsyms_init_off;
+
+       mod->kallsyms->symtab = (void *)symsec->sh_addr;
+       mod->kallsyms->num_symtab = symsec->sh_size / sizeof(Elf_Sym);
        /* Make sure we get permanent strtab: don't use info->strtab. */
-       mod->strtab = (void *)info->sechdrs[info->index.str].sh_addr;
+       mod->kallsyms->strtab = (void *)info->sechdrs[info->index.str].sh_addr;
 
        /* Set types up while we still have access to sections. */
-       for (i = 0; i < mod->num_symtab; i++)
-               mod->symtab[i].st_info = elf_type(&mod->symtab[i], info);
-
-       mod->core_symtab = dst = mod->core_layout.base + info->symoffs;
-       mod->core_strtab = s = mod->core_layout.base + info->stroffs;
-       src = mod->symtab;
-       for (ndst = i = 0; i < mod->num_symtab; i++) {
+       for (i = 0; i < mod->kallsyms->num_symtab; i++)
+               mod->kallsyms->symtab[i].st_info
+                       = elf_type(&mod->kallsyms->symtab[i], info);
+
+       /* Now populate the cut down core kallsyms for after init. */
+       mod->core_kallsyms.symtab = dst = mod->core_layout.base + info->symoffs;
+       mod->core_kallsyms.strtab = s = mod->core_layout.base + info->stroffs;
+       src = mod->kallsyms->symtab;
+       for (ndst = i = 0; i < mod->kallsyms->num_symtab; i++) {
                if (i == 0 ||
                    is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum,
                                   info->index.pcpu)) {
                        dst[ndst] = src[i];
-                       dst[ndst++].st_name = s - mod->core_strtab;
-                       s += strlcpy(s, &mod->strtab[src[i].st_name],
+                       dst[ndst++].st_name = s - mod->core_kallsyms.strtab;
+                       s += strlcpy(s, &mod->kallsyms->strtab[src[i].st_name],
                                     KSYM_NAME_LEN) + 1;
                }
        }
-       mod->core_num_syms = ndst;
+       mod->core_kallsyms.num_symtab = ndst;
 }
 #else
 static inline void layout_symtab(struct module *mod, struct load_info *info)
@@ -3263,9 +3282,8 @@ static noinline int do_init_module(struct module *mod)
        module_put(mod);
        trim_init_extable(mod);
 #ifdef CONFIG_KALLSYMS
-       mod->num_symtab = mod->core_num_syms;
-       mod->symtab = mod->core_symtab;
-       mod->strtab = mod->core_strtab;
+       /* Switch to core kallsyms now init is done: kallsyms may be walking! */
+       rcu_assign_pointer(mod->kallsyms, &mod->core_kallsyms);
 #endif
        mod_tree_remove_init(mod);
        disable_ro_nx(&mod->init_layout);
@@ -3496,7 +3514,7 @@ static int load_module(struct load_info *info, const char __user *uargs,
 
        /* Module is ready to execute: parsing args may do that. */
        after_dashes = parse_args(mod->name, mod->args, mod->kp, mod->num_kp,
-                                 -32768, 32767, NULL,
+                                 -32768, 32767, mod,
                                  unknown_module_param_cb);
        if (IS_ERR(after_dashes)) {
                err = PTR_ERR(after_dashes);
@@ -3627,6 +3645,11 @@ static inline int is_arm_mapping_symbol(const char *str)
               && (str[2] == '\0' || str[2] == '.');
 }
 
+static const char *symname(struct mod_kallsyms *kallsyms, unsigned int symnum)
+{
+       return kallsyms->strtab + kallsyms->symtab[symnum].st_name;
+}
+
 static const char *get_ksymbol(struct module *mod,
                               unsigned long addr,
                               unsigned long *size,
@@ -3634,6 +3657,7 @@ static const char *get_ksymbol(struct module *mod,
 {
        unsigned int i, best = 0;
        unsigned long nextval;
+       struct mod_kallsyms *kallsyms = rcu_dereference_sched(mod->kallsyms);
 
        /* At worse, next value is at end of module */
        if (within_module_init(addr, mod))
@@ -3643,32 +3667,32 @@ static const char *get_ksymbol(struct module *mod,
 
        /* Scan for closest preceding symbol, and next symbol. (ELF
           starts real symbols at 1). */
-       for (i = 1; i < mod->num_symtab; i++) {
-               if (mod->symtab[i].st_shndx == SHN_UNDEF)
+       for (i = 1; i < kallsyms->num_symtab; i++) {
+               if (kallsyms->symtab[i].st_shndx == SHN_UNDEF)
                        continue;
 
                /* We ignore unnamed symbols: they're uninformative
                 * and inserted at a whim. */
-               if (mod->symtab[i].st_value <= addr
-                   && mod->symtab[i].st_value > mod->symtab[best].st_value
-                   && *(mod->strtab + mod->symtab[i].st_name) != '\0'
-                   && !is_arm_mapping_symbol(mod->strtab + mod->symtab[i].st_name))
+               if (*symname(kallsyms, i) == '\0'
+                   || is_arm_mapping_symbol(symname(kallsyms, i)))
+                       continue;
+
+               if (kallsyms->symtab[i].st_value <= addr
+                   && kallsyms->symtab[i].st_value > kallsyms->symtab[best].st_value)
                        best = i;
-               if (mod->symtab[i].st_value > addr
-                   && mod->symtab[i].st_value < nextval
-                   && *(mod->strtab + mod->symtab[i].st_name) != '\0'
-                   && !is_arm_mapping_symbol(mod->strtab + mod->symtab[i].st_name))
-                       nextval = mod->symtab[i].st_value;
+               if (kallsyms->symtab[i].st_value > addr
+                   && kallsyms->symtab[i].st_value < nextval)
+                       nextval = kallsyms->symtab[i].st_value;
        }
 
        if (!best)
                return NULL;
 
        if (size)
-               *size = nextval - mod->symtab[best].st_value;
+               *size = nextval - kallsyms->symtab[best].st_value;
        if (offset)
-               *offset = addr - mod->symtab[best].st_value;
-       return mod->strtab + mod->symtab[best].st_name;
+               *offset = addr - kallsyms->symtab[best].st_value;
+       return symname(kallsyms, best);
 }
 
 /* For kallsyms to ask for address resolution.  NULL means not found.  Careful
@@ -3758,19 +3782,21 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
 
        preempt_disable();
        list_for_each_entry_rcu(mod, &modules, list) {
+               struct mod_kallsyms *kallsyms;
+
                if (mod->state == MODULE_STATE_UNFORMED)
                        continue;
-               if (symnum < mod->num_symtab) {
-                       *value = mod->symtab[symnum].st_value;
-                       *type = mod->symtab[symnum].st_info;
-                       strlcpy(name, mod->strtab + mod->symtab[symnum].st_name,
-                               KSYM_NAME_LEN);
+               kallsyms = rcu_dereference_sched(mod->kallsyms);
+               if (symnum < kallsyms->num_symtab) {
+                       *value = kallsyms->symtab[symnum].st_value;
+                       *type = kallsyms->symtab[symnum].st_info;
+                       strlcpy(name, symname(kallsyms, symnum), KSYM_NAME_LEN);
                        strlcpy(module_name, mod->name, MODULE_NAME_LEN);
                        *exported = is_exported(name, *value, mod);
                        preempt_enable();
                        return 0;
                }
-               symnum -= mod->num_symtab;
+               symnum -= kallsyms->num_symtab;
        }
        preempt_enable();
        return -ERANGE;
@@ -3779,11 +3805,12 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
 static unsigned long mod_find_symname(struct module *mod, const char *name)
 {
        unsigned int i;
+       struct mod_kallsyms *kallsyms = rcu_dereference_sched(mod->kallsyms);
 
-       for (i = 0; i < mod->num_symtab; i++)
-               if (strcmp(name, mod->strtab+mod->symtab[i].st_name) == 0 &&
-                   mod->symtab[i].st_info != 'U')
-                       return mod->symtab[i].st_value;
+       for (i = 0; i < kallsyms->num_symtab; i++)
+               if (strcmp(name, symname(kallsyms, i)) == 0 &&
+                   kallsyms->symtab[i].st_info != 'U')
+                       return kallsyms->symtab[i].st_value;
        return 0;
 }
 
@@ -3822,11 +3849,14 @@ int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *,
        module_assert_mutex();
 
        list_for_each_entry(mod, &modules, list) {
+               /* We hold module_mutex: no need for rcu_dereference_sched */
+               struct mod_kallsyms *kallsyms = mod->kallsyms;
+
                if (mod->state == MODULE_STATE_UNFORMED)
                        continue;
-               for (i = 0; i < mod->num_symtab; i++) {
-                       ret = fn(data, mod->strtab + mod->symtab[i].st_name,
-                                mod, mod->symtab[i].st_value);
+               for (i = 0; i < kallsyms->num_symtab; i++) {
+                       ret = fn(data, symname(kallsyms, i),
+                                mod, kallsyms->symtab[i].st_value);
                        if (ret != 0)
                                return ret;
                }
index f4ad91b746f1e4afd96b3615a4bad844a0875528..4d73a834c7e6590e29eb0d1a71645caf6c274016 100644 (file)
@@ -588,7 +588,7 @@ void __init pidhash_init(void)
 
 void __init pidmap_init(void)
 {
-       /* Veryify no one has done anything silly */
+       /* Verify no one has done anything silly: */
        BUILD_BUG_ON(PID_MAX_LIMIT >= PIDNS_HASH_ADDING);
 
        /* bump default and minimum pid_max based on number of cpus */
index 02e8dfaa1ce2d2085bd85e165ad6f25b9697a11a..68d3ebc12601b6bdea4f9d2c6cfca997a0b6a0dc 100644 (file)
@@ -235,7 +235,7 @@ config PM_TRACE_RTC
 
 config APM_EMULATION
        tristate "Advanced Power Management Emulation"
-       depends on PM && SYS_SUPPORTS_APM_EMULATION
+       depends on SYS_SUPPORTS_APM_EMULATION
        help
          APM is a BIOS specification for saving power using several different
          techniques. This is mostly useful for battery powered laptops with
index 0b4570cfacaeb2f5290d8f4b98d256943d22333c..074994bcfa9be14336ecb06912a969f6d9546c9d 100644 (file)
@@ -1133,7 +1133,7 @@ static ssize_t relay_file_read_subbufs(struct file *filp, loff_t *ppos,
        if (!desc->count)
                return 0;
 
-       mutex_lock(&file_inode(filp)->i_mutex);
+       inode_lock(file_inode(filp));
        do {
                if (!relay_file_read_avail(buf, *ppos))
                        break;
@@ -1153,7 +1153,7 @@ static ssize_t relay_file_read_subbufs(struct file *filp, loff_t *ppos,
                        *ppos = relay_file_read_end_pos(buf, read_start, ret);
                }
        } while (desc->count && ret);
-       mutex_unlock(&file_inode(filp)->i_mutex);
+       inode_unlock(file_inode(filp));
 
        return desc->written;
 }
index 44253adb3c36dc28fb84cf9fdda4609f59dc388c..9503d590e5ef5b81537947b9925ecfe689f72a91 100644 (file)
@@ -222,9 +222,9 @@ sched_feat_write(struct file *filp, const char __user *ubuf,
 
        /* Ensure the static_key remains in a consistent state */
        inode = file_inode(filp);
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        i = sched_feat_set(cmp);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        if (i == __SCHED_FEAT_NR)
                return -EINVAL;
 
@@ -6840,7 +6840,7 @@ static void sched_init_numa(void)
 
                        sched_domains_numa_masks[i][j] = mask;
 
-                       for (k = 0; k < nr_node_ids; k++) {
+                       for_each_node(k) {
                                if (node_distance(j, k) > sched_domains_numa_distance[i])
                                        continue;
 
index 1926606ece807361ab02ef51f4864543640e8d8d..56b7d4b839476b6ed1692e786abe9ed6cda64a5f 100644 (file)
@@ -1220,8 +1220,6 @@ static void task_numa_assign(struct task_numa_env *env,
 {
        if (env->best_task)
                put_task_struct(env->best_task);
-       if (p)
-               get_task_struct(p);
 
        env->best_task = p;
        env->best_imp = imp;
@@ -1289,20 +1287,30 @@ static void task_numa_compare(struct task_numa_env *env,
        long imp = env->p->numa_group ? groupimp : taskimp;
        long moveimp = imp;
        int dist = env->dist;
+       bool assigned = false;
 
        rcu_read_lock();
 
        raw_spin_lock_irq(&dst_rq->lock);
        cur = dst_rq->curr;
        /*
-        * No need to move the exiting task, and this ensures that ->curr
-        * wasn't reaped and thus get_task_struct() in task_numa_assign()
-        * is safe under RCU read lock.
-        * Note that rcu_read_lock() itself can't protect from the final
-        * put_task_struct() after the last schedule().
+        * No need to move the exiting task or idle task.
         */
        if ((cur->flags & PF_EXITING) || is_idle_task(cur))
                cur = NULL;
+       else {
+               /*
+                * The task_struct must be protected here to protect the
+                * p->numa_faults access in the task_weight since the
+                * numa_faults could already be freed in the following path:
+                * finish_task_switch()
+                *     --> put_task_struct()
+                *         --> __put_task_struct()
+                *             --> task_numa_free()
+                */
+               get_task_struct(cur);
+       }
+
        raw_spin_unlock_irq(&dst_rq->lock);
 
        /*
@@ -1386,6 +1394,7 @@ balance:
                 */
                if (!load_too_imbalanced(src_load, dst_load, env)) {
                        imp = moveimp - 1;
+                       put_task_struct(cur);
                        cur = NULL;
                        goto assign;
                }
@@ -1411,9 +1420,16 @@ balance:
                env->dst_cpu = select_idle_sibling(env->p, env->dst_cpu);
 
 assign:
+       assigned = true;
        task_numa_assign(env, cur, imp);
 unlock:
        rcu_read_unlock();
+       /*
+        * The dst_rq->curr isn't assigned. The protection for task_struct is
+        * finished.
+        */
+       if (cur && !assigned)
+               put_task_struct(cur);
 }
 
 static void task_numa_find_cpu(struct task_numa_env *env,
index de0e786b26671b633ccaa56d9ebba6ce7b9d77cf..544a7133cbd1df2a6ee4369231148fc0309eae11 100644 (file)
@@ -162,7 +162,7 @@ static void cpuidle_idle_call(void)
         */
        if (idle_should_freeze()) {
                entered_state = cpuidle_enter_freeze(drv, dev);
-               if (entered_state >= 0) {
+               if (entered_state > 0) {
                        local_irq_enable();
                        goto exit_idle;
                }
index 580ac2d4024ffbdb29960ccdd86424fa730b1e78..15a1795bbba17d1140e3220c41d48dd26ac43c95 100644 (file)
@@ -316,24 +316,24 @@ static inline void seccomp_sync_threads(void)
                put_seccomp_filter(thread);
                smp_store_release(&thread->seccomp.filter,
                                  caller->seccomp.filter);
+
+               /*
+                * Don't let an unprivileged task work around
+                * the no_new_privs restriction by creating
+                * a thread that sets it up, enters seccomp,
+                * then dies.
+                */
+               if (task_no_new_privs(caller))
+                       task_set_no_new_privs(thread);
+
                /*
                 * Opt the other thread into seccomp if needed.
                 * As threads are considered to be trust-realm
                 * equivalent (see ptrace_may_access), it is safe to
                 * allow one thread to transition the other.
                 */
-               if (thread->seccomp.mode == SECCOMP_MODE_DISABLED) {
-                       /*
-                        * Don't let an unprivileged task work around
-                        * the no_new_privs restriction by creating
-                        * a thread that sets it up, enters seccomp,
-                        * then dies.
-                        */
-                       if (task_no_new_privs(caller))
-                               task_set_no_new_privs(thread);
-
+               if (thread->seccomp.mode == SECCOMP_MODE_DISABLED)
                        seccomp_assign_mode(thread, SECCOMP_MODE_FILTER);
-               }
        }
 }
 
index f3f1f7a972fd40f3d437d6bf3b5db2cacaffe6ca..0508544c8ced0d96913905dc53af68f38b6ee618 100644 (file)
@@ -3508,8 +3508,10 @@ static int sigsuspend(sigset_t *set)
        current->saved_sigmask = current->blocked;
        set_current_blocked(set);
 
-       __set_current_state(TASK_INTERRUPTIBLE);
-       schedule();
+       while (!signal_pending(current)) {
+               __set_current_state(TASK_INTERRUPTIBLE);
+               schedule();
+       }
        set_restore_sigmask();
        return -ERESTARTNOHAND;
 }
index 91420362e0b339fcdfe930066dab863604357b13..97715fd9e790ade5d7cd7731107de2dafb8272e3 100644 (file)
@@ -1757,6 +1757,20 @@ static struct ctl_table fs_table[] = {
                .proc_handler   = &pipe_proc_fn,
                .extra1         = &pipe_min_size,
        },
+       {
+               .procname       = "pipe-user-pages-hard",
+               .data           = &pipe_user_pages_hard,
+               .maxlen         = sizeof(pipe_user_pages_hard),
+               .mode           = 0644,
+               .proc_handler   = proc_doulongvec_minmax,
+       },
+       {
+               .procname       = "pipe-user-pages-soft",
+               .data           = &pipe_user_pages_soft,
+               .maxlen         = sizeof(pipe_user_pages_soft),
+               .mode           = 0644,
+               .proc_handler   = proc_doulongvec_minmax,
+       },
        { }
 };
 
index 435b8850dd80a300c11ad128bc0cf51e3c23e15a..fa909f9fd5591801968523032e00f8fe816fb23f 100644 (file)
@@ -897,10 +897,10 @@ static int enqueue_hrtimer(struct hrtimer *timer,
  */
 static void __remove_hrtimer(struct hrtimer *timer,
                             struct hrtimer_clock_base *base,
-                            unsigned long newstate, int reprogram)
+                            u8 newstate, int reprogram)
 {
        struct hrtimer_cpu_base *cpu_base = base->cpu_base;
-       unsigned int state = timer->state;
+       u8 state = timer->state;
 
        timer->state = newstate;
        if (!(state & HRTIMER_STATE_ENQUEUED))
@@ -930,7 +930,7 @@ static inline int
 remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, bool restart)
 {
        if (hrtimer_is_queued(timer)) {
-               unsigned long state = timer->state;
+               u8 state = timer->state;
                int reprogram;
 
                /*
@@ -954,6 +954,22 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, bool rest
        return 0;
 }
 
+static inline ktime_t hrtimer_update_lowres(struct hrtimer *timer, ktime_t tim,
+                                           const enum hrtimer_mode mode)
+{
+#ifdef CONFIG_TIME_LOW_RES
+       /*
+        * CONFIG_TIME_LOW_RES indicates that the system has no way to return
+        * granular time values. For relative timers we add hrtimer_resolution
+        * (i.e. one jiffie) to prevent short timeouts.
+        */
+       timer->is_rel = mode & HRTIMER_MODE_REL;
+       if (timer->is_rel)
+               tim = ktime_add_safe(tim, ktime_set(0, hrtimer_resolution));
+#endif
+       return tim;
+}
+
 /**
  * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU
  * @timer:     the timer to be added
@@ -974,19 +990,10 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
        /* Remove an active timer from the queue: */
        remove_hrtimer(timer, base, true);
 
-       if (mode & HRTIMER_MODE_REL) {
+       if (mode & HRTIMER_MODE_REL)
                tim = ktime_add_safe(tim, base->get_time());
-               /*
-                * CONFIG_TIME_LOW_RES is a temporary way for architectures
-                * to signal that they simply return xtime in
-                * do_gettimeoffset(). In this case we want to round up by
-                * resolution when starting a relative timer, to avoid short
-                * timeouts. This will go away with the GTOD framework.
-                */
-#ifdef CONFIG_TIME_LOW_RES
-               tim = ktime_add_safe(tim, ktime_set(0, hrtimer_resolution));
-#endif
-       }
+
+       tim = hrtimer_update_lowres(timer, tim, mode);
 
        hrtimer_set_expires_range_ns(timer, tim, delta_ns);
 
@@ -1074,19 +1081,23 @@ EXPORT_SYMBOL_GPL(hrtimer_cancel);
 /**
  * hrtimer_get_remaining - get remaining time for the timer
  * @timer:     the timer to read
+ * @adjust:    adjust relative timers when CONFIG_TIME_LOW_RES=y
  */
-ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
+ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust)
 {
        unsigned long flags;
        ktime_t rem;
 
        lock_hrtimer_base(timer, &flags);
-       rem = hrtimer_expires_remaining(timer);
+       if (IS_ENABLED(CONFIG_TIME_LOW_RES) && adjust)
+               rem = hrtimer_expires_remaining_adjusted(timer);
+       else
+               rem = hrtimer_expires_remaining(timer);
        unlock_hrtimer_base(timer, &flags);
 
        return rem;
 }
-EXPORT_SYMBOL_GPL(hrtimer_get_remaining);
+EXPORT_SYMBOL_GPL(__hrtimer_get_remaining);
 
 #ifdef CONFIG_NO_HZ_COMMON
 /**
@@ -1219,6 +1230,14 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,
        timer_stats_account_hrtimer(timer);
        fn = timer->function;
 
+       /*
+        * Clear the 'is relative' flag for the TIME_LOW_RES case. If the
+        * timer is restarted with a period then it becomes an absolute
+        * timer. If its not restarted it does not matter.
+        */
+       if (IS_ENABLED(CONFIG_TIME_LOW_RES))
+               timer->is_rel = false;
+
        /*
         * Because we run timers from hardirq context, there is no chance
         * they get migrated to another cpu, therefore its safe to unlock
index 8d262b4675738d21bea752d65a47117c3d8ae514..1d5c7204ddc97bce881431b399e58741961c6391 100644 (file)
@@ -26,7 +26,7 @@
  */
 static struct timeval itimer_get_remtime(struct hrtimer *timer)
 {
-       ktime_t rem = hrtimer_get_remaining(timer);
+       ktime_t rem = __hrtimer_get_remaining(timer, true);
 
        /*
         * Racy but safe: if the itimer expires after the above
index 36f2ca09aa5e458bf1f31e3ec018e180dcc5b345..6df8927c58a5bbedcab9efceac7c21985bec0bf0 100644 (file)
@@ -685,8 +685,18 @@ int ntp_validate_timex(struct timex *txc)
                if (!capable(CAP_SYS_TIME))
                        return -EPERM;
 
-               if (!timeval_inject_offset_valid(&txc->time))
-                       return -EINVAL;
+               if (txc->modes & ADJ_NANO) {
+                       struct timespec ts;
+
+                       ts.tv_sec = txc->time.tv_sec;
+                       ts.tv_nsec = txc->time.tv_usec;
+                       if (!timespec_inject_offset_valid(&ts))
+                               return -EINVAL;
+
+               } else {
+                       if (!timeval_inject_offset_valid(&txc->time))
+                               return -EINVAL;
+               }
        }
 
        /*
index 31d11ac9fa4739789728c44470b82115ec307d11..f2826c35e9185f60586f06dff53f51ca3324c659 100644 (file)
@@ -760,7 +760,7 @@ common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting)
            (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE))
                timr->it_overrun += (unsigned int) hrtimer_forward(timer, now, iv);
 
-       remaining = ktime_sub(hrtimer_get_expires(timer), now);
+       remaining = __hrtimer_expires_remaining_adjusted(timer, now);
        /* Return 0 only, when the timer is expired and not pending */
        if (remaining.tv64 <= 0) {
                /*
index 9d7a053545f5aca7a324f3530ee52ca9dac413f8..0b17424349eb4dafd76a2cf588748988ce51a295 100644 (file)
  */
 static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched);
 
-/*
- * The time, when the last jiffy update happened. Protected by jiffies_lock.
- */
-static ktime_t last_jiffies_update;
-
 struct tick_sched *tick_get_tick_sched(int cpu)
 {
        return &per_cpu(tick_cpu_sched, cpu);
 }
 
+#if defined(CONFIG_NO_HZ_COMMON) || defined(CONFIG_HIGH_RES_TIMERS)
+/*
+ * The time, when the last jiffy update happened. Protected by jiffies_lock.
+ */
+static ktime_t last_jiffies_update;
+
 /*
  * Must be called with interrupts disabled !
  */
@@ -151,6 +152,7 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
        update_process_times(user_mode(regs));
        profile_tick(CPU_PROFILING);
 }
+#endif
 
 #ifdef CONFIG_NO_HZ_FULL
 cpumask_var_t tick_nohz_full_mask;
@@ -993,9 +995,9 @@ static void tick_nohz_switch_to_nohz(void)
        /* Get the next period */
        next = tick_init_jiffy_update();
 
-       hrtimer_forward_now(&ts->sched_timer, tick_period);
        hrtimer_set_expires(&ts->sched_timer, next);
-       tick_program_event(next, 1);
+       hrtimer_forward_now(&ts->sched_timer, tick_period);
+       tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
        tick_nohz_activate(ts, NOHZ_MODE_LOWRES);
 }
 
index f75e35b6014900da71ffa438a507c34f19e36e3a..ba7d8b288bb37250ce9e9c0e5f562b45832c4ca9 100644 (file)
@@ -69,7 +69,7 @@ print_timer(struct seq_file *m, struct hrtimer *taddr, struct hrtimer *timer,
        print_name_offset(m, taddr);
        SEQ_printf(m, ", ");
        print_name_offset(m, timer->function);
-       SEQ_printf(m, ", S:%02lx", timer->state);
+       SEQ_printf(m, ", S:%02x", timer->state);
 #ifdef CONFIG_TIMER_STATS
        SEQ_printf(m, ", ");
        print_name_offset(m, timer->start_site);
index 45dd798bcd37e7bd529e61932e51a30366590626..326a75e884dbf3f46513538861a028c7be459f09 100644 (file)
@@ -191,14 +191,17 @@ static u64 bpf_perf_event_read(u64 r1, u64 index, u64 r3, u64 r4, u64 r5)
        struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
        struct bpf_array *array = container_of(map, struct bpf_array, map);
        struct perf_event *event;
+       struct file *file;
 
        if (unlikely(index >= array->map.max_entries))
                return -E2BIG;
 
-       event = (struct perf_event *)array->ptrs[index];
-       if (!event)
+       file = (struct file *)array->ptrs[index];
+       if (unlikely(!file))
                return -ENOENT;
 
+       event = file->private_data;
+
        /* make sure event is local and doesn't have pmu::count */
        if (event->oncpu != smp_processor_id() ||
            event->pmu->count)
@@ -228,6 +231,7 @@ static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 index, u64 r4, u64 size)
        void *data = (void *) (long) r4;
        struct perf_sample_data sample_data;
        struct perf_event *event;
+       struct file *file;
        struct perf_raw_record raw = {
                .size = size,
                .data = data,
@@ -236,10 +240,12 @@ static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 index, u64 r4, u64 size)
        if (unlikely(index >= array->map.max_entries))
                return -E2BIG;
 
-       event = (struct perf_event *)array->ptrs[index];
-       if (unlikely(!event))
+       file = (struct file *)array->ptrs[index];
+       if (unlikely(!file))
                return -ENOENT;
 
+       event = file->private_data;
+
        if (unlikely(event->attr.type != PERF_TYPE_SOFTWARE ||
                     event->attr.config != PERF_COUNT_SW_BPF_OUTPUT))
                return -EINVAL;
index 87fb9801bd9eead0d6e45bef49bc1eefc324c6a7..d9293402ee685ae59007fe1ade71866e121a0882 100644 (file)
@@ -1751,7 +1751,7 @@ void trace_buffer_unlock_commit_regs(struct trace_array *tr,
 {
        __buffer_unlock_commit(buffer, event);
 
-       ftrace_trace_stack(tr, buffer, flags, 6, pc, regs);
+       ftrace_trace_stack(tr, buffer, flags, 0, pc, regs);
        ftrace_trace_userstack(buffer, flags, pc);
 }
 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
index dda9e6742950305f36fbe920f9fe0c6f68d83fbf..202df6cffccaab333c02facc56b28082b09e5e42 100644 (file)
@@ -125,6 +125,13 @@ check_stack(unsigned long ip, unsigned long *stack)
                        break;
        }
 
+       /*
+        * Some archs may not have the passed in ip in the dump.
+        * If that happens, we need to show everything.
+        */
+       if (i == stack_trace_max.nr_entries)
+               i = 0;
+
        /*
         * Now find where in the stack these are.
         */
index 61a0264e28f9b5917c0e8a60bb9668b21e59c4b2..7ff5dc7d2ac5f47395bc3be8484c642c5d577b6b 100644 (file)
@@ -301,7 +301,23 @@ static DEFINE_SPINLOCK(wq_mayday_lock);    /* protects wq->maydays list */
 static LIST_HEAD(workqueues);          /* PR: list of all workqueues */
 static bool workqueue_freezing;                /* PL: have wqs started freezing? */
 
-static cpumask_var_t wq_unbound_cpumask; /* PL: low level cpumask for all unbound wqs */
+/* PL: allowable cpus for unbound wqs and work items */
+static cpumask_var_t wq_unbound_cpumask;
+
+/* CPU where unbound work was last round robin scheduled from this CPU */
+static DEFINE_PER_CPU(int, wq_rr_cpu_last);
+
+/*
+ * Local execution of unbound work items is no longer guaranteed.  The
+ * following always forces round-robin CPU selection on unbound work items
+ * to uncover usages which depend on it.
+ */
+#ifdef CONFIG_DEBUG_WQ_FORCE_RR_CPU
+static bool wq_debug_force_rr_cpu = true;
+#else
+static bool wq_debug_force_rr_cpu = false;
+#endif
+module_param_named(debug_force_rr_cpu, wq_debug_force_rr_cpu, bool, 0644);
 
 /* the per-cpu worker pools */
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS],
@@ -570,6 +586,16 @@ static struct pool_workqueue *unbound_pwq_by_node(struct workqueue_struct *wq,
                                                  int node)
 {
        assert_rcu_or_wq_mutex_or_pool_mutex(wq);
+
+       /*
+        * XXX: @node can be NUMA_NO_NODE if CPU goes offline while a
+        * delayed item is pending.  The plan is to keep CPU -> NODE
+        * mapping valid and stable across CPU on/offlines.  Once that
+        * happens, this workaround can be removed.
+        */
+       if (unlikely(node == NUMA_NO_NODE))
+               return wq->dfl_pwq;
+
        return rcu_dereference_raw(wq->numa_pwq_tbl[node]);
 }
 
@@ -1298,6 +1324,39 @@ static bool is_chained_work(struct workqueue_struct *wq)
        return worker && worker->current_pwq->wq == wq;
 }
 
+/*
+ * When queueing an unbound work item to a wq, prefer local CPU if allowed
+ * by wq_unbound_cpumask.  Otherwise, round robin among the allowed ones to
+ * avoid perturbing sensitive tasks.
+ */
+static int wq_select_unbound_cpu(int cpu)
+{
+       static bool printed_dbg_warning;
+       int new_cpu;
+
+       if (likely(!wq_debug_force_rr_cpu)) {
+               if (cpumask_test_cpu(cpu, wq_unbound_cpumask))
+                       return cpu;
+       } else if (!printed_dbg_warning) {
+               pr_warn("workqueue: round-robin CPU selection forced, expect performance impact\n");
+               printed_dbg_warning = true;
+       }
+
+       if (cpumask_empty(wq_unbound_cpumask))
+               return cpu;
+
+       new_cpu = __this_cpu_read(wq_rr_cpu_last);
+       new_cpu = cpumask_next_and(new_cpu, wq_unbound_cpumask, cpu_online_mask);
+       if (unlikely(new_cpu >= nr_cpu_ids)) {
+               new_cpu = cpumask_first_and(wq_unbound_cpumask, cpu_online_mask);
+               if (unlikely(new_cpu >= nr_cpu_ids))
+                       return cpu;
+       }
+       __this_cpu_write(wq_rr_cpu_last, new_cpu);
+
+       return new_cpu;
+}
+
 static void __queue_work(int cpu, struct workqueue_struct *wq,
                         struct work_struct *work)
 {
@@ -1323,7 +1382,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
                return;
 retry:
        if (req_cpu == WORK_CPU_UNBOUND)
-               cpu = raw_smp_processor_id();
+               cpu = wq_select_unbound_cpu(raw_smp_processor_id());
 
        /* pwq which will be used unless @work is executing elsewhere */
        if (!(wq->flags & WQ_UNBOUND))
@@ -1464,13 +1523,13 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
        timer_stats_timer_set_start_info(&dwork->timer);
 
        dwork->wq = wq;
-       /* timer isn't guaranteed to run in this cpu, record earlier */
-       if (cpu == WORK_CPU_UNBOUND)
-               cpu = raw_smp_processor_id();
        dwork->cpu = cpu;
        timer->expires = jiffies + delay;
 
-       add_timer_on(timer, cpu);
+       if (unlikely(cpu != WORK_CPU_UNBOUND))
+               add_timer_on(timer, cpu);
+       else
+               add_timer(timer);
 }
 
 /**
@@ -2355,7 +2414,8 @@ static void check_flush_dependency(struct workqueue_struct *target_wq,
        WARN_ONCE(current->flags & PF_MEMALLOC,
                  "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%pf",
                  current->pid, current->comm, target_wq->name, target_func);
-       WARN_ONCE(worker && (worker->current_pwq->wq->flags & WQ_MEM_RECLAIM),
+       WARN_ONCE(worker && ((worker->current_pwq->wq->flags &
+                             (WQ_MEM_RECLAIM | __WQ_LEGACY)) == WQ_MEM_RECLAIM),
                  "workqueue: WQ_MEM_RECLAIM %s:%pf is flushing !WQ_MEM_RECLAIM %s:%pf",
                  worker->current_pwq->wq->name, worker->current_func,
                  target_wq->name, target_func);
index 5a0c1c83cdf0af97ea9446657b3527dc1e206d45..133ebc0c17735bf14be352776609bab732347f9d 100644 (file)
@@ -210,9 +210,11 @@ config RANDOM32_SELFTEST
 # compression support is select'ed if needed
 #
 config 842_COMPRESS
+       select CRC32
        tristate
 
 config 842_DECOMPRESS
+       select CRC32
        tristate
 
 config ZLIB_INFLATE
@@ -475,6 +477,11 @@ config DDR
          information. This data is useful for drivers handling
          DDR SDRAM controllers.
 
+config IRQ_POLL
+       bool "IRQ polling library"
+       help
+         Helper library to poll interrupt mitigation using polling.
+
 config MPILIB
        tristate
        select CLZ_TAB
index ecb9e75614bf87f1e368074d6ef84bfe3f1003b8..8bfd1aca7a3d01a9887700b3f90d1d5697a9dab4 100644 (file)
@@ -1400,6 +1400,21 @@ config RCU_EQS_DEBUG
 
 endmenu # "RCU Debugging"
 
+config DEBUG_WQ_FORCE_RR_CPU
+       bool "Force round-robin CPU selection for unbound work items"
+       depends on DEBUG_KERNEL
+       default n
+       help
+         Workqueue used to implicitly guarantee that work items queued
+         without explicit CPU specified are put on the local CPU.  This
+         guarantee is no longer true and while local CPU is still
+         preferred work items may be put on foreign CPUs.  Kernel
+         parameter "workqueue.debug_force_rr_cpu" is added to force
+         round-robin CPU selection to flush out usages which depend on the
+         now broken guarantee.  This config option enables the debug
+         feature by default.  When enabled, memory and cache locality will
+         be impacted.
+
 config DEBUG_BLOCK_EXT_DEVT
         bool "Force extended block device numbers and spread them"
        depends on DEBUG_KERNEL
index 2d4bc33d09b42097085480d7ce93ec3a8882faba..a7c26a41a738bd1dee37a0654cc87e5c51d65653 100644 (file)
@@ -165,6 +165,7 @@ obj-$(CONFIG_GENERIC_NET_UTILS) += net_utils.o
 
 obj-$(CONFIG_SG_SPLIT) += sg_split.o
 obj-$(CONFIG_STMP_DEVICE) += stmp_device.o
+obj-$(CONFIG_IRQ_POLL) += irq_poll.o
 
 libfdt_files = fdt.o fdt_ro.o fdt_wip.o fdt_rw.o fdt_sw.o fdt_strerror.o \
               fdt_empty_tree.o
index 547f7f923dbcbd24f8bb99e13ef7c13db947b8dd..519b5a10fd704dd412c7cc9c7327fcb2da3c6f0e 100644 (file)
@@ -21,7 +21,7 @@
 #define ODEBUG_HASH_BITS       14
 #define ODEBUG_HASH_SIZE       (1 << ODEBUG_HASH_BITS)
 
-#define ODEBUG_POOL_SIZE       512
+#define ODEBUG_POOL_SIZE       1024
 #define ODEBUG_POOL_MIN_LEVEL  256
 
 #define ODEBUG_CHUNK_SHIFT     PAGE_SHIFT
index 6745c6230db3403629048256968443f51b777655..c30d07e99dba4cc32be6aeb4d353d79058509b4b 100644 (file)
@@ -25,6 +25,7 @@ static atomic_t dump_lock = ATOMIC_INIT(-1);
 
 asmlinkage __visible void dump_stack(void)
 {
+       unsigned long flags;
        int was_locked;
        int old;
        int cpu;
@@ -33,9 +34,8 @@ asmlinkage __visible void dump_stack(void)
         * Permit this cpu to perform nested stack dumps while serialising
         * against other CPUs
         */
-       preempt_disable();
-
 retry:
+       local_irq_save(flags);
        cpu = smp_processor_id();
        old = atomic_cmpxchg(&dump_lock, -1, cpu);
        if (old == -1) {
@@ -43,6 +43,7 @@ retry:
        } else if (old == cpu) {
                was_locked = 1;
        } else {
+               local_irq_restore(flags);
                cpu_relax();
                goto retry;
        }
@@ -52,7 +53,7 @@ retry:
        if (!was_locked)
                atomic_set(&dump_lock, -1);
 
-       preempt_enable();
+       local_irq_restore(flags);
 }
 #else
 asmlinkage __visible void dump_stack(void)
diff --git a/lib/irq_poll.c b/lib/irq_poll.c
new file mode 100644 (file)
index 0000000..836f7db
--- /dev/null
@@ -0,0 +1,222 @@
+/*
+ * Functions related to interrupt-poll handling in the block layer. This
+ * is similar to NAPI for network devices.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/bio.h>
+#include <linux/interrupt.h>
+#include <linux/cpu.h>
+#include <linux/irq_poll.h>
+#include <linux/delay.h>
+
+static unsigned int irq_poll_budget __read_mostly = 256;
+
+static DEFINE_PER_CPU(struct list_head, blk_cpu_iopoll);
+
+/**
+ * irq_poll_sched - Schedule a run of the iopoll handler
+ * @iop:      The parent iopoll structure
+ *
+ * Description:
+ *     Add this irq_poll structure to the pending poll list and trigger the
+ *     raise of the blk iopoll softirq.
+ **/
+void irq_poll_sched(struct irq_poll *iop)
+{
+       unsigned long flags;
+
+       if (test_bit(IRQ_POLL_F_DISABLE, &iop->state))
+               return;
+       if (test_and_set_bit(IRQ_POLL_F_SCHED, &iop->state))
+               return;
+
+       local_irq_save(flags);
+       list_add_tail(&iop->list, this_cpu_ptr(&blk_cpu_iopoll));
+       __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
+       local_irq_restore(flags);
+}
+EXPORT_SYMBOL(irq_poll_sched);
+
+/**
+ * __irq_poll_complete - Mark this @iop as un-polled again
+ * @iop:      The parent iopoll structure
+ *
+ * Description:
+ *     See irq_poll_complete(). This function must be called with interrupts
+ *     disabled.
+ **/
+static void __irq_poll_complete(struct irq_poll *iop)
+{
+       list_del(&iop->list);
+       smp_mb__before_atomic();
+       clear_bit_unlock(IRQ_POLL_F_SCHED, &iop->state);
+}
+
+/**
+ * irq_poll_complete - Mark this @iop as un-polled again
+ * @iop:      The parent iopoll structure
+ *
+ * Description:
+ *     If a driver consumes less than the assigned budget in its run of the
+ *     iopoll handler, it'll end the polled mode by calling this function. The
+ *     iopoll handler will not be invoked again before irq_poll_sched()
+ *     is called.
+ **/
+void irq_poll_complete(struct irq_poll *iop)
+{
+       unsigned long flags;
+
+       local_irq_save(flags);
+       __irq_poll_complete(iop);
+       local_irq_restore(flags);
+}
+EXPORT_SYMBOL(irq_poll_complete);
+
+static void irq_poll_softirq(struct softirq_action *h)
+{
+       struct list_head *list = this_cpu_ptr(&blk_cpu_iopoll);
+       int rearm = 0, budget = irq_poll_budget;
+       unsigned long start_time = jiffies;
+
+       local_irq_disable();
+
+       while (!list_empty(list)) {
+               struct irq_poll *iop;
+               int work, weight;
+
+               /*
+                * If softirq window is exhausted then punt.
+                */
+               if (budget <= 0 || time_after(jiffies, start_time)) {
+                       rearm = 1;
+                       break;
+               }
+
+               local_irq_enable();
+
+               /* Even though interrupts have been re-enabled, this
+                * access is safe because interrupts can only add new
+                * entries to the tail of this list, and only ->poll()
+                * calls can remove this head entry from the list.
+                */
+               iop = list_entry(list->next, struct irq_poll, list);
+
+               weight = iop->weight;
+               work = 0;
+               if (test_bit(IRQ_POLL_F_SCHED, &iop->state))
+                       work = iop->poll(iop, weight);
+
+               budget -= work;
+
+               local_irq_disable();
+
+               /*
+                * Drivers must not modify the iopoll state, if they
+                * consume their assigned weight (or more, some drivers can't
+                * easily just stop processing, they have to complete an
+                * entire mask of commands).In such cases this code
+                * still "owns" the iopoll instance and therefore can
+                * move the instance around on the list at-will.
+                */
+               if (work >= weight) {
+                       if (test_bit(IRQ_POLL_F_DISABLE, &iop->state))
+                               __irq_poll_complete(iop);
+                       else
+                               list_move_tail(&iop->list, list);
+               }
+       }
+
+       if (rearm)
+               __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
+
+       local_irq_enable();
+}
+
+/**
+ * irq_poll_disable - Disable iopoll on this @iop
+ * @iop:      The parent iopoll structure
+ *
+ * Description:
+ *     Disable io polling and wait for any pending callbacks to have completed.
+ **/
+void irq_poll_disable(struct irq_poll *iop)
+{
+       set_bit(IRQ_POLL_F_DISABLE, &iop->state);
+       while (test_and_set_bit(IRQ_POLL_F_SCHED, &iop->state))
+               msleep(1);
+       clear_bit(IRQ_POLL_F_DISABLE, &iop->state);
+}
+EXPORT_SYMBOL(irq_poll_disable);
+
+/**
+ * irq_poll_enable - Enable iopoll on this @iop
+ * @iop:      The parent iopoll structure
+ *
+ * Description:
+ *     Enable iopoll on this @iop. Note that the handler run will not be
+ *     scheduled, it will only mark it as active.
+ **/
+void irq_poll_enable(struct irq_poll *iop)
+{
+       BUG_ON(!test_bit(IRQ_POLL_F_SCHED, &iop->state));
+       smp_mb__before_atomic();
+       clear_bit_unlock(IRQ_POLL_F_SCHED, &iop->state);
+}
+EXPORT_SYMBOL(irq_poll_enable);
+
+/**
+ * irq_poll_init - Initialize this @iop
+ * @iop:      The parent iopoll structure
+ * @weight:   The default weight (or command completion budget)
+ * @poll_fn:  The handler to invoke
+ *
+ * Description:
+ *     Initialize and enable this irq_poll structure.
+ **/
+void irq_poll_init(struct irq_poll *iop, int weight, irq_poll_fn *poll_fn)
+{
+       memset(iop, 0, sizeof(*iop));
+       INIT_LIST_HEAD(&iop->list);
+       iop->weight = weight;
+       iop->poll = poll_fn;
+}
+EXPORT_SYMBOL(irq_poll_init);
+
+static int irq_poll_cpu_notify(struct notifier_block *self,
+                                unsigned long action, void *hcpu)
+{
+       /*
+        * If a CPU goes away, splice its entries to the current CPU
+        * and trigger a run of the softirq
+        */
+       if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
+               int cpu = (unsigned long) hcpu;
+
+               local_irq_disable();
+               list_splice_init(&per_cpu(blk_cpu_iopoll, cpu),
+                                this_cpu_ptr(&blk_cpu_iopoll));
+               __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
+               local_irq_enable();
+       }
+
+       return NOTIFY_OK;
+}
+
+static struct notifier_block irq_poll_cpu_notifier = {
+       .notifier_call  = irq_poll_cpu_notify,
+};
+
+static __init int irq_poll_setup(void)
+{
+       int i;
+
+       for_each_possible_cpu(i)
+               INIT_LIST_HEAD(&per_cpu(blk_cpu_iopoll, i));
+
+       open_softirq(IRQ_POLL_SOFTIRQ, irq_poll_softirq);
+       register_hotcpu_notifier(&irq_poll_cpu_notifier);
+       return 0;
+}
+subsys_initcall(irq_poll_setup);
index 31ce853fbfb1be6a2b1905e593377520ab951eab..74a54b7f25626e8c6d224af2b7384f7dcbf2e72f 100644 (file)
@@ -75,3 +75,4 @@ module_exit(libcrc32c_mod_fini);
 MODULE_AUTHOR("Clay Haapala <chaapala@cisco.com>");
 MODULE_DESCRIPTION("CRC32c (Castagnoli) calculations");
 MODULE_LICENSE("GPL");
+MODULE_SOFTDEP("pre: crc32c");
index fcf5d98574ce46871dca087d2c803dbfb67c0b81..6b79e9026e24894000a2bdf4180db80f8190b357 100644 (file)
@@ -1019,9 +1019,13 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
                return 0;
 
        radix_tree_for_each_slot(slot, root, &iter, first_index) {
-               results[ret] = indirect_to_ptr(rcu_dereference_raw(*slot));
+               results[ret] = rcu_dereference_raw(*slot);
                if (!results[ret])
                        continue;
+               if (radix_tree_is_indirect_ptr(results[ret])) {
+                       slot = radix_tree_iter_retry(&iter);
+                       continue;
+               }
                if (++ret == max_items)
                        break;
        }
@@ -1098,9 +1102,13 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
                return 0;
 
        radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) {
-               results[ret] = indirect_to_ptr(rcu_dereference_raw(*slot));
+               results[ret] = rcu_dereference_raw(*slot);
                if (!results[ret])
                        continue;
+               if (radix_tree_is_indirect_ptr(results[ret])) {
+                       slot = radix_tree_iter_retry(&iter);
+                       continue;
+               }
                if (++ret == max_items)
                        break;
        }
index 40e03ea2a967d978cffae1cbeffe23d609ce01bc..2c5de86460c5bb82f3d1c83b136b2f985484298a 100644 (file)
@@ -49,7 +49,7 @@ int ___ratelimit(struct ratelimit_state *rs, const char *func)
                if (rs->missed)
                        printk(KERN_WARNING "%s: %d callbacks suppressed\n",
                                func, rs->missed);
-               rs->begin   = 0;
+               rs->begin   = jiffies;
                rs->printed = 0;
                rs->missed  = 0;
        }
index bafa9933fa768d6f76e4ade1296c873a3c38c139..004fc70fc56a3d06947f9e89c60e40e272ce551c 100644 (file)
@@ -598,9 +598,9 @@ EXPORT_SYMBOL(sg_miter_next);
  *
  * Description:
  *   Stops mapping iterator @miter.  @miter should have been started
- *   started using sg_miter_start().  A stopped iteration can be
- *   resumed by calling sg_miter_next() on it.  This is useful when
- *   resources (kmap) need to be released during iteration.
+ *   using sg_miter_start().  A stopped iteration can be resumed by
+ *   calling sg_miter_next() on it.  This is useful when resources (kmap)
+ *   need to be released during iteration.
  *
  * Context:
  *   Preemption disabled if the SG_MITER_ATOMIC is set.  Don't care
index 98866a770770c8bba0acf7bdbacea4699d9f14bf..25b5cbfb7615bd63da19677c877b9e49c3f519e3 100644 (file)
@@ -327,36 +327,67 @@ out:
 }
 
 #define string_get_size_maxbuf 16
-#define test_string_get_size_one(size, blk_size, units, exp_result)            \
+#define test_string_get_size_one(size, blk_size, exp_result10, exp_result2)    \
        do {                                                                   \
-               BUILD_BUG_ON(sizeof(exp_result) >= string_get_size_maxbuf);    \
-               __test_string_get_size((size), (blk_size), (units),            \
-                                      (exp_result));                          \
+               BUILD_BUG_ON(sizeof(exp_result10) >= string_get_size_maxbuf);  \
+               BUILD_BUG_ON(sizeof(exp_result2) >= string_get_size_maxbuf);   \
+               __test_string_get_size((size), (blk_size), (exp_result10),     \
+                                      (exp_result2));                         \
        } while (0)
 
 
-static __init void __test_string_get_size(const u64 size, const u64 blk_size,
-                                         const enum string_size_units units,
-                                         const char *exp_result)
+static __init void test_string_get_size_check(const char *units,
+                                             const char *exp,
+                                             char *res,
+                                             const u64 size,
+                                             const u64 blk_size)
 {
-       char buf[string_get_size_maxbuf];
-
-       string_get_size(size, blk_size, units, buf, sizeof(buf));
-       if (!memcmp(buf, exp_result, strlen(exp_result) + 1))
+       if (!memcmp(res, exp, strlen(exp) + 1))
                return;
 
-       buf[sizeof(buf) - 1] = '\0';
-       pr_warn("Test 'test_string_get_size_one' failed!\n");
-       pr_warn("string_get_size(size = %llu, blk_size = %llu, units = %d\n",
+       res[string_get_size_maxbuf - 1] = '\0';
+
+       pr_warn("Test 'test_string_get_size' failed!\n");
+       pr_warn("string_get_size(size = %llu, blk_size = %llu, units = %s)\n",
                size, blk_size, units);
-       pr_warn("expected: '%s', got '%s'\n", exp_result, buf);
+       pr_warn("expected: '%s', got '%s'\n", exp, res);
+}
+
+static __init void __test_string_get_size(const u64 size, const u64 blk_size,
+                                         const char *exp_result10,
+                                         const char *exp_result2)
+{
+       char buf10[string_get_size_maxbuf];
+       char buf2[string_get_size_maxbuf];
+
+       string_get_size(size, blk_size, STRING_UNITS_10, buf10, sizeof(buf10));
+       string_get_size(size, blk_size, STRING_UNITS_2, buf2, sizeof(buf2));
+
+       test_string_get_size_check("STRING_UNITS_10", exp_result10, buf10,
+                                  size, blk_size);
+
+       test_string_get_size_check("STRING_UNITS_2", exp_result2, buf2,
+                                  size, blk_size);
 }
 
 static __init void test_string_get_size(void)
 {
-       test_string_get_size_one(16384, 512, STRING_UNITS_2, "8.00 MiB");
-       test_string_get_size_one(8192, 4096, STRING_UNITS_10, "32.7 MB");
-       test_string_get_size_one(1, 512, STRING_UNITS_10, "512 B");
+       /* small values */
+       test_string_get_size_one(0, 512, "0 B", "0 B");
+       test_string_get_size_one(1, 512, "512 B", "512 B");
+       test_string_get_size_one(1100, 1, "1.10 kB", "1.07 KiB");
+
+       /* normal values */
+       test_string_get_size_one(16384, 512, "8.39 MB", "8.00 MiB");
+       test_string_get_size_one(500118192, 512, "256 GB", "238 GiB");
+       test_string_get_size_one(8192, 4096, "33.6 MB", "32.0 MiB");
+
+       /* weird block sizes */
+       test_string_get_size_one(3000, 1900, "5.70 MB", "5.44 MiB");
+
+       /* huge values */
+       test_string_get_size_one(U64_MAX, 4096, "75.6 ZB", "64.0 ZiB");
+       test_string_get_size_one(4096, U64_MAX, "75.6 ZB", "64.0 ZiB");
 }
 
 static int __init test_string_helpers_init(void)
index 97a4e06b15c00dfd7f8b8bc0f1e1e4610b769938..03cbfa072f42a7378ed57b8b1060641b31e7d97e 100644 (file)
@@ -624,7 +624,7 @@ config ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
        bool
 
 config DEFERRED_STRUCT_PAGE_INIT
-       bool "Defer initialisation of struct pages to kswapd"
+       bool "Defer initialisation of struct pages to kthreads"
        default n
        depends on ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
        depends on MEMORY_HOTPLUG
@@ -633,9 +633,10 @@ config DEFERRED_STRUCT_PAGE_INIT
          single thread. On very large machines this can take a considerable
          amount of time. If this option is set, large machines will bring up
          a subset of memmap at boot and then initialise the rest in parallel
-         when kswapd starts. This has a potential performance impact on
-         processes running early in the lifetime of the systemm until kswapd
-         finishes the initialisation.
+         by starting one-off "pgdatinitX" kernel thread for each node X. This
+         has a potential performance impact on processes running early in the
+         lifetime of the system until these kthreads finish the
+         initialisation.
 
 config IDLE_PAGE_TRACKING
        bool "Enable idle page tracking"
index cc5d29d2da9b4b36be3fde383267ff10c59e1422..926c76d56388d12ea723b68add1e5b7ab6613842 100644 (file)
@@ -989,7 +989,7 @@ long wait_iff_congested(struct zone *zone, int sync, long timeout)
                 * here rather than calling cond_resched().
                 */
                if (current->flags & PF_WQ_WORKER)
-                       schedule_timeout(1);
+                       schedule_timeout_uninterruptible(1);
                else
                        cond_resched();
 
index 8fc50811119b28b54f15ac875ddc956217acc570..ba5d8f3e6d68a3769589c372adc3183b4addfb40 100644 (file)
@@ -22,7 +22,7 @@
  * cleancache_ops is set by cleancache_register_ops to contain the pointers
  * to the cleancache "backend" implementation functions.
  */
-static struct cleancache_ops *cleancache_ops __read_mostly;
+static const struct cleancache_ops *cleancache_ops __read_mostly;
 
 /*
  * Counters available via /sys/kernel/debug/cleancache (if debugfs is
@@ -49,7 +49,7 @@ static void cleancache_register_ops_sb(struct super_block *sb, void *unused)
 /*
  * Register operations for cleancache. Returns 0 on success.
  */
-int cleancache_register_ops(struct cleancache_ops *ops)
+int cleancache_register_ops(const struct cleancache_ops *ops)
 {
        if (cmpxchg(&cleancache_ops, NULL, ops))
                return -EBUSY;
index 847ee43c28068a0fb744fe124c1d8afa429d0719..bc943867d68c68dab4109fe715c8d37d6c72757a 100644 (file)
@@ -11,6 +11,7 @@
  */
 #include <linux/export.h>
 #include <linux/compiler.h>
+#include <linux/dax.h>
 #include <linux/fs.h>
 #include <linux/uaccess.h>
 #include <linux/capability.h>
@@ -123,9 +124,9 @@ static void page_cache_tree_delete(struct address_space *mapping,
        __radix_tree_lookup(&mapping->page_tree, page->index, &node, &slot);
 
        if (shadow) {
-               mapping->nrshadows++;
+               mapping->nrexceptional++;
                /*
-                * Make sure the nrshadows update is committed before
+                * Make sure the nrexceptional update is committed before
                 * the nrpages update so that final truncate racing
                 * with reclaim does not see both counters 0 at the
                 * same time and miss a shadow entry.
@@ -481,6 +482,12 @@ int filemap_write_and_wait_range(struct address_space *mapping,
 {
        int err = 0;
 
+       if (dax_mapping(mapping) && mapping->nrexceptional) {
+               err = dax_writeback_mapping_range(mapping, lstart, lend);
+               if (err)
+                       return err;
+       }
+
        if (mapping->nrpages) {
                err = __filemap_fdatawrite_range(mapping, lstart, lend,
                                                 WB_SYNC_ALL);
@@ -579,9 +586,13 @@ static int page_cache_tree_insert(struct address_space *mapping,
                p = radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
                if (!radix_tree_exceptional_entry(p))
                        return -EEXIST;
+
+               if (WARN_ON(dax_mapping(mapping)))
+                       return -EINVAL;
+
                if (shadowp)
                        *shadowp = p;
-               mapping->nrshadows--;
+               mapping->nrexceptional--;
                if (node)
                        workingset_node_shadows_dec(node);
        }
@@ -1245,9 +1256,9 @@ repeat:
                        if (radix_tree_deref_retry(page))
                                goto restart;
                        /*
-                        * A shadow entry of a recently evicted page,
-                        * or a swap entry from shmem/tmpfs.  Return
-                        * it without attempting to raise page count.
+                        * A shadow entry of a recently evicted page, a swap
+                        * entry from shmem/tmpfs or a DAX entry.  Return it
+                        * without attempting to raise page count.
                         */
                        goto export;
                }
@@ -1494,6 +1505,74 @@ repeat:
 }
 EXPORT_SYMBOL(find_get_pages_tag);
 
+/**
+ * find_get_entries_tag - find and return entries that match @tag
+ * @mapping:   the address_space to search
+ * @start:     the starting page cache index
+ * @tag:       the tag index
+ * @nr_entries:        the maximum number of entries
+ * @entries:   where the resulting entries are placed
+ * @indices:   the cache indices corresponding to the entries in @entries
+ *
+ * Like find_get_entries, except we only return entries which are tagged with
+ * @tag.
+ */
+unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start,
+                       int tag, unsigned int nr_entries,
+                       struct page **entries, pgoff_t *indices)
+{
+       void **slot;
+       unsigned int ret = 0;
+       struct radix_tree_iter iter;
+
+       if (!nr_entries)
+               return 0;
+
+       rcu_read_lock();
+restart:
+       radix_tree_for_each_tagged(slot, &mapping->page_tree,
+                                  &iter, start, tag) {
+               struct page *page;
+repeat:
+               page = radix_tree_deref_slot(slot);
+               if (unlikely(!page))
+                       continue;
+               if (radix_tree_exception(page)) {
+                       if (radix_tree_deref_retry(page)) {
+                               /*
+                                * Transient condition which can only trigger
+                                * when entry at index 0 moves out of or back
+                                * to root: none yet gotten, safe to restart.
+                                */
+                               goto restart;
+                       }
+
+                       /*
+                        * A shadow entry of a recently evicted page, a swap
+                        * entry from shmem/tmpfs or a DAX entry.  Return it
+                        * without attempting to raise page count.
+                        */
+                       goto export;
+               }
+               if (!page_cache_get_speculative(page))
+                       goto repeat;
+
+               /* Has the page moved? */
+               if (unlikely(page != *slot)) {
+                       page_cache_release(page);
+                       goto repeat;
+               }
+export:
+               indices[ret] = iter.index;
+               entries[ret] = page;
+               if (++ret == nr_entries)
+                       break;
+       }
+       rcu_read_unlock();
+       return ret;
+}
+EXPORT_SYMBOL(find_get_entries_tag);
+
 /*
  * CD/DVDs are error prone. When a medium error occurs, the driver may fail
  * a _large_ part of the i/o request. Imagine the worst scenario:
@@ -2684,11 +2763,11 @@ ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
        struct inode *inode = file->f_mapping->host;
        ssize_t ret;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        ret = generic_write_checks(iocb, from);
        if (ret > 0)
                ret = __generic_file_write_iter(iocb, from);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 
        if (ret > 0) {
                ssize_t err;
index b64a36175884e07604b0e216bc2d545a2892dcb7..7bf19ffa21999c13fa1f24dc01a6bda77217688c 100644 (file)
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -430,10 +430,8 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
                         * Anon pages in shared mappings are surprising: now
                         * just reject it.
                         */
-                       if (!is_cow_mapping(vm_flags)) {
-                               WARN_ON_ONCE(vm_flags & VM_MAYWRITE);
+                       if (!is_cow_mapping(vm_flags))
                                return -EFAULT;
-                       }
                }
        } else if (!(vm_flags & VM_READ)) {
                if (!(gup_flags & FOLL_FORCE))
index 8ad580273521046904ec13478e7b897839d955d7..08fc0ba2207e555a9c734524cd607dc8f53e644a 100644 (file)
@@ -138,9 +138,6 @@ static struct khugepaged_scan khugepaged_scan = {
        .mm_head = LIST_HEAD_INIT(khugepaged_scan.mm_head),
 };
 
-static DEFINE_SPINLOCK(split_queue_lock);
-static LIST_HEAD(split_queue);
-static unsigned long split_queue_len;
 static struct shrinker deferred_split_shrinker;
 
 static void set_recommended_min_free_kbytes(void)
@@ -861,7 +858,8 @@ static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm,
                return false;
        entry = mk_pmd(zero_page, vma->vm_page_prot);
        entry = pmd_mkhuge(entry);
-       pgtable_trans_huge_deposit(mm, pmd, pgtable);
+       if (pgtable)
+               pgtable_trans_huge_deposit(mm, pmd, pgtable);
        set_pmd_at(mm, haddr, pmd, entry);
        atomic_long_inc(&mm->nr_ptes);
        return true;
@@ -1039,13 +1037,15 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
        spinlock_t *dst_ptl, *src_ptl;
        struct page *src_page;
        pmd_t pmd;
-       pgtable_t pgtable;
+       pgtable_t pgtable = NULL;
        int ret;
 
-       ret = -ENOMEM;
-       pgtable = pte_alloc_one(dst_mm, addr);
-       if (unlikely(!pgtable))
-               goto out;
+       if (!vma_is_dax(vma)) {
+               ret = -ENOMEM;
+               pgtable = pte_alloc_one(dst_mm, addr);
+               if (unlikely(!pgtable))
+                       goto out;
+       }
 
        dst_ptl = pmd_lock(dst_mm, dst_pmd);
        src_ptl = pmd_lockptr(src_mm, src_pmd);
@@ -1076,7 +1076,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                goto out_unlock;
        }
 
-       if (pmd_trans_huge(pmd)) {
+       if (!vma_is_dax(vma)) {
                /* thp accounting separate from pmd_devmap accounting */
                src_page = pmd_page(pmd);
                VM_BUG_ON_PAGE(!PageHead(src_page), src_page);
@@ -1560,7 +1560,8 @@ int madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
        struct mm_struct *mm = tlb->mm;
        int ret = 0;
 
-       if (!pmd_trans_huge_lock(pmd, vma, &ptl))
+       ptl = pmd_trans_huge_lock(pmd, vma);
+       if (!ptl)
                goto out_unlocked;
 
        orig_pmd = *pmd;
@@ -1627,7 +1628,8 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
        pmd_t orig_pmd;
        spinlock_t *ptl;
 
-       if (!__pmd_trans_huge_lock(pmd, vma, &ptl))
+       ptl = __pmd_trans_huge_lock(pmd, vma);
+       if (!ptl)
                return 0;
        /*
         * For architectures like ppc64 we look at deposited pgtable
@@ -1690,7 +1692,8 @@ bool move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
         * We don't have to worry about the ordering of src and dst
         * ptlocks because exclusive mmap_sem prevents deadlock.
         */
-       if (__pmd_trans_huge_lock(old_pmd, vma, &old_ptl)) {
+       old_ptl = __pmd_trans_huge_lock(old_pmd, vma);
+       if (old_ptl) {
                new_ptl = pmd_lockptr(mm, new_pmd);
                if (new_ptl != old_ptl)
                        spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
@@ -1724,7 +1727,8 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
        spinlock_t *ptl;
        int ret = 0;
 
-       if (__pmd_trans_huge_lock(pmd, vma, &ptl)) {
+       ptl = __pmd_trans_huge_lock(pmd, vma);
+       if (ptl) {
                pmd_t entry;
                bool preserve_write = prot_numa && pmd_write(*pmd);
                ret = 1;
@@ -1760,14 +1764,14 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
  * Note that if it returns true, this routine returns without unlocking page
  * table lock. So callers must unlock it.
  */
-bool __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
-               spinlock_t **ptl)
+spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma)
 {
-       *ptl = pmd_lock(vma->vm_mm, pmd);
+       spinlock_t *ptl;
+       ptl = pmd_lock(vma->vm_mm, pmd);
        if (likely(pmd_trans_huge(*pmd) || pmd_devmap(*pmd)))
-               return true;
-       spin_unlock(*ptl);
-       return false;
+               return ptl;
+       spin_unlock(ptl);
+       return NULL;
 }
 
 #define VM_NO_THP (VM_SPECIAL | VM_HUGETLB | VM_SHARED | VM_MAYSHARE)
@@ -2068,7 +2072,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
        if (likely(writable)) {
                if (likely(referenced)) {
                        result = SCAN_SUCCEED;
-                       trace_mm_collapse_huge_page_isolate(page_to_pfn(page), none_or_zero,
+                       trace_mm_collapse_huge_page_isolate(page, none_or_zero,
                                                            referenced, writable, result);
                        return 1;
                }
@@ -2078,7 +2082,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
 
 out:
        release_pte_pages(pte, _pte);
-       trace_mm_collapse_huge_page_isolate(page_to_pfn(page), none_or_zero,
+       trace_mm_collapse_huge_page_isolate(page, none_or_zero,
                                            referenced, writable, result);
        return 0;
 }
@@ -2576,7 +2580,7 @@ out_unmap:
                collapse_huge_page(mm, address, hpage, vma, node);
        }
 out:
-       trace_mm_khugepaged_scan_pmd(mm, page_to_pfn(page), writable, referenced,
+       trace_mm_khugepaged_scan_pmd(mm, page, writable, referenced,
                                     none_or_zero, result);
        return ret;
 }
@@ -3354,6 +3358,7 @@ int total_mapcount(struct page *page)
 int split_huge_page_to_list(struct page *page, struct list_head *list)
 {
        struct page *head = compound_head(page);
+       struct pglist_data *pgdata = NODE_DATA(page_to_nid(head));
        struct anon_vma *anon_vma;
        int count, mapcount, ret;
        bool mlocked;
@@ -3397,19 +3402,19 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
                lru_add_drain();
 
        /* Prevent deferred_split_scan() touching ->_count */
-       spin_lock_irqsave(&split_queue_lock, flags);
+       spin_lock_irqsave(&pgdata->split_queue_lock, flags);
        count = page_count(head);
        mapcount = total_mapcount(head);
        if (!mapcount && count == 1) {
                if (!list_empty(page_deferred_list(head))) {
-                       split_queue_len--;
+                       pgdata->split_queue_len--;
                        list_del(page_deferred_list(head));
                }
-               spin_unlock_irqrestore(&split_queue_lock, flags);
+               spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
                __split_huge_page(page, list);
                ret = 0;
        } else if (IS_ENABLED(CONFIG_DEBUG_VM) && mapcount) {
-               spin_unlock_irqrestore(&split_queue_lock, flags);
+               spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
                pr_alert("total_mapcount: %u, page_count(): %u\n",
                                mapcount, count);
                if (PageTail(page))
@@ -3417,7 +3422,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
                dump_page(page, "total_mapcount(head) > 0");
                BUG();
        } else {
-               spin_unlock_irqrestore(&split_queue_lock, flags);
+               spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
                unfreeze_page(anon_vma, head);
                ret = -EBUSY;
        }
@@ -3432,64 +3437,65 @@ out:
 
 void free_transhuge_page(struct page *page)
 {
+       struct pglist_data *pgdata = NODE_DATA(page_to_nid(page));
        unsigned long flags;
 
-       spin_lock_irqsave(&split_queue_lock, flags);
+       spin_lock_irqsave(&pgdata->split_queue_lock, flags);
        if (!list_empty(page_deferred_list(page))) {
-               split_queue_len--;
+               pgdata->split_queue_len--;
                list_del(page_deferred_list(page));
        }
-       spin_unlock_irqrestore(&split_queue_lock, flags);
+       spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
        free_compound_page(page);
 }
 
 void deferred_split_huge_page(struct page *page)
 {
+       struct pglist_data *pgdata = NODE_DATA(page_to_nid(page));
        unsigned long flags;
 
        VM_BUG_ON_PAGE(!PageTransHuge(page), page);
 
-       spin_lock_irqsave(&split_queue_lock, flags);
+       spin_lock_irqsave(&pgdata->split_queue_lock, flags);
        if (list_empty(page_deferred_list(page))) {
-               list_add_tail(page_deferred_list(page), &split_queue);
-               split_queue_len++;
+               list_add_tail(page_deferred_list(page), &pgdata->split_queue);
+               pgdata->split_queue_len++;
        }
-       spin_unlock_irqrestore(&split_queue_lock, flags);
+       spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
 }
 
 static unsigned long deferred_split_count(struct shrinker *shrink,
                struct shrink_control *sc)
 {
-       /*
-        * Split a page from split_queue will free up at least one page,
-        * at most HPAGE_PMD_NR - 1. We don't track exact number.
-        * Let's use HPAGE_PMD_NR / 2 as ballpark.
-        */
-       return ACCESS_ONCE(split_queue_len) * HPAGE_PMD_NR / 2;
+       struct pglist_data *pgdata = NODE_DATA(sc->nid);
+       return ACCESS_ONCE(pgdata->split_queue_len);
 }
 
 static unsigned long deferred_split_scan(struct shrinker *shrink,
                struct shrink_control *sc)
 {
+       struct pglist_data *pgdata = NODE_DATA(sc->nid);
        unsigned long flags;
        LIST_HEAD(list), *pos, *next;
        struct page *page;
        int split = 0;
 
-       spin_lock_irqsave(&split_queue_lock, flags);
-       list_splice_init(&split_queue, &list);
-
+       spin_lock_irqsave(&pgdata->split_queue_lock, flags);
        /* Take pin on all head pages to avoid freeing them under us */
-       list_for_each_safe(pos, next, &list) {
+       list_for_each_safe(pos, next, &pgdata->split_queue) {
                page = list_entry((void *)pos, struct page, mapping);
                page = compound_head(page);
-               /* race with put_compound_page() */
-               if (!get_page_unless_zero(page)) {
+               if (get_page_unless_zero(page)) {
+                       list_move(page_deferred_list(page), &list);
+               } else {
+                       /* We lost race with put_compound_page() */
                        list_del_init(page_deferred_list(page));
-                       split_queue_len--;
+                       pgdata->split_queue_len--;
                }
+               if (!--sc->nr_to_scan)
+                       break;
        }
-       spin_unlock_irqrestore(&split_queue_lock, flags);
+       spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
 
        list_for_each_safe(pos, next, &list) {
                page = list_entry((void *)pos, struct page, mapping);
@@ -3501,17 +3507,24 @@ static unsigned long deferred_split_scan(struct shrinker *shrink,
                put_page(page);
        }
 
-       spin_lock_irqsave(&split_queue_lock, flags);
-       list_splice_tail(&list, &split_queue);
-       spin_unlock_irqrestore(&split_queue_lock, flags);
+       spin_lock_irqsave(&pgdata->split_queue_lock, flags);
+       list_splice_tail(&list, &pgdata->split_queue);
+       spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
 
-       return split * HPAGE_PMD_NR / 2;
+       /*
+        * Stop shrinker if we didn't split any page, but the queue is empty.
+        * This can happen if pages were freed under us.
+        */
+       if (!split && list_empty(&pgdata->split_queue))
+               return SHRINK_STOP;
+       return split;
 }
 
 static struct shrinker deferred_split_shrinker = {
        .count_objects = deferred_split_count,
        .scan_objects = deferred_split_scan,
        .seeks = DEFAULT_SEEKS,
+       .flags = SHRINKER_NUMA_AWARE,
 };
 
 #ifdef CONFIG_DEBUG_FS
index 12908dcf58316afd3f4b14d379a8e139249cd396..06ae13e869d0f088cdddc0862aa315ce8d488443 100644 (file)
@@ -1001,7 +1001,7 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
                ((node = hstate_next_node_to_free(hs, mask)) || 1);     \
                nr_nodes--)
 
-#if defined(CONFIG_CMA) && defined(CONFIG_X86_64)
+#if defined(CONFIG_X86_64) && ((defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || defined(CONFIG_CMA))
 static void destroy_compound_gigantic_page(struct page *page,
                                        unsigned int order)
 {
@@ -1214,8 +1214,8 @@ void free_huge_page(struct page *page)
 
        set_page_private(page, 0);
        page->mapping = NULL;
-       BUG_ON(page_count(page));
-       BUG_ON(page_mapcount(page));
+       VM_BUG_ON_PAGE(page_count(page), page);
+       VM_BUG_ON_PAGE(page_mapcount(page), page);
        restore_reserve = PagePrivate(page);
        ClearPagePrivate(page);
 
@@ -1286,6 +1286,7 @@ static void prep_compound_gigantic_page(struct page *page, unsigned int order)
                set_page_count(p, 0);
                set_compound_head(p, page);
        }
+       atomic_set(compound_mapcount_ptr(page), -1);
 }
 
 /*
index ed8b5ffcf9b16fbfcf3ccba0d182957980ad45ab..a38a21ebddb454540fc1dcae83be516ffc7cdf26 100644 (file)
@@ -216,6 +216,37 @@ static inline bool is_cow_mapping(vm_flags_t flags)
        return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
 }
 
+/*
+ * These three helpers classifies VMAs for virtual memory accounting.
+ */
+
+/*
+ * Executable code area - executable, not writable, not stack
+ */
+static inline bool is_exec_mapping(vm_flags_t flags)
+{
+       return (flags & (VM_EXEC | VM_WRITE | VM_STACK)) == VM_EXEC;
+}
+
+/*
+ * Stack area - atomatically grows in one direction
+ *
+ * VM_GROWSUP / VM_GROWSDOWN VMAs are always private anonymous:
+ * do_mmap() forbids all other combinations.
+ */
+static inline bool is_stack_mapping(vm_flags_t flags)
+{
+       return (flags & VM_STACK) == VM_STACK;
+}
+
+/*
+ * Data area - private, writable, not stack
+ */
+static inline bool is_data_mapping(vm_flags_t flags)
+{
+       return (flags & (VM_WRITE | VM_SHARED | VM_STACK)) == VM_WRITE;
+}
+
 /* mm/util.c */
 void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
                struct vm_area_struct *prev, struct rb_node *rb_parent);
index d2ed81e59a94c3f81674ead9e84d51ca8ebd9950..dd7989929f13ae16e265bff30fd958f1f13993cb 100644 (file)
@@ -1448,7 +1448,7 @@ void __init __memblock_free_late(phys_addr_t base, phys_addr_t size)
  * Remaining API functions
  */
 
-phys_addr_t __init memblock_phys_mem_size(void)
+phys_addr_t __init_memblock memblock_phys_mem_size(void)
 {
        return memblock.memory.total_size;
 }
index ca052f2a4a0b374eb9a2a6498b407be7da28ac48..d06cae2de783acf462162e27f8770ff7bf60f4ad 100644 (file)
@@ -4638,7 +4638,8 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
        pte_t *pte;
        spinlock_t *ptl;
 
-       if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
+       ptl = pmd_trans_huge_lock(pmd, vma);
+       if (ptl) {
                if (get_mctgt_type_thp(vma, addr, *pmd, NULL) == MC_TARGET_PAGE)
                        mc.precharge += HPAGE_PMD_NR;
                spin_unlock(ptl);
@@ -4826,7 +4827,8 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
        union mc_target target;
        struct page *page;
 
-       if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
+       ptl = pmd_trans_huge_lock(pmd, vma);
+       if (ptl) {
                if (mc.precharge < HPAGE_PMD_NR) {
                        spin_unlock(ptl);
                        return 0;
index 30991f83d0bf54dc537f1927a10883aa5787dd97..635451abc8f7c9b84663535678a26a9b0848c89d 100644 (file)
@@ -1591,10 +1591,15 @@ int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
         * than insert_pfn).  If a zero_pfn were inserted into a VM_MIXEDMAP
         * without pte special, it would there be refcounted as a normal page.
         */
-       if (!HAVE_PTE_SPECIAL && pfn_t_valid(pfn)) {
+       if (!HAVE_PTE_SPECIAL && !pfn_t_devmap(pfn) && pfn_t_valid(pfn)) {
                struct page *page;
 
-               page = pfn_t_to_page(pfn);
+               /*
+                * At this point we are committed to insert_page()
+                * regardless of whether the caller specified flags that
+                * result in pfn_t_has_page() == false.
+                */
+               page = pfn_to_page(pfn_t_to_pfn(pfn));
                return insert_page(vma, addr, page, vma->vm_page_prot);
        }
        return insert_pfn(vma, addr, pfn, vma->vm_page_prot);
@@ -2232,11 +2237,6 @@ static int wp_page_shared(struct mm_struct *mm, struct vm_area_struct *vma,
 
        page_cache_get(old_page);
 
-       /*
-        * Only catch write-faults on shared writable pages,
-        * read-only shared pages can get COWed by
-        * get_user_pages(.write=1, .force=1).
-        */
        if (vma->vm_ops && vma->vm_ops->page_mkwrite) {
                int tmp;
 
index 27d135408a22057a5b166e7eb2bf2e080bbd33f2..4c4187c0e1deeb25bdbf5eb383132d27feb9be90 100644 (file)
@@ -548,8 +548,7 @@ retry:
                        goto retry;
                }
 
-               if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
-                       migrate_page_add(page, qp->pagelist, flags);
+               migrate_page_add(page, qp->pagelist, flags);
        }
        pte_unmap_unlock(pte - 1, ptl);
        cond_resched();
@@ -625,7 +624,7 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end,
        unsigned long endvma = vma->vm_end;
        unsigned long flags = qp->flags;
 
-       if (vma->vm_flags & VM_PFNMAP)
+       if (!vma_migratable(vma))
                return 1;
 
        if (endvma > end)
@@ -644,16 +643,13 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end,
 
        if (flags & MPOL_MF_LAZY) {
                /* Similar to task_numa_work, skip inaccessible VMAs */
-               if (vma_migratable(vma) &&
-                       vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))
+               if (vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))
                        change_prot_numa(vma, start, endvma);
                return 1;
        }
 
-       if ((flags & MPOL_MF_STRICT) ||
-           ((flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) &&
-            vma_migratable(vma)))
-               /* queue pages from current vma */
+       /* queue pages from current vma */
+       if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
                return 0;
        return 1;
 }
index 2a565ed8bb4907398a0d3d2619fd4939df777970..563f320454902df04e782f73aa8d0473656034c9 100644 (file)
@@ -117,7 +117,8 @@ static int mincore_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
        unsigned char *vec = walk->private;
        int nr = (end - addr) >> PAGE_SHIFT;
 
-       if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
+       ptl = pmd_trans_huge_lock(pmd, vma);
+       if (ptl) {
                memset(vec, 1, nr);
                spin_unlock(ptl);
                goto out;
index e1e2b1207bf2ee00604468d027b3879381e1b089..96f00104192861aec6358d9fae5f9b39fecb43a4 100644 (file)
@@ -175,7 +175,7 @@ static void __munlock_isolation_failed(struct page *page)
  */
 unsigned int munlock_vma_page(struct page *page)
 {
-       unsigned int nr_pages;
+       int nr_pages;
        struct zone *zone = page_zone(page);
 
        /* For try_to_munlock() and to serialize with page migration */
index 84b12624ceb01d83762172634179825b086961fd..2f2415a7a688da595a996a01d0ae0e1f0f955454 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -42,6 +42,7 @@
 #include <linux/memory.h>
 #include <linux/printk.h>
 #include <linux/userfaultfd_k.h>
+#include <linux/moduleparam.h>
 
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
@@ -69,6 +70,8 @@ const int mmap_rnd_compat_bits_max = CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX;
 int mmap_rnd_compat_bits __read_mostly = CONFIG_ARCH_MMAP_RND_COMPAT_BITS;
 #endif
 
+static bool ignore_rlimit_data = true;
+core_param(ignore_rlimit_data, ignore_rlimit_data, bool, 0644);
 
 static void unmap_region(struct mm_struct *mm,
                struct vm_area_struct *vma, struct vm_area_struct *prev,
@@ -387,8 +390,9 @@ static long vma_compute_subtree_gap(struct vm_area_struct *vma)
 }
 
 #ifdef CONFIG_DEBUG_VM_RB
-static int browse_rb(struct rb_root *root)
+static int browse_rb(struct mm_struct *mm)
 {
+       struct rb_root *root = &mm->mm_rb;
        int i = 0, j, bug = 0;
        struct rb_node *nd, *pn = NULL;
        unsigned long prev = 0, pend = 0;
@@ -411,12 +415,14 @@ static int browse_rb(struct rb_root *root)
                                  vma->vm_start, vma->vm_end);
                        bug = 1;
                }
+               spin_lock(&mm->page_table_lock);
                if (vma->rb_subtree_gap != vma_compute_subtree_gap(vma)) {
                        pr_emerg("free gap %lx, correct %lx\n",
                               vma->rb_subtree_gap,
                               vma_compute_subtree_gap(vma));
                        bug = 1;
                }
+               spin_unlock(&mm->page_table_lock);
                i++;
                pn = nd;
                prev = vma->vm_start;
@@ -453,12 +459,16 @@ static void validate_mm(struct mm_struct *mm)
        struct vm_area_struct *vma = mm->mmap;
 
        while (vma) {
+               struct anon_vma *anon_vma = vma->anon_vma;
                struct anon_vma_chain *avc;
 
-               vma_lock_anon_vma(vma);
-               list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
-                       anon_vma_interval_tree_verify(avc);
-               vma_unlock_anon_vma(vma);
+               if (anon_vma) {
+                       anon_vma_lock_read(anon_vma);
+                       list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
+                               anon_vma_interval_tree_verify(avc);
+                       anon_vma_unlock_read(anon_vma);
+               }
+
                highest_address = vma->vm_end;
                vma = vma->vm_next;
                i++;
@@ -472,7 +482,7 @@ static void validate_mm(struct mm_struct *mm)
                          mm->highest_vm_end, highest_address);
                bug = 1;
        }
-       i = browse_rb(&mm->mm_rb);
+       i = browse_rb(mm);
        if (i != mm->map_count) {
                if (i != -1)
                        pr_emerg("map_count %d rb %d\n", mm->map_count, i);
@@ -2139,32 +2149,27 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
 int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 {
        struct mm_struct *mm = vma->vm_mm;
-       int error;
+       int error = 0;
 
        if (!(vma->vm_flags & VM_GROWSUP))
                return -EFAULT;
 
-       /*
-        * We must make sure the anon_vma is allocated
-        * so that the anon_vma locking is not a noop.
-        */
+       /* Guard against wrapping around to address 0. */
+       if (address < PAGE_ALIGN(address+4))
+               address = PAGE_ALIGN(address+4);
+       else
+               return -ENOMEM;
+
+       /* We must make sure the anon_vma is allocated. */
        if (unlikely(anon_vma_prepare(vma)))
                return -ENOMEM;
-       vma_lock_anon_vma(vma);
 
        /*
         * vma->vm_start/vm_end cannot change under us because the caller
         * is required to hold the mmap_sem in read mode.  We need the
         * anon_vma lock to serialize against concurrent expand_stacks.
-        * Also guard against wrapping around to address 0.
         */
-       if (address < PAGE_ALIGN(address+4))
-               address = PAGE_ALIGN(address+4);
-       else {
-               vma_unlock_anon_vma(vma);
-               return -ENOMEM;
-       }
-       error = 0;
+       anon_vma_lock_write(vma->anon_vma);
 
        /* Somebody else might have raced and expanded it already */
        if (address > vma->vm_end) {
@@ -2182,7 +2187,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
                                 * updates, but we only hold a shared mmap_sem
                                 * lock here, so we need to protect against
                                 * concurrent vma expansions.
-                                * vma_lock_anon_vma() doesn't help here, as
+                                * anon_vma_lock_write() doesn't help here, as
                                 * we don't guarantee that all growable vmas
                                 * in a mm share the same root anon vma.
                                 * So, we reuse mm->page_table_lock to guard
@@ -2205,7 +2210,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
                        }
                }
        }
-       vma_unlock_anon_vma(vma);
+       anon_vma_unlock_write(vma->anon_vma);
        khugepaged_enter_vma_merge(vma, vma->vm_flags);
        validate_mm(mm);
        return error;
@@ -2221,25 +2226,21 @@ int expand_downwards(struct vm_area_struct *vma,
        struct mm_struct *mm = vma->vm_mm;
        int error;
 
-       /*
-        * We must make sure the anon_vma is allocated
-        * so that the anon_vma locking is not a noop.
-        */
-       if (unlikely(anon_vma_prepare(vma)))
-               return -ENOMEM;
-
        address &= PAGE_MASK;
        error = security_mmap_addr(address);
        if (error)
                return error;
 
-       vma_lock_anon_vma(vma);
+       /* We must make sure the anon_vma is allocated. */
+       if (unlikely(anon_vma_prepare(vma)))
+               return -ENOMEM;
 
        /*
         * vma->vm_start/vm_end cannot change under us because the caller
         * is required to hold the mmap_sem in read mode.  We need the
         * anon_vma lock to serialize against concurrent expand_stacks.
         */
+       anon_vma_lock_write(vma->anon_vma);
 
        /* Somebody else might have raced and expanded it already */
        if (address < vma->vm_start) {
@@ -2257,7 +2258,7 @@ int expand_downwards(struct vm_area_struct *vma,
                                 * updates, but we only hold a shared mmap_sem
                                 * lock here, so we need to protect against
                                 * concurrent vma expansions.
-                                * vma_lock_anon_vma() doesn't help here, as
+                                * anon_vma_lock_write() doesn't help here, as
                                 * we don't guarantee that all growable vmas
                                 * in a mm share the same root anon vma.
                                 * So, we reuse mm->page_table_lock to guard
@@ -2278,7 +2279,7 @@ int expand_downwards(struct vm_area_struct *vma,
                        }
                }
        }
-       vma_unlock_anon_vma(vma);
+       anon_vma_unlock_write(vma->anon_vma);
        khugepaged_enter_vma_merge(vma, vma->vm_flags);
        validate_mm(mm);
        return error;
@@ -2982,9 +2983,17 @@ bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags, unsigned long npages)
        if (mm->total_vm + npages > rlimit(RLIMIT_AS) >> PAGE_SHIFT)
                return false;
 
-       if ((flags & (VM_WRITE | VM_SHARED | (VM_STACK_FLAGS &
-                               (VM_GROWSUP | VM_GROWSDOWN)))) == VM_WRITE)
-               return mm->data_vm + npages <= rlimit(RLIMIT_DATA);
+       if (is_data_mapping(flags) &&
+           mm->data_vm + npages > rlimit(RLIMIT_DATA) >> PAGE_SHIFT) {
+               if (ignore_rlimit_data)
+                       pr_warn_once("%s (%d): VmData %lu exceed data ulimit "
+                                    "%lu. Will be forbidden soon.\n",
+                                    current->comm, current->pid,
+                                    (mm->data_vm + npages) << PAGE_SHIFT,
+                                    rlimit(RLIMIT_DATA));
+               else
+                       return false;
+       }
 
        return true;
 }
@@ -2993,11 +3002,11 @@ void vm_stat_account(struct mm_struct *mm, vm_flags_t flags, long npages)
 {
        mm->total_vm += npages;
 
-       if ((flags & (VM_EXEC | VM_WRITE)) == VM_EXEC)
+       if (is_exec_mapping(flags))
                mm->exec_vm += npages;
-       else if (flags & (VM_STACK_FLAGS & (VM_GROWSUP | VM_GROWSDOWN)))
+       else if (is_stack_mapping(flags))
                mm->stack_vm += npages;
-       else if ((flags & (VM_WRITE | VM_SHARED)) == VM_WRITE)
+       else if (is_data_mapping(flags))
                mm->data_vm += npages;
 }
 
index 63358d9f9aa98eff0848879b0503b5d80b9ea8d0..838ca8bb64f7376062fc6670c759a27d7f59c7e3 100644 (file)
@@ -5209,6 +5209,11 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
        spin_lock_init(&pgdat->numabalancing_migrate_lock);
        pgdat->numabalancing_migrate_nr_pages = 0;
        pgdat->numabalancing_migrate_next_window = jiffies;
+#endif
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+       spin_lock_init(&pgdat->split_queue_lock);
+       INIT_LIST_HEAD(&pgdat->split_queue);
+       pgdat->split_queue_len = 0;
 #endif
        init_waitqueue_head(&pgdat->kswapd_wait);
        init_waitqueue_head(&pgdat->pfmemalloc_wait);
@@ -6615,7 +6620,7 @@ bool is_pageblock_removable_nolock(struct page *page)
        return !has_unmovable_pages(zone, page, 0, true);
 }
 
-#ifdef CONFIG_CMA
+#if (defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || defined(CONFIG_CMA)
 
 static unsigned long pfn_max_align_down(unsigned long pfn)
 {
index 8a943b97a053a6aef9c939b6b06854fedea01b7b..998607adf6eb840a6ae30371484839277eaaab6b 100644 (file)
@@ -305,16 +305,12 @@ static void *pcpu_mem_zalloc(size_t size)
 /**
  * pcpu_mem_free - free memory
  * @ptr: memory to free
- * @size: size of the area
  *
  * Free @ptr.  @ptr should have been allocated using pcpu_mem_zalloc().
  */
-static void pcpu_mem_free(void *ptr, size_t size)
+static void pcpu_mem_free(void *ptr)
 {
-       if (size <= PAGE_SIZE)
-               kfree(ptr);
-       else
-               vfree(ptr);
+       kvfree(ptr);
 }
 
 /**
@@ -463,8 +459,8 @@ out_unlock:
         * pcpu_mem_free() might end up calling vfree() which uses
         * IRQ-unsafe lock and thus can't be called under pcpu_lock.
         */
-       pcpu_mem_free(old, old_size);
-       pcpu_mem_free(new, new_size);
+       pcpu_mem_free(old);
+       pcpu_mem_free(new);
 
        return 0;
 }
@@ -732,7 +728,7 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void)
        chunk->map = pcpu_mem_zalloc(PCPU_DFL_MAP_ALLOC *
                                                sizeof(chunk->map[0]));
        if (!chunk->map) {
-               pcpu_mem_free(chunk, pcpu_chunk_struct_size);
+               pcpu_mem_free(chunk);
                return NULL;
        }
 
@@ -753,8 +749,8 @@ static void pcpu_free_chunk(struct pcpu_chunk *chunk)
 {
        if (!chunk)
                return;
-       pcpu_mem_free(chunk->map, chunk->map_alloc * sizeof(chunk->map[0]));
-       pcpu_mem_free(chunk, pcpu_chunk_struct_size);
+       pcpu_mem_free(chunk->map);
+       pcpu_mem_free(chunk);
 }
 
 /**
index fa2ceb2d2655dbd8e06ecd4e02384df5ad5b5891..440e2a7e6c1c2ca706376eb72d719fb8b83af05a 100644 (file)
@@ -701,8 +701,7 @@ static void shmem_evict_inode(struct inode *inode)
                        list_del_init(&info->swaplist);
                        mutex_unlock(&shmem_swaplist_mutex);
                }
-       } else
-               kfree(info->symlink);
+       }
 
        simple_xattrs_free(&info->xattrs);
        WARN_ON(inode->i_blocks);
@@ -1902,7 +1901,7 @@ static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
        if (whence != SEEK_DATA && whence != SEEK_HOLE)
                return generic_file_llseek_size(file, offset, whence,
                                        MAX_LFS_FILESIZE, i_size_read(inode));
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        /* We're holding i_mutex so we can access i_size directly */
 
        if (offset < 0)
@@ -1926,7 +1925,7 @@ static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
 
        if (offset >= 0)
                offset = vfs_setpos(file, offset, MAX_LFS_FILESIZE);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return offset;
 }
 
@@ -2091,7 +2090,7 @@ int shmem_add_seals(struct file *file, unsigned int seals)
        if (seals & ~(unsigned int)F_ALL_SEALS)
                return -EINVAL;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        if (info->seals & F_SEAL_SEAL) {
                error = -EPERM;
@@ -2114,7 +2113,7 @@ int shmem_add_seals(struct file *file, unsigned int seals)
        error = 0;
 
 unlock:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return error;
 }
 EXPORT_SYMBOL_GPL(shmem_add_seals);
@@ -2164,7 +2163,7 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
        if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
                return -EOPNOTSUPP;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        if (mode & FALLOC_FL_PUNCH_HOLE) {
                struct address_space *mapping = file->f_mapping;
@@ -2277,7 +2276,7 @@ undone:
        inode->i_private = NULL;
        spin_unlock(&inode->i_lock);
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return error;
 }
 
@@ -2549,13 +2548,12 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
        info = SHMEM_I(inode);
        inode->i_size = len-1;
        if (len <= SHORT_SYMLINK_LEN) {
-               info->symlink = kmemdup(symname, len, GFP_KERNEL);
-               if (!info->symlink) {
+               inode->i_link = kmemdup(symname, len, GFP_KERNEL);
+               if (!inode->i_link) {
                        iput(inode);
                        return -ENOMEM;
                }
                inode->i_op = &shmem_short_symlink_operations;
-               inode->i_link = info->symlink;
        } else {
                inode_nohighmem(inode);
                error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL);
@@ -3132,6 +3130,7 @@ static struct inode *shmem_alloc_inode(struct super_block *sb)
 static void shmem_destroy_callback(struct rcu_head *head)
 {
        struct inode *inode = container_of(head, struct inode, i_rcu);
+       kfree(inode->i_link);
        kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
 }
 
index c43f654a7b645474592de879f0664e4698d9dfbd..d2c37365e2d6e429cf0f7cdbb62308f5e76aea93 100644 (file)
@@ -1956,9 +1956,9 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
                set_blocksize(bdev, old_block_size);
                blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
        } else {
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                inode->i_flags &= ~S_SWAPFILE;
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        }
        filp_close(swap_file, NULL);
 
@@ -2183,7 +2183,7 @@ static int claim_swapfile(struct swap_info_struct *p, struct inode *inode)
                p->flags |= SWP_BLKDEV;
        } else if (S_ISREG(inode->i_mode)) {
                p->bdev = inode->i_sb->s_bdev;
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                if (IS_SWAPFILE(inode))
                        return -EBUSY;
        } else
@@ -2416,7 +2416,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
        mapping = swap_file->f_mapping;
        inode = mapping->host;
 
-       /* If S_ISREG(inode->i_mode) will do mutex_lock(&inode->i_mutex); */
+       /* If S_ISREG(inode->i_mode) will do inode_lock(inode); */
        error = claim_swapfile(p, inode);
        if (unlikely(error))
                goto bad_swap;
@@ -2561,7 +2561,7 @@ bad_swap:
        vfree(cluster_info);
        if (swap_file) {
                if (inode && S_ISREG(inode->i_mode)) {
-                       mutex_unlock(&inode->i_mutex);
+                       inode_unlock(inode);
                        inode = NULL;
                }
                filp_close(swap_file, NULL);
@@ -2574,7 +2574,7 @@ out:
        if (name)
                putname(name);
        if (inode && S_ISREG(inode->i_mode))
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        return error;
 }
 
index 76e35ad971025ce5eb3781543537d1bf3b947b8d..e3ee0e27cd17f20d1d35f1acdeab18136ccc1e29 100644 (file)
@@ -9,6 +9,7 @@
 
 #include <linux/kernel.h>
 #include <linux/backing-dev.h>
+#include <linux/dax.h>
 #include <linux/gfp.h>
 #include <linux/mm.h>
 #include <linux/swap.h>
@@ -34,31 +35,39 @@ static void clear_exceptional_entry(struct address_space *mapping,
                return;
 
        spin_lock_irq(&mapping->tree_lock);
-       /*
-        * Regular page slots are stabilized by the page lock even
-        * without the tree itself locked.  These unlocked entries
-        * need verification under the tree lock.
-        */
-       if (!__radix_tree_lookup(&mapping->page_tree, index, &node, &slot))
-               goto unlock;
-       if (*slot != entry)
-               goto unlock;
-       radix_tree_replace_slot(slot, NULL);
-       mapping->nrshadows--;
-       if (!node)
-               goto unlock;
-       workingset_node_shadows_dec(node);
-       /*
-        * Don't track node without shadow entries.
-        *
-        * Avoid acquiring the list_lru lock if already untracked.
-        * The list_empty() test is safe as node->private_list is
-        * protected by mapping->tree_lock.
-        */
-       if (!workingset_node_shadows(node) &&
-           !list_empty(&node->private_list))
-               list_lru_del(&workingset_shadow_nodes, &node->private_list);
-       __radix_tree_delete_node(&mapping->page_tree, node);
+
+       if (dax_mapping(mapping)) {
+               if (radix_tree_delete_item(&mapping->page_tree, index, entry))
+                       mapping->nrexceptional--;
+       } else {
+               /*
+                * Regular page slots are stabilized by the page lock even
+                * without the tree itself locked.  These unlocked entries
+                * need verification under the tree lock.
+                */
+               if (!__radix_tree_lookup(&mapping->page_tree, index, &node,
+                                       &slot))
+                       goto unlock;
+               if (*slot != entry)
+                       goto unlock;
+               radix_tree_replace_slot(slot, NULL);
+               mapping->nrexceptional--;
+               if (!node)
+                       goto unlock;
+               workingset_node_shadows_dec(node);
+               /*
+                * Don't track node without shadow entries.
+                *
+                * Avoid acquiring the list_lru lock if already untracked.
+                * The list_empty() test is safe as node->private_list is
+                * protected by mapping->tree_lock.
+                */
+               if (!workingset_node_shadows(node) &&
+                   !list_empty(&node->private_list))
+                       list_lru_del(&workingset_shadow_nodes,
+                                       &node->private_list);
+               __radix_tree_delete_node(&mapping->page_tree, node);
+       }
 unlock:
        spin_unlock_irq(&mapping->tree_lock);
 }
@@ -228,7 +237,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
        int             i;
 
        cleancache_invalidate_inode(mapping);
-       if (mapping->nrpages == 0 && mapping->nrshadows == 0)
+       if (mapping->nrpages == 0 && mapping->nrexceptional == 0)
                return;
 
        /* Offsets within partial pages */
@@ -402,7 +411,7 @@ EXPORT_SYMBOL(truncate_inode_pages);
  */
 void truncate_inode_pages_final(struct address_space *mapping)
 {
-       unsigned long nrshadows;
+       unsigned long nrexceptional;
        unsigned long nrpages;
 
        /*
@@ -416,14 +425,14 @@ void truncate_inode_pages_final(struct address_space *mapping)
 
        /*
         * When reclaim installs eviction entries, it increases
-        * nrshadows first, then decreases nrpages.  Make sure we see
+        * nrexceptional first, then decreases nrpages.  Make sure we see
         * this in the right order or we might miss an entry.
         */
        nrpages = mapping->nrpages;
        smp_rmb();
-       nrshadows = mapping->nrshadows;
+       nrexceptional = mapping->nrexceptional;
 
-       if (nrpages || nrshadows) {
+       if (nrpages || nrexceptional) {
                /*
                 * As truncation uses a lockless tree lookup, cycle
                 * the tree lock to make sure any ongoing tree
index c108a6542d05d3e1c880c1f6ab540ced62156e54..4fb14ca5a41967696a6f769189eb5a59c6a91c0f 100644 (file)
--- a/mm/util.c
+++ b/mm/util.c
@@ -230,36 +230,11 @@ void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
 }
 
 /* Check if the vma is being used as a stack by this task */
-static int vm_is_stack_for_task(struct task_struct *t,
-                               struct vm_area_struct *vma)
+int vma_is_stack_for_task(struct vm_area_struct *vma, struct task_struct *t)
 {
        return (vma->vm_start <= KSTK_ESP(t) && vma->vm_end >= KSTK_ESP(t));
 }
 
-/*
- * Check if the vma is being used as a stack.
- * If is_group is non-zero, check in the entire thread group or else
- * just check in the current task. Returns the task_struct of the task
- * that the vma is stack for. Must be called under rcu_read_lock().
- */
-struct task_struct *task_of_stack(struct task_struct *task,
-                               struct vm_area_struct *vma, bool in_group)
-{
-       if (vm_is_stack_for_task(task, vma))
-               return task;
-
-       if (in_group) {
-               struct task_struct *t;
-
-               for_each_thread(task, t) {
-                       if (vm_is_stack_for_task(t, vma))
-                               return t;
-               }
-       }
-
-       return NULL;
-}
-
 #if defined(CONFIG_MMU) && !defined(HAVE_ARCH_PICK_MMAP_LAYOUT)
 void arch_pick_mmap_layout(struct mm_struct *mm)
 {
index 9a6c0704211c856c300f67057fc202e25c9657ab..149fdf6c5c56f927f3613538c61b3c5831c80af9 100644 (file)
@@ -248,9 +248,8 @@ void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree,
 
        if (tree) {
                spin_lock(&vmpr->sr_lock);
-               vmpr->tree_scanned += scanned;
+               scanned = vmpr->tree_scanned += scanned;
                vmpr->tree_reclaimed += reclaimed;
-               scanned = vmpr->scanned;
                spin_unlock(&vmpr->sr_lock);
 
                if (scanned < vmpressure_win)
index bd620b65db52680fc8a6e221fe90189c22dca2f1..71b1c29948dba30aab0a894ddc7c84eb62acde2b 100644 (file)
@@ -46,6 +46,7 @@
 #include <linux/oom.h>
 #include <linux/prefetch.h>
 #include <linux/printk.h>
+#include <linux/dax.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -671,9 +672,15 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
                 * inode reclaim needs to empty out the radix tree or
                 * the nodes are lost.  Don't plant shadows behind its
                 * back.
+                *
+                * We also don't store shadows for DAX mappings because the
+                * only page cache pages found in these are zero pages
+                * covering holes, and because we don't want to mix DAX
+                * exceptional entries and shadow exceptional entries in the
+                * same page_tree.
                 */
                if (reclaimed && page_is_file_cache(page) &&
-                   !mapping_exiting(mapping))
+                   !mapping_exiting(mapping) && !dax_mapping(mapping))
                        shadow = workingset_eviction(mapping, page);
                __delete_from_page_cache(page, shadow, memcg);
                spin_unlock_irqrestore(&mapping->tree_lock, flags);
@@ -1436,7 +1443,7 @@ int isolate_lru_page(struct page *page)
        int ret = -EBUSY;
 
        VM_BUG_ON_PAGE(!page_count(page), page);
-       VM_BUG_ON_PAGE(PageTail(page), page);
+       WARN_RATELIMIT(PageTail(page), "trying to isolate tail page");
 
        if (PageLRU(page)) {
                struct zone *zone = page_zone(page);
index 64bd0aa13f75cc25247609542f0949c18995c8c1..084c6725b3734430483e7ea4fcf74e2ab67f7bfa 100644 (file)
@@ -1396,10 +1396,15 @@ static void vmstat_update(struct work_struct *w)
                 * Counters were updated so we expect more updates
                 * to occur in the future. Keep on running the
                 * update worker thread.
+                * If we were marked on cpu_stat_off clear the flag
+                * so that vmstat_shepherd doesn't schedule us again.
                 */
-               queue_delayed_work_on(smp_processor_id(), vmstat_wq,
-                       this_cpu_ptr(&vmstat_work),
-                       round_jiffies_relative(sysctl_stat_interval));
+               if (!cpumask_test_and_clear_cpu(smp_processor_id(),
+                                               cpu_stat_off)) {
+                       queue_delayed_work_on(smp_processor_id(), vmstat_wq,
+                               this_cpu_ptr(&vmstat_work),
+                               round_jiffies_relative(sysctl_stat_interval));
+               }
        } else {
                /*
                 * We did not update any counters so the app may be in
@@ -1408,17 +1413,7 @@ static void vmstat_update(struct work_struct *w)
                 * Defer the checking for differentials to the
                 * shepherd thread on a different processor.
                 */
-               int r;
-               /*
-                * Shepherd work thread does not race since it never
-                * changes the bit if its zero but the cpu
-                * online / off line code may race if
-                * worker threads are still allowed during
-                * shutdown / startup.
-                */
-               r = cpumask_test_and_set_cpu(smp_processor_id(),
-                       cpu_stat_off);
-               VM_BUG_ON(r);
+               cpumask_set_cpu(smp_processor_id(), cpu_stat_off);
        }
 }
 
@@ -1427,18 +1422,6 @@ static void vmstat_update(struct work_struct *w)
  * until the diffs stay at zero. The function is used by NOHZ and can only be
  * invoked when tick processing is not active.
  */
-void quiet_vmstat(void)
-{
-       if (system_state != SYSTEM_RUNNING)
-               return;
-
-       do {
-               if (!cpumask_test_and_set_cpu(smp_processor_id(), cpu_stat_off))
-                       cancel_delayed_work(this_cpu_ptr(&vmstat_work));
-
-       } while (refresh_cpu_vm_stats(false));
-}
-
 /*
  * Check if the diffs for a certain cpu indicate that
  * an update is needed.
@@ -1462,6 +1445,30 @@ static bool need_update(int cpu)
        return false;
 }
 
+void quiet_vmstat(void)
+{
+       if (system_state != SYSTEM_RUNNING)
+               return;
+
+       /*
+        * If we are already in hands of the shepherd then there
+        * is nothing for us to do here.
+        */
+       if (cpumask_test_and_set_cpu(smp_processor_id(), cpu_stat_off))
+               return;
+
+       if (!need_update(smp_processor_id()))
+               return;
+
+       /*
+        * Just refresh counters and do not care about the pending delayed
+        * vmstat_update. It doesn't fire that often to matter and canceling
+        * it would be too expensive from this path.
+        * vmstat_shepherd will take care about that for us.
+        */
+       refresh_cpu_vm_stats(false);
+}
+
 
 /*
  * Shepherd worker thread that checks the
@@ -1479,18 +1486,25 @@ static void vmstat_shepherd(struct work_struct *w)
 
        get_online_cpus();
        /* Check processors whose vmstat worker threads have been disabled */
-       for_each_cpu(cpu, cpu_stat_off)
-               if (need_update(cpu) &&
-                       cpumask_test_and_clear_cpu(cpu, cpu_stat_off))
-
-                       queue_delayed_work_on(cpu, vmstat_wq,
-                               &per_cpu(vmstat_work, cpu), 0);
+       for_each_cpu(cpu, cpu_stat_off) {
+               struct delayed_work *dw = &per_cpu(vmstat_work, cpu);
 
+               if (need_update(cpu)) {
+                       if (cpumask_test_and_clear_cpu(cpu, cpu_stat_off))
+                               queue_delayed_work_on(cpu, vmstat_wq, dw, 0);
+               } else {
+                       /*
+                        * Cancel the work if quiet_vmstat has put this
+                        * cpu on cpu_stat_off because the work item might
+                        * be still scheduled
+                        */
+                       cancel_delayed_work(dw);
+               }
+       }
        put_online_cpus();
 
        schedule_delayed_work(&shepherd,
                round_jiffies_relative(sysctl_stat_interval));
-
 }
 
 static void __init start_shepherd_timer(void)
@@ -1498,7 +1512,7 @@ static void __init start_shepherd_timer(void)
        int cpu;
 
        for_each_possible_cpu(cpu)
-               INIT_DELAYED_WORK(per_cpu_ptr(&vmstat_work, cpu),
+               INIT_DEFERRABLE_WORK(per_cpu_ptr(&vmstat_work, cpu),
                        vmstat_update);
 
        if (!alloc_cpumask_var(&cpu_stat_off, GFP_KERNEL))
index aa017133744b227bed7592ea6cc32f360c3e142c..61ead9e5549df171f43fa53936a4b8cbd86a21a5 100644 (file)
@@ -351,8 +351,8 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
                        node->slots[i] = NULL;
                        BUG_ON(node->count < (1U << RADIX_TREE_COUNT_SHIFT));
                        node->count -= 1U << RADIX_TREE_COUNT_SHIFT;
-                       BUG_ON(!mapping->nrshadows);
-                       mapping->nrshadows--;
+                       BUG_ON(!mapping->nrexceptional);
+                       mapping->nrexceptional--;
                }
        }
        BUG_ON(node->count);
index bced8c074c1280c6ff175576cb035bf048f18ddd..7bc2208b6cc4c445286c20e01d9911ac9e75b9b2 100644 (file)
@@ -108,9 +108,7 @@ struct p9_poll_wait {
  * @unsent_req_list: accounting for requests that haven't been sent
  * @req: current request being processed (if any)
  * @tmp_buf: temporary buffer to read in header
- * @rsize: amount to read for current frame
- * @rpos: read position in current frame
- * @rbuf: current read buffer
+ * @rc: temporary fcall for reading current frame
  * @wpos: write position for current frame
  * @wsize: amount of data to write for current frame
  * @wbuf: current write buffer
@@ -131,9 +129,7 @@ struct p9_conn {
        struct list_head unsent_req_list;
        struct p9_req_t *req;
        char tmp_buf[7];
-       int rsize;
-       int rpos;
-       char *rbuf;
+       struct p9_fcall rc;
        int wpos;
        int wsize;
        char *wbuf;
@@ -305,69 +301,77 @@ static void p9_read_work(struct work_struct *work)
        if (m->err < 0)
                return;
 
-       p9_debug(P9_DEBUG_TRANS, "start mux %p pos %d\n", m, m->rpos);
+       p9_debug(P9_DEBUG_TRANS, "start mux %p pos %zd\n", m, m->rc.offset);
 
-       if (!m->rbuf) {
-               m->rbuf = m->tmp_buf;
-               m->rpos = 0;
-               m->rsize = 7; /* start by reading header */
+       if (!m->rc.sdata) {
+               m->rc.sdata = m->tmp_buf;
+               m->rc.offset = 0;
+               m->rc.capacity = 7; /* start by reading header */
        }
 
        clear_bit(Rpending, &m->wsched);
-       p9_debug(P9_DEBUG_TRANS, "read mux %p pos %d size: %d = %d\n",
-                m, m->rpos, m->rsize, m->rsize-m->rpos);
-       err = p9_fd_read(m->client, m->rbuf + m->rpos,
-                                               m->rsize - m->rpos);
+       p9_debug(P9_DEBUG_TRANS, "read mux %p pos %zd size: %zd = %zd\n",
+                m, m->rc.offset, m->rc.capacity,
+                m->rc.capacity - m->rc.offset);
+       err = p9_fd_read(m->client, m->rc.sdata + m->rc.offset,
+                        m->rc.capacity - m->rc.offset);
        p9_debug(P9_DEBUG_TRANS, "mux %p got %d bytes\n", m, err);
-       if (err == -EAGAIN) {
+       if (err == -EAGAIN)
                goto end_clear;
-       }
 
        if (err <= 0)
                goto error;
 
-       m->rpos += err;
+       m->rc.offset += err;
 
-       if ((!m->req) && (m->rpos == m->rsize)) { /* header read in */
-               u16 tag;
+       /* header read in */
+       if ((!m->req) && (m->rc.offset == m->rc.capacity)) {
                p9_debug(P9_DEBUG_TRANS, "got new header\n");
 
-               n = le32_to_cpu(*(__le32 *) m->rbuf); /* read packet size */
-               if (n >= m->client->msize) {
+               err = p9_parse_header(&m->rc, NULL, NULL, NULL, 0);
+               if (err) {
+                       p9_debug(P9_DEBUG_ERROR,
+                                "error parsing header: %d\n", err);
+                       goto error;
+               }
+
+               if (m->rc.size >= m->client->msize) {
                        p9_debug(P9_DEBUG_ERROR,
-                                "requested packet size too big: %d\n", n);
+                                "requested packet size too big: %d\n",
+                                m->rc.size);
                        err = -EIO;
                        goto error;
                }
 
-               tag = le16_to_cpu(*(__le16 *) (m->rbuf+5)); /* read tag */
                p9_debug(P9_DEBUG_TRANS,
-                        "mux %p pkt: size: %d bytes tag: %d\n", m, n, tag);
+                        "mux %p pkt: size: %d bytes tag: %d\n",
+                        m, m->rc.size, m->rc.tag);
 
-               m->req = p9_tag_lookup(m->client, tag);
+               m->req = p9_tag_lookup(m->client, m->rc.tag);
                if (!m->req || (m->req->status != REQ_STATUS_SENT)) {
                        p9_debug(P9_DEBUG_ERROR, "Unexpected packet tag %d\n",
-                                tag);
+                                m->rc.tag);
                        err = -EIO;
                        goto error;
                }
 
                if (m->req->rc == NULL) {
-                       m->req->rc = kmalloc(sizeof(struct p9_fcall) +
-                                               m->client->msize, GFP_NOFS);
-                       if (!m->req->rc) {
-                               m->req = NULL;
-                               err = -ENOMEM;
-                               goto error;
-                       }
+                       p9_debug(P9_DEBUG_ERROR,
+                                "No recv fcall for tag %d (req %p), disconnecting!\n",
+                                m->rc.tag, m->req);
+                       m->req = NULL;
+                       err = -EIO;
+                       goto error;
                }
-               m->rbuf = (char *)m->req->rc + sizeof(struct p9_fcall);
-               memcpy(m->rbuf, m->tmp_buf, m->rsize);
-               m->rsize = n;
+               m->rc.sdata = (char *)m->req->rc + sizeof(struct p9_fcall);
+               memcpy(m->rc.sdata, m->tmp_buf, m->rc.capacity);
+               m->rc.capacity = m->rc.size;
        }
 
-       /* not an else because some packets (like clunk) have no payload */
-       if ((m->req) && (m->rpos == m->rsize)) { /* packet is read in */
+       /* packet is read in
+        * not an else because some packets (like clunk) have no payload
+        */
+       if ((m->req) && (m->rc.offset == m->rc.capacity)) {
                p9_debug(P9_DEBUG_TRANS, "got new packet\n");
                spin_lock(&m->client->lock);
                if (m->req->status != REQ_STATUS_ERROR)
@@ -375,9 +379,9 @@ static void p9_read_work(struct work_struct *work)
                list_del(&m->req->req_list);
                spin_unlock(&m->client->lock);
                p9_client_cb(m->client, m->req, status);
-               m->rbuf = NULL;
-               m->rpos = 0;
-               m->rsize = 0;
+               m->rc.sdata = NULL;
+               m->rc.offset = 0;
+               m->rc.capacity = 0;
                m->req = NULL;
        }
 
index 199bc76202d2552d23fe964f377bbf69eaf518b9..4acb1d5417aaf980bc7797c817eb9a9a350ecbf5 100644 (file)
@@ -658,7 +658,7 @@ p9_virtio_create(struct p9_client *client, const char *devname, char *args)
        mutex_unlock(&virtio_9p_lock);
 
        if (!found) {
-               pr_err("no channels available\n");
+               pr_err("no channels available for device %s\n", devname);
                return ret;
        }
 
index d040365ba98e7f7cafcbedd20ac3b4c632603414..8a4cc2f7f0db2a277e4281d5be62d76c7446a716 100644 (file)
@@ -307,6 +307,9 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev,
 
        /* check that it's our buffer */
        if (lowpan_is_ipv6(*skb_network_header(skb))) {
+               /* Pull off the 1-byte of 6lowpan header. */
+               skb_pull(skb, 1);
+
                /* Copy the packet so that the IPv6 header is
                 * properly aligned.
                 */
@@ -317,6 +320,7 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev,
 
                local_skb->protocol = htons(ETH_P_IPV6);
                local_skb->pkt_type = PACKET_HOST;
+               local_skb->dev = dev;
 
                skb_set_transport_header(local_skb, sizeof(struct ipv6hdr));
 
@@ -335,6 +339,8 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev,
                if (!local_skb)
                        goto drop;
 
+               local_skb->dev = dev;
+
                ret = iphc_decompress(local_skb, dev, chan);
                if (ret < 0) {
                        kfree_skb(local_skb);
@@ -343,7 +349,6 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev,
 
                local_skb->protocol = htons(ETH_P_IPV6);
                local_skb->pkt_type = PACKET_HOST;
-               local_skb->dev = dev;
 
                if (give_skb_to_upper(local_skb, dev)
                                != NET_RX_SUCCESS) {
index 41b5f3813f02f1ed73ffecdc7d685645ded8c265..c78ee2dc93237475eb002ae61a737e0137c387f9 100644 (file)
@@ -688,21 +688,29 @@ static u8 update_white_list(struct hci_request *req)
         * command to remove it from the controller.
         */
        list_for_each_entry(b, &hdev->le_white_list, list) {
-               struct hci_cp_le_del_from_white_list cp;
+               /* If the device is neither in pend_le_conns nor
+                * pend_le_reports then remove it from the whitelist.
+                */
+               if (!hci_pend_le_action_lookup(&hdev->pend_le_conns,
+                                              &b->bdaddr, b->bdaddr_type) &&
+                   !hci_pend_le_action_lookup(&hdev->pend_le_reports,
+                                              &b->bdaddr, b->bdaddr_type)) {
+                       struct hci_cp_le_del_from_white_list cp;
+
+                       cp.bdaddr_type = b->bdaddr_type;
+                       bacpy(&cp.bdaddr, &b->bdaddr);
 
-               if (hci_pend_le_action_lookup(&hdev->pend_le_conns,
-                                             &b->bdaddr, b->bdaddr_type) ||
-                   hci_pend_le_action_lookup(&hdev->pend_le_reports,
-                                             &b->bdaddr, b->bdaddr_type)) {
-                       white_list_entries++;
+                       hci_req_add(req, HCI_OP_LE_DEL_FROM_WHITE_LIST,
+                                   sizeof(cp), &cp);
                        continue;
                }
 
-               cp.bdaddr_type = b->bdaddr_type;
-               bacpy(&cp.bdaddr, &b->bdaddr);
+               if (hci_find_irk_by_addr(hdev, &b->bdaddr, b->bdaddr_type)) {
+                       /* White list can not be used with RPAs */
+                       return 0x00;
+               }
 
-               hci_req_add(req, HCI_OP_LE_DEL_FROM_WHITE_LIST,
-                           sizeof(cp), &cp);
+               white_list_entries++;
        }
 
        /* Since all no longer valid white list entries have been
index 39a5149f301085d45d37a94361e2b86486d852bd..eb4f5f24cbe36a1d4f524978d52d8558f4dbde80 100644 (file)
@@ -197,10 +197,20 @@ int l2cap_add_psm(struct l2cap_chan *chan, bdaddr_t *src, __le16 psm)
                chan->sport = psm;
                err = 0;
        } else {
-               u16 p;
+               u16 p, start, end, incr;
+
+               if (chan->src_type == BDADDR_BREDR) {
+                       start = L2CAP_PSM_DYN_START;
+                       end = L2CAP_PSM_AUTO_END;
+                       incr = 2;
+               } else {
+                       start = L2CAP_PSM_LE_DYN_START;
+                       end = L2CAP_PSM_LE_DYN_END;
+                       incr = 1;
+               }
 
                err = -EINVAL;
-               for (p = 0x1001; p < 0x1100; p += 2)
+               for (p = start; p <= end; p += incr)
                        if (!__l2cap_global_chan_by_addr(cpu_to_le16(p), src)) {
                                chan->psm   = cpu_to_le16(p);
                                chan->sport = cpu_to_le16(p);
index 1bb5515270449e8115a0c169ea5b6380594c40a4..e4cae72895a72bebc4c95b430d688e83cfebfada 100644 (file)
@@ -58,7 +58,7 @@ static int l2cap_validate_bredr_psm(u16 psm)
                return -EINVAL;
 
        /* Restrict usage of well-known PSMs */
-       if (psm < 0x1001 && !capable(CAP_NET_BIND_SERVICE))
+       if (psm < L2CAP_PSM_DYN_START && !capable(CAP_NET_BIND_SERVICE))
                return -EACCES;
 
        return 0;
@@ -67,11 +67,11 @@ static int l2cap_validate_bredr_psm(u16 psm)
 static int l2cap_validate_le_psm(u16 psm)
 {
        /* Valid LE_PSM ranges are defined only until 0x00ff */
-       if (psm > 0x00ff)
+       if (psm > L2CAP_PSM_LE_DYN_END)
                return -EINVAL;
 
        /* Restrict fixed, SIG assigned PSM values to CAP_NET_BIND_SERVICE */
-       if (psm <= 0x007f && !capable(CAP_NET_BIND_SERVICE))
+       if (psm < L2CAP_PSM_LE_DYN_START && !capable(CAP_NET_BIND_SERVICE))
                return -EACCES;
 
        return 0;
@@ -125,6 +125,9 @@ static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
                        goto done;
        }
 
+       bacpy(&chan->src, &la.l2_bdaddr);
+       chan->src_type = la.l2_bdaddr_type;
+
        if (la.l2_cid)
                err = l2cap_add_scid(chan, __le16_to_cpu(la.l2_cid));
        else
@@ -156,9 +159,6 @@ static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
                break;
        }
 
-       bacpy(&chan->src, &la.l2_bdaddr);
-       chan->src_type = la.l2_bdaddr_type;
-
        if (chan->psm && bdaddr_type_is_le(chan->src_type))
                chan->mode = L2CAP_MODE_LE_FLOWCTL;
 
index ffed8a1d4f27634866c93d22b4ceb059b956cc91..4b175df35184b08b5695d966f66b129148c6c011 100644 (file)
@@ -1072,22 +1072,6 @@ static void smp_notify_keys(struct l2cap_conn *conn)
                        hcon->dst_type = smp->remote_irk->addr_type;
                        queue_work(hdev->workqueue, &conn->id_addr_update_work);
                }
-
-               /* When receiving an indentity resolving key for
-                * a remote device that does not use a resolvable
-                * private address, just remove the key so that
-                * it is possible to use the controller white
-                * list for scanning.
-                *
-                * Userspace will have been told to not store
-                * this key at this point. So it is safe to
-                * just remove it.
-                */
-               if (!bacmp(&smp->remote_irk->rpa, BDADDR_ANY)) {
-                       list_del_rcu(&smp->remote_irk->list);
-                       kfree_rcu(smp->remote_irk, rcu);
-                       smp->remote_irk = NULL;
-               }
        }
 
        if (smp->csrk) {
index a1abe4936fe15299b7643b8944023050c5f938c1..3addc05b9a16676b07ca13bc6faf0cba62b3acfd 100644 (file)
@@ -121,6 +121,7 @@ static struct notifier_block br_device_notifier = {
        .notifier_call = br_device_event
 };
 
+/* called with RTNL */
 static int br_switchdev_event(struct notifier_block *unused,
                              unsigned long event, void *ptr)
 {
@@ -130,7 +131,6 @@ static int br_switchdev_event(struct notifier_block *unused,
        struct switchdev_notifier_fdb_info *fdb_info;
        int err = NOTIFY_DONE;
 
-       rtnl_lock();
        p = br_port_get_rtnl(dev);
        if (!p)
                goto out;
@@ -155,7 +155,6 @@ static int br_switchdev_event(struct notifier_block *unused,
        }
 
 out:
-       rtnl_unlock();
        return err;
 }
 
index 10d87753ed8737329c244b1ae7718e95c8abd118..9e43a315e6622028ba2b61fd8d36ab20305d611b 100644 (file)
@@ -152,7 +152,6 @@ static int process_one_ticket(struct ceph_auth_client *ac,
        void *ticket_buf = NULL;
        void *tp, *tpend;
        void **ptp;
-       struct ceph_timespec new_validity;
        struct ceph_crypto_key new_session_key;
        struct ceph_buffer *new_ticket_blob;
        unsigned long new_expires, new_renew_after;
@@ -193,8 +192,8 @@ static int process_one_ticket(struct ceph_auth_client *ac,
        if (ret)
                goto out;
 
-       ceph_decode_copy(&dp, &new_validity, sizeof(new_validity));
-       ceph_decode_timespec(&validity, &new_validity);
+       ceph_decode_timespec(&validity, dp);
+       dp += sizeof(struct ceph_timespec);
        new_expires = get_seconds() + validity.tv_sec;
        new_renew_after = new_expires - (validity.tv_sec / 4);
        dout(" expires=%lu renew_after=%lu\n", new_expires,
@@ -233,10 +232,10 @@ static int process_one_ticket(struct ceph_auth_client *ac,
                ceph_buffer_put(th->ticket_blob);
        th->session_key = new_session_key;
        th->ticket_blob = new_ticket_blob;
-       th->validity = new_validity;
        th->secret_id = new_secret_id;
        th->expires = new_expires;
        th->renew_after = new_renew_after;
+       th->have_key = true;
        dout(" got ticket service %d (%s) secret_id %lld len %d\n",
             type, ceph_entity_type_name(type), th->secret_id,
             (int)th->ticket_blob->vec.iov_len);
@@ -384,6 +383,24 @@ bad:
        return -ERANGE;
 }
 
+static bool need_key(struct ceph_x_ticket_handler *th)
+{
+       if (!th->have_key)
+               return true;
+
+       return get_seconds() >= th->renew_after;
+}
+
+static bool have_key(struct ceph_x_ticket_handler *th)
+{
+       if (th->have_key) {
+               if (get_seconds() >= th->expires)
+                       th->have_key = false;
+       }
+
+       return th->have_key;
+}
+
 static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed)
 {
        int want = ac->want_keys;
@@ -402,20 +419,18 @@ static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed)
                        continue;
 
                th = get_ticket_handler(ac, service);
-
                if (IS_ERR(th)) {
                        *pneed |= service;
                        continue;
                }
 
-               if (get_seconds() >= th->renew_after)
+               if (need_key(th))
                        *pneed |= service;
-               if (get_seconds() >= th->expires)
+               if (!have_key(th))
                        xi->have_keys &= ~service;
        }
 }
 
-
 static int ceph_x_build_request(struct ceph_auth_client *ac,
                                void *buf, void *end)
 {
@@ -667,14 +682,26 @@ static void ceph_x_destroy(struct ceph_auth_client *ac)
        ac->private = NULL;
 }
 
-static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac,
-                                  int peer_type)
+static void invalidate_ticket(struct ceph_auth_client *ac, int peer_type)
 {
        struct ceph_x_ticket_handler *th;
 
        th = get_ticket_handler(ac, peer_type);
        if (!IS_ERR(th))
-               memset(&th->validity, 0, sizeof(th->validity));
+               th->have_key = false;
+}
+
+static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac,
+                                        int peer_type)
+{
+       /*
+        * We are to invalidate a service ticket in the hopes of
+        * getting a new, hopefully more valid, one.  But, we won't get
+        * it unless our AUTH ticket is good, so invalidate AUTH ticket
+        * as well, just in case.
+        */
+       invalidate_ticket(ac, peer_type);
+       invalidate_ticket(ac, CEPH_ENTITY_TYPE_AUTH);
 }
 
 static int calcu_signature(struct ceph_x_authorizer *au,
index e8b7c6917d472f69d356252415efcb76a4afd3cf..40b1a3cf7397352e4673becccdbbb083c88f04d8 100644 (file)
@@ -16,7 +16,7 @@ struct ceph_x_ticket_handler {
        unsigned int service;
 
        struct ceph_crypto_key session_key;
-       struct ceph_timespec validity;
+       bool have_key;
 
        u64 secret_id;
        struct ceph_buffer *ticket_blob;
index 393bfb22d5bbafd83c20f5953b98c6709b637452..5fcfb98f309efb5018f84628902cd1711fc36705 100644 (file)
@@ -403,6 +403,7 @@ static int is_out(const struct crush_map *map,
  * @local_retries: localized retries
  * @local_fallback_retries: localized fallback retries
  * @recurse_to_leaf: true if we want one device under each item of given type (chooseleaf instead of choose)
+ * @stable: stable mode starts rep=0 in the recursive call for all replicas
  * @vary_r: pass r to recursive calls
  * @out2: second output vector for leaf items (if @recurse_to_leaf)
  * @parent_r: r value passed from the parent
@@ -419,6 +420,7 @@ static int crush_choose_firstn(const struct crush_map *map,
                               unsigned int local_fallback_retries,
                               int recurse_to_leaf,
                               unsigned int vary_r,
+                              unsigned int stable,
                               int *out2,
                               int parent_r)
 {
@@ -433,13 +435,13 @@ static int crush_choose_firstn(const struct crush_map *map,
        int collide, reject;
        int count = out_size;
 
-       dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d tries %d recurse_tries %d local_retries %d local_fallback_retries %d parent_r %d\n",
+       dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d tries %d recurse_tries %d local_retries %d local_fallback_retries %d parent_r %d stable %d\n",
                recurse_to_leaf ? "_LEAF" : "",
                bucket->id, x, outpos, numrep,
                tries, recurse_tries, local_retries, local_fallback_retries,
-               parent_r);
+               parent_r, stable);
 
-       for (rep = outpos; rep < numrep && count > 0 ; rep++) {
+       for (rep = stable ? 0 : outpos; rep < numrep && count > 0 ; rep++) {
                /* keep trying until we get a non-out, non-colliding item */
                ftotal = 0;
                skip_rep = 0;
@@ -512,13 +514,14 @@ static int crush_choose_firstn(const struct crush_map *map,
                                                if (crush_choose_firstn(map,
                                                         map->buckets[-1-item],
                                                         weight, weight_max,
-                                                        x, outpos+1, 0,
+                                                        x, stable ? 1 : outpos+1, 0,
                                                         out2, outpos, count,
                                                         recurse_tries, 0,
                                                         local_retries,
                                                         local_fallback_retries,
                                                         0,
                                                         vary_r,
+                                                        stable,
                                                         NULL,
                                                         sub_r) <= outpos)
                                                        /* didn't get leaf */
@@ -816,6 +819,7 @@ int crush_do_rule(const struct crush_map *map,
        int choose_local_fallback_retries = map->choose_local_fallback_tries;
 
        int vary_r = map->chooseleaf_vary_r;
+       int stable = map->chooseleaf_stable;
 
        if ((__u32)ruleno >= map->max_rules) {
                dprintk(" bad ruleno %d\n", ruleno);
@@ -835,7 +839,8 @@ int crush_do_rule(const struct crush_map *map,
                case CRUSH_RULE_TAKE:
                        if ((curstep->arg1 >= 0 &&
                             curstep->arg1 < map->max_devices) ||
-                           (-1-curstep->arg1 < map->max_buckets &&
+                           (-1-curstep->arg1 >= 0 &&
+                            -1-curstep->arg1 < map->max_buckets &&
                             map->buckets[-1-curstep->arg1])) {
                                w[0] = curstep->arg1;
                                wsize = 1;
@@ -869,6 +874,11 @@ int crush_do_rule(const struct crush_map *map,
                                vary_r = curstep->arg1;
                        break;
 
+               case CRUSH_RULE_SET_CHOOSELEAF_STABLE:
+                       if (curstep->arg1 >= 0)
+                               stable = curstep->arg1;
+                       break;
+
                case CRUSH_RULE_CHOOSELEAF_FIRSTN:
                case CRUSH_RULE_CHOOSE_FIRSTN:
                        firstn = 1;
@@ -888,6 +898,7 @@ int crush_do_rule(const struct crush_map *map,
                        osize = 0;
 
                        for (i = 0; i < wsize; i++) {
+                               int bno;
                                /*
                                 * see CRUSH_N, CRUSH_N_MINUS macros.
                                 * basically, numrep <= 0 means relative to
@@ -900,6 +911,13 @@ int crush_do_rule(const struct crush_map *map,
                                                continue;
                                }
                                j = 0;
+                               /* make sure bucket id is valid */
+                               bno = -1 - w[i];
+                               if (bno < 0 || bno >= map->max_buckets) {
+                                       /* w[i] is probably CRUSH_ITEM_NONE */
+                                       dprintk("  bad w[i] %d\n", w[i]);
+                                       continue;
+                               }
                                if (firstn) {
                                        int recurse_tries;
                                        if (choose_leaf_tries)
@@ -911,7 +929,7 @@ int crush_do_rule(const struct crush_map *map,
                                                recurse_tries = choose_tries;
                                        osize += crush_choose_firstn(
                                                map,
-                                               map->buckets[-1-w[i]],
+                                               map->buckets[bno],
                                                weight, weight_max,
                                                x, numrep,
                                                curstep->arg2,
@@ -923,6 +941,7 @@ int crush_do_rule(const struct crush_map *map,
                                                choose_local_fallback_retries,
                                                recurse_to_leaf,
                                                vary_r,
+                                               stable,
                                                c+osize,
                                                0);
                                } else {
@@ -930,7 +949,7 @@ int crush_do_rule(const struct crush_map *map,
                                                    numrep : (result_max-osize));
                                        crush_choose_indep(
                                                map,
-                                               map->buckets[-1-w[i]],
+                                               map->buckets[bno],
                                                weight, weight_max,
                                                x, out_size, numrep,
                                                curstep->arg2,
index 9981039ef4ffcca29081b83a5e06a81ce6871f20..9cfedf565f5b236b5ede7974466cf62af6ad995c 100644 (file)
@@ -23,9 +23,6 @@
 #include <linux/ceph/pagelist.h>
 #include <linux/export.h>
 
-#define list_entry_next(pos, member)                                   \
-       list_entry(pos->member.next, typeof(*pos), member)
-
 /*
  * Ceph uses the messenger to exchange ceph_msg messages with other
  * hosts in the system.  The messenger provides ordered and reliable
@@ -672,6 +669,8 @@ static void reset_connection(struct ceph_connection *con)
        }
        con->in_seq = 0;
        con->in_seq_acked = 0;
+
+       con->out_skip = 0;
 }
 
 /*
@@ -771,6 +770,8 @@ static u32 get_global_seq(struct ceph_messenger *msgr, u32 gt)
 
 static void con_out_kvec_reset(struct ceph_connection *con)
 {
+       BUG_ON(con->out_skip);
+
        con->out_kvec_left = 0;
        con->out_kvec_bytes = 0;
        con->out_kvec_cur = &con->out_kvec[0];
@@ -779,9 +780,9 @@ static void con_out_kvec_reset(struct ceph_connection *con)
 static void con_out_kvec_add(struct ceph_connection *con,
                                size_t size, void *data)
 {
-       int index;
+       int index = con->out_kvec_left;
 
-       index = con->out_kvec_left;
+       BUG_ON(con->out_skip);
        BUG_ON(index >= ARRAY_SIZE(con->out_kvec));
 
        con->out_kvec[index].iov_len = size;
@@ -790,6 +791,27 @@ static void con_out_kvec_add(struct ceph_connection *con,
        con->out_kvec_bytes += size;
 }
 
+/*
+ * Chop off a kvec from the end.  Return residual number of bytes for
+ * that kvec, i.e. how many bytes would have been written if the kvec
+ * hadn't been nuked.
+ */
+static int con_out_kvec_skip(struct ceph_connection *con)
+{
+       int off = con->out_kvec_cur - con->out_kvec;
+       int skip = 0;
+
+       if (con->out_kvec_bytes > 0) {
+               skip = con->out_kvec[off + con->out_kvec_left - 1].iov_len;
+               BUG_ON(con->out_kvec_bytes < skip);
+               BUG_ON(!con->out_kvec_left);
+               con->out_kvec_bytes -= skip;
+               con->out_kvec_left--;
+       }
+
+       return skip;
+}
+
 #ifdef CONFIG_BLOCK
 
 /*
@@ -1042,7 +1064,7 @@ static bool ceph_msg_data_pagelist_advance(struct ceph_msg_data_cursor *cursor,
        /* Move on to the next page */
 
        BUG_ON(list_is_last(&cursor->page->lru, &pagelist->head));
-       cursor->page = list_entry_next(cursor->page, lru);
+       cursor->page = list_next_entry(cursor->page, lru);
        cursor->last_piece = cursor->resid <= PAGE_SIZE;
 
        return true;
@@ -1166,7 +1188,7 @@ static bool ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor,
        if (!cursor->resid && cursor->total_resid) {
                WARN_ON(!cursor->last_piece);
                BUG_ON(list_is_last(&cursor->data->links, cursor->data_head));
-               cursor->data = list_entry_next(cursor->data, links);
+               cursor->data = list_next_entry(cursor->data, links);
                __ceph_msg_data_cursor_init(cursor);
                new_piece = true;
        }
@@ -1197,7 +1219,6 @@ static void prepare_write_message_footer(struct ceph_connection *con)
        m->footer.flags |= CEPH_MSG_FOOTER_COMPLETE;
 
        dout("prepare_write_message_footer %p\n", con);
-       con->out_kvec_is_msg = true;
        con->out_kvec[v].iov_base = &m->footer;
        if (con->peer_features & CEPH_FEATURE_MSG_AUTH) {
                if (con->ops->sign_message)
@@ -1225,7 +1246,6 @@ static void prepare_write_message(struct ceph_connection *con)
        u32 crc;
 
        con_out_kvec_reset(con);
-       con->out_kvec_is_msg = true;
        con->out_msg_done = false;
 
        /* Sneak an ack in there first?  If we can get it into the same
@@ -1265,18 +1285,19 @@ static void prepare_write_message(struct ceph_connection *con)
 
        /* tag + hdr + front + middle */
        con_out_kvec_add(con, sizeof (tag_msg), &tag_msg);
-       con_out_kvec_add(con, sizeof (m->hdr), &m->hdr);
+       con_out_kvec_add(con, sizeof(con->out_hdr), &con->out_hdr);
        con_out_kvec_add(con, m->front.iov_len, m->front.iov_base);
 
        if (m->middle)
                con_out_kvec_add(con, m->middle->vec.iov_len,
                        m->middle->vec.iov_base);
 
-       /* fill in crc (except data pages), footer */
+       /* fill in hdr crc and finalize hdr */
        crc = crc32c(0, &m->hdr, offsetof(struct ceph_msg_header, crc));
        con->out_msg->hdr.crc = cpu_to_le32(crc);
-       con->out_msg->footer.flags = 0;
+       memcpy(&con->out_hdr, &con->out_msg->hdr, sizeof(con->out_hdr));
 
+       /* fill in front and middle crc, footer */
        crc = crc32c(0, m->front.iov_base, m->front.iov_len);
        con->out_msg->footer.front_crc = cpu_to_le32(crc);
        if (m->middle) {
@@ -1288,6 +1309,7 @@ static void prepare_write_message(struct ceph_connection *con)
        dout("%s front_crc %u middle_crc %u\n", __func__,
             le32_to_cpu(con->out_msg->footer.front_crc),
             le32_to_cpu(con->out_msg->footer.middle_crc));
+       con->out_msg->footer.flags = 0;
 
        /* is there a data payload? */
        con->out_msg->footer.data_crc = 0;
@@ -1492,7 +1514,6 @@ static int write_partial_kvec(struct ceph_connection *con)
                }
        }
        con->out_kvec_left = 0;
-       con->out_kvec_is_msg = false;
        ret = 1;
 out:
        dout("write_partial_kvec %p %d left in %d kvecs ret = %d\n", con,
@@ -1584,6 +1605,7 @@ static int write_partial_skip(struct ceph_connection *con)
 {
        int ret;
 
+       dout("%s %p %d left\n", __func__, con, con->out_skip);
        while (con->out_skip > 0) {
                size_t size = min(con->out_skip, (int) PAGE_CACHE_SIZE);
 
@@ -2506,13 +2528,13 @@ more:
 
 more_kvec:
        /* kvec data queued? */
-       if (con->out_skip) {
-               ret = write_partial_skip(con);
+       if (con->out_kvec_left) {
+               ret = write_partial_kvec(con);
                if (ret <= 0)
                        goto out;
        }
-       if (con->out_kvec_left) {
-               ret = write_partial_kvec(con);
+       if (con->out_skip) {
+               ret = write_partial_skip(con);
                if (ret <= 0)
                        goto out;
        }
@@ -2805,13 +2827,17 @@ static bool con_backoff(struct ceph_connection *con)
 
 static void con_fault_finish(struct ceph_connection *con)
 {
+       dout("%s %p\n", __func__, con);
+
        /*
         * in case we faulted due to authentication, invalidate our
         * current tickets so that we can get new ones.
         */
-       if (con->auth_retry && con->ops->invalidate_authorizer) {
-               dout("calling invalidate_authorizer()\n");
-               con->ops->invalidate_authorizer(con);
+       if (con->auth_retry) {
+               dout("auth_retry %d, invalidating\n", con->auth_retry);
+               if (con->ops->invalidate_authorizer)
+                       con->ops->invalidate_authorizer(con);
+               con->auth_retry = 0;
        }
 
        if (con->ops->fault)
@@ -3050,16 +3076,31 @@ void ceph_msg_revoke(struct ceph_msg *msg)
                ceph_msg_put(msg);
        }
        if (con->out_msg == msg) {
-               dout("%s %p msg %p - was sending\n", __func__, con, msg);
-               con->out_msg = NULL;
-               if (con->out_kvec_is_msg) {
-                       con->out_skip = con->out_kvec_bytes;
-                       con->out_kvec_is_msg = false;
+               BUG_ON(con->out_skip);
+               /* footer */
+               if (con->out_msg_done) {
+                       con->out_skip += con_out_kvec_skip(con);
+               } else {
+                       BUG_ON(!msg->data_length);
+                       if (con->peer_features & CEPH_FEATURE_MSG_AUTH)
+                               con->out_skip += sizeof(msg->footer);
+                       else
+                               con->out_skip += sizeof(msg->old_footer);
                }
+               /* data, middle, front */
+               if (msg->data_length)
+                       con->out_skip += msg->cursor.total_resid;
+               if (msg->middle)
+                       con->out_skip += con_out_kvec_skip(con);
+               con->out_skip += con_out_kvec_skip(con);
+
+               dout("%s %p msg %p - was sending, will write %d skip %d\n",
+                    __func__, con, msg, con->out_kvec_bytes, con->out_skip);
                msg->hdr.seq = 0;
-
+               con->out_msg = NULL;
                ceph_msg_put(msg);
        }
+
        mutex_unlock(&con->mutex);
 }
 
@@ -3361,9 +3402,7 @@ static void ceph_msg_free(struct ceph_msg *m)
 static void ceph_msg_release(struct kref *kref)
 {
        struct ceph_msg *m = container_of(kref, struct ceph_msg, kref);
-       LIST_HEAD(data);
-       struct list_head *links;
-       struct list_head *next;
+       struct ceph_msg_data *data, *next;
 
        dout("%s %p\n", __func__, m);
        WARN_ON(!list_empty(&m->list_head));
@@ -3376,12 +3415,8 @@ static void ceph_msg_release(struct kref *kref)
                m->middle = NULL;
        }
 
-       list_splice_init(&m->data, &data);
-       list_for_each_safe(links, next, &data) {
-               struct ceph_msg_data *data;
-
-               data = list_entry(links, struct ceph_msg_data, links);
-               list_del_init(links);
+       list_for_each_entry_safe(data, next, &m->data, links) {
+               list_del_init(&data->links);
                ceph_msg_data_destroy(data);
        }
        m->data_length = 0;
index edda01626a459efbfdaeebd46fd6a0b13a037879..de85dddc3dc08cfeee81435c4475a6d975c4cd96 100644 (file)
@@ -364,10 +364,6 @@ static bool have_debugfs_info(struct ceph_mon_client *monc)
        return monc->client->have_fsid && monc->auth->global_id > 0;
 }
 
-/*
- * The monitor responds with mount ack indicate mount success.  The
- * included client ticket allows the client to talk to MDSs and OSDs.
- */
 static void ceph_monc_handle_map(struct ceph_mon_client *monc,
                                 struct ceph_msg *msg)
 {
index f8f235930d887adf1df94314b18c719638328df0..3534e12683d3b2b6efa9988faff9e9b3249e6fa6 100644 (file)
@@ -1770,6 +1770,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg)
        u32 osdmap_epoch;
        int already_completed;
        u32 bytes;
+       u8 decode_redir;
        unsigned int i;
 
        tid = le64_to_cpu(msg->hdr.tid);
@@ -1841,6 +1842,15 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg)
                p += 8 + 4; /* skip replay_version */
                p += 8; /* skip user_version */
 
+               if (le16_to_cpu(msg->hdr.version) >= 7)
+                       ceph_decode_8_safe(&p, end, decode_redir, bad_put);
+               else
+                       decode_redir = 1;
+       } else {
+               decode_redir = 0;
+       }
+
+       if (decode_redir) {
                err = ceph_redirect_decode(&p, end, &redir);
                if (err)
                        goto bad_put;
index 7d8f581d9f1f7987b8d7051160c34f42ad2f5e73..243574c8cf33807fcaf9374530358f1e44080764 100644 (file)
@@ -342,23 +342,32 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
         c->choose_local_tries = ceph_decode_32(p);
         c->choose_local_fallback_tries =  ceph_decode_32(p);
         c->choose_total_tries = ceph_decode_32(p);
-        dout("crush decode tunable choose_local_tries = %d",
+        dout("crush decode tunable choose_local_tries = %d\n",
              c->choose_local_tries);
-        dout("crush decode tunable choose_local_fallback_tries = %d",
+        dout("crush decode tunable choose_local_fallback_tries = %d\n",
              c->choose_local_fallback_tries);
-        dout("crush decode tunable choose_total_tries = %d",
+        dout("crush decode tunable choose_total_tries = %d\n",
              c->choose_total_tries);
 
        ceph_decode_need(p, end, sizeof(u32), done);
        c->chooseleaf_descend_once = ceph_decode_32(p);
-       dout("crush decode tunable chooseleaf_descend_once = %d",
+       dout("crush decode tunable chooseleaf_descend_once = %d\n",
             c->chooseleaf_descend_once);
 
        ceph_decode_need(p, end, sizeof(u8), done);
        c->chooseleaf_vary_r = ceph_decode_8(p);
-       dout("crush decode tunable chooseleaf_vary_r = %d",
+       dout("crush decode tunable chooseleaf_vary_r = %d\n",
             c->chooseleaf_vary_r);
 
+       /* skip straw_calc_version, allowed_bucket_algs */
+       ceph_decode_need(p, end, sizeof(u8) + sizeof(u32), done);
+       *p += sizeof(u8) + sizeof(u32);
+
+       ceph_decode_need(p, end, sizeof(u8), done);
+       c->chooseleaf_stable = ceph_decode_8(p);
+       dout("crush decode tunable chooseleaf_stable = %d\n",
+            c->chooseleaf_stable);
+
 done:
        dout("crush_decode success\n");
        return c;
index cc9e3652cf93a6306e6f614f966e0fe7cf17a10e..8cba3d852f251c503b193823b71b27aaef3fb3ae 100644 (file)
@@ -4351,6 +4351,7 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
 
                diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
                diffs |= p->vlan_tci ^ skb->vlan_tci;
+               diffs |= skb_metadata_dst_cmp(p, skb);
                if (maclen == ETH_HLEN)
                        diffs |= compare_ether_header(skb_mac_header(p),
                                                      skb_mac_header(skb));
@@ -4548,10 +4549,12 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
                break;
 
        case GRO_MERGED_FREE:
-               if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
+               if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) {
+                       skb_dst_drop(skb);
                        kmem_cache_free(skbuff_head_cache, skb);
-               else
+               } else {
                        __kfree_skb(skb);
+               }
                break;
 
        case GRO_HELD:
index 1df98c55744029de02522e34ed00c3cd2ca03af1..e92b759d906c1bbcad5ff3ecc977d6393df90361 100644 (file)
@@ -93,10 +93,17 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse)
  *  @sk2: Socket belonging to the existing reuseport group.
  *  May return ENOMEM and not add socket to group under memory pressure.
  */
-int reuseport_add_sock(struct sock *sk, const struct sock *sk2)
+int reuseport_add_sock(struct sock *sk, struct sock *sk2)
 {
        struct sock_reuseport *reuse;
 
+       if (!rcu_access_pointer(sk2->sk_reuseport_cb)) {
+               int err = reuseport_alloc(sk2);
+
+               if (err)
+                       return err;
+       }
+
        spin_lock_bh(&reuseport_lock);
        reuse = rcu_dereference_protected(sk2->sk_reuseport_cb,
                                          lockdep_is_held(&reuseport_lock)),
index c22920525e5d844bd7e4210b233440b883c2681e..775824720b6b57d68460fc8e05915e479f221414 100644 (file)
@@ -353,6 +353,7 @@ config INET_ESP
        select CRYPTO_CBC
        select CRYPTO_SHA1
        select CRYPTO_DES
+       select CRYPTO_ECHAINIV
        ---help---
          Support for IPsec ESP.
 
index 744e5936c10d7ec555d1ca621f5bd4be57f1c72b..d07fc076bea0a4bc96f68075fb3bb79b95007e63 100644 (file)
@@ -289,10 +289,8 @@ static void __node_free_rcu(struct rcu_head *head)
 
        if (!n->tn_bits)
                kmem_cache_free(trie_leaf_kmem, n);
-       else if (n->tn_bits <= TNODE_KMALLOC_MAX)
-               kfree(n);
        else
-               vfree(n);
+               kvfree(n);
 }
 
 #define node_free(n) call_rcu(&tn_info(n)->rcu, __node_free_rcu)
@@ -1396,9 +1394,10 @@ found:
                struct fib_info *fi = fa->fa_info;
                int nhsel, err;
 
-               if ((index >= (1ul << fa->fa_slen)) &&
-                   ((BITS_PER_LONG > KEYLENGTH) || (fa->fa_slen != KEYLENGTH)))
-                       continue;
+               if ((BITS_PER_LONG > KEYLENGTH) || (fa->fa_slen < KEYLENGTH)) {
+                       if (index >= (1ul << fa->fa_slen))
+                               continue;
+               }
                if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
                        continue;
                if (fi->fib_dead)
index 8bb8e7ad85483234d8a852782515ca11c93af68d..6029157a19ed1632fbf642416c0199829bf6d7ff 100644 (file)
@@ -361,13 +361,20 @@ struct sock *inet_diag_find_one_icsk(struct net *net,
                                 req->id.idiag_dport, req->id.idiag_src[0],
                                 req->id.idiag_sport, req->id.idiag_if);
 #if IS_ENABLED(CONFIG_IPV6)
-       else if (req->sdiag_family == AF_INET6)
-               sk = inet6_lookup(net, hashinfo,
-                                 (struct in6_addr *)req->id.idiag_dst,
-                                 req->id.idiag_dport,
-                                 (struct in6_addr *)req->id.idiag_src,
-                                 req->id.idiag_sport,
-                                 req->id.idiag_if);
+       else if (req->sdiag_family == AF_INET6) {
+               if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) &&
+                   ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_src))
+                       sk = inet_lookup(net, hashinfo, req->id.idiag_dst[3],
+                                        req->id.idiag_dport, req->id.idiag_src[3],
+                                        req->id.idiag_sport, req->id.idiag_if);
+               else
+                       sk = inet6_lookup(net, hashinfo,
+                                         (struct in6_addr *)req->id.idiag_dst,
+                                         req->id.idiag_dport,
+                                         (struct in6_addr *)req->id.idiag_src,
+                                         req->id.idiag_sport,
+                                         req->id.idiag_if);
+       }
 #endif
        else
                return ERR_PTR(-EINVAL);
index 3f00810b7288991f83ba147a65ac4f78e68ae594..187c6fcc3027779ba31d0721eb4a7389addaaf8a 100644 (file)
@@ -661,6 +661,7 @@ int ip_defrag(struct net *net, struct sk_buff *skb, u32 user)
        struct ipq *qp;
 
        IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS);
+       skb_orphan(skb);
 
        /* Lookup (or create) queue header */
        qp = ip_find(net, ip_hdr(skb), user, vif);
index b1209b63381f6f9ae81cf258ad3724e1b6a6b9d8..d77eb0c3b6842bc5605a882f9ad46c609dcdeb50 100644 (file)
@@ -316,7 +316,10 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
        const struct iphdr *iph = ip_hdr(skb);
        struct rtable *rt;
 
-       if (sysctl_ip_early_demux && !skb_dst(skb) && !skb->sk) {
+       if (sysctl_ip_early_demux &&
+           !skb_dst(skb) &&
+           !skb->sk &&
+           !ip_is_fragment(iph)) {
                const struct net_protocol *ipprot;
                int protocol = iph->protocol;
 
index 67f7c9de0b16689dd1e7d075882b9477ac5e2bf9..2ed9dd2b5f2f5a6ea3ea52208e5ec1424a501fc5 100644 (file)
@@ -143,7 +143,11 @@ static char dhcp_client_identifier[253] __initdata;
 
 /* Persistent data: */
 
+#ifdef IPCONFIG_DYNAMIC
 static int ic_proto_used;                      /* Protocol used, if any */
+#else
+#define ic_proto_used 0
+#endif
 static __be32 ic_nameservers[CONF_NAMESERVERS_MAX]; /* DNS Server IP addresses */
 static u8 ic_domain[64];               /* DNS (not NIS) domain name */
 
index 6fb869f646bf7a15b2f012cc1982c2a9f3d5935f..a04dee536b8ef8b5f4c1a085563d5d37b6fa118b 100644 (file)
@@ -27,8 +27,6 @@ static int nf_ct_ipv4_gather_frags(struct net *net, struct sk_buff *skb,
 {
        int err;
 
-       skb_orphan(skb);
-
        local_bh_disable();
        err = ip_defrag(net, skb, user);
        local_bh_enable();
index fd17eec9352517f68b86fcd08bf5b215bfa6fa46..19746b3fcbbe68db18d774c1f0178e2e5a418952 100644 (file)
 
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
+#include <asm/unaligned.h>
 #include <net/busy_poll.h>
 
 int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
@@ -2638,6 +2639,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
        const struct inet_connection_sock *icsk = inet_csk(sk);
        u32 now = tcp_time_stamp;
        unsigned int start;
+       u64 rate64;
        u32 rate;
 
        memset(info, 0, sizeof(*info));
@@ -2703,15 +2705,17 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
        info->tcpi_total_retrans = tp->total_retrans;
 
        rate = READ_ONCE(sk->sk_pacing_rate);
-       info->tcpi_pacing_rate = rate != ~0U ? rate : ~0ULL;
+       rate64 = rate != ~0U ? rate : ~0ULL;
+       put_unaligned(rate64, &info->tcpi_pacing_rate);
 
        rate = READ_ONCE(sk->sk_max_pacing_rate);
-       info->tcpi_max_pacing_rate = rate != ~0U ? rate : ~0ULL;
+       rate64 = rate != ~0U ? rate : ~0ULL;
+       put_unaligned(rate64, &info->tcpi_max_pacing_rate);
 
        do {
                start = u64_stats_fetch_begin_irq(&tp->syncp);
-               info->tcpi_bytes_acked = tp->bytes_acked;
-               info->tcpi_bytes_received = tp->bytes_received;
+               put_unaligned(tp->bytes_acked, &info->tcpi_bytes_acked);
+               put_unaligned(tp->bytes_received, &info->tcpi_bytes_received);
        } while (u64_stats_fetch_retry_irq(&tp->syncp, start));
        info->tcpi_segs_out = tp->segs_out;
        info->tcpi_segs_in = tp->segs_in;
index 0003d409fec5beec010050ef5ccafd58bf2f1501..1c2a73406261921fbea84333eabe3867106f980a 100644 (file)
@@ -2164,8 +2164,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
 {
        struct tcp_sock *tp = tcp_sk(sk);
        struct sk_buff *skb;
-       int cnt, oldcnt;
-       int err;
+       int cnt, oldcnt, lost;
        unsigned int mss;
        /* Use SACK to deduce losses of new sequences sent during recovery */
        const u32 loss_high = tcp_is_sack(tp) ?  tp->snd_nxt : tp->high_seq;
@@ -2205,9 +2204,10 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
                                break;
 
                        mss = tcp_skb_mss(skb);
-                       err = tcp_fragment(sk, skb, (packets - oldcnt) * mss,
-                                          mss, GFP_ATOMIC);
-                       if (err < 0)
+                       /* If needed, chop off the prefix to mark as lost. */
+                       lost = (packets - oldcnt) * mss;
+                       if (lost < skb->len &&
+                           tcp_fragment(sk, skb, lost, mss, GFP_ATOMIC) < 0)
                                break;
                        cnt = packets;
                }
@@ -2366,8 +2366,6 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
                        tp->snd_ssthresh = tp->prior_ssthresh;
                        tcp_ecn_withdraw_cwr(tp);
                }
-       } else {
-               tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh);
        }
        tp->snd_cwnd_stamp = tcp_time_stamp;
        tp->undo_marker = 0;
index 5ced3e4013e3c2119f43b1674e69ff18e281e664..a4d523709ab30a2171edf76b296d3f8e01fef7e4 100644 (file)
@@ -707,7 +707,8 @@ release_sk1:
    outside socket context is ugly, certainly. What can I do?
  */
 
-static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
+static void tcp_v4_send_ack(struct net *net,
+                           struct sk_buff *skb, u32 seq, u32 ack,
                            u32 win, u32 tsval, u32 tsecr, int oif,
                            struct tcp_md5sig_key *key,
                            int reply_flags, u8 tos)
@@ -722,7 +723,6 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
                        ];
        } rep;
        struct ip_reply_arg arg;
-       struct net *net = dev_net(skb_dst(skb)->dev);
 
        memset(&rep.th, 0, sizeof(struct tcphdr));
        memset(&arg, 0, sizeof(arg));
@@ -784,7 +784,8 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
        struct inet_timewait_sock *tw = inet_twsk(sk);
        struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
 
-       tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
+       tcp_v4_send_ack(sock_net(sk), skb,
+                       tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
                        tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
                        tcp_time_stamp + tcptw->tw_ts_offset,
                        tcptw->tw_ts_recent,
@@ -803,8 +804,10 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
        /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
         * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
         */
-       tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
-                       tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
+       u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 :
+                                            tcp_sk(sk)->snd_nxt;
+
+       tcp_v4_send_ack(sock_net(sk), skb, seq,
                        tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd,
                        tcp_time_stamp,
                        req->ts_recent,
index dc45b538e237b908f4d6c6337b4c533a4236735a..be0b21852b138ebc5eed9caf37740cbe1cb1abe0 100644 (file)
@@ -499,6 +499,7 @@ static struct sock *udp4_lib_lookup2(struct net *net,
        struct sock *sk, *result;
        struct hlist_nulls_node *node;
        int score, badness, matches = 0, reuseport = 0;
+       bool select_ok = true;
        u32 hash = 0;
 
 begin:
@@ -512,14 +513,18 @@ begin:
                        badness = score;
                        reuseport = sk->sk_reuseport;
                        if (reuseport) {
-                               struct sock *sk2;
                                hash = udp_ehashfn(net, daddr, hnum,
                                                   saddr, sport);
-                               sk2 = reuseport_select_sock(sk, hash, skb,
-                                                           sizeof(struct udphdr));
-                               if (sk2) {
-                                       result = sk2;
-                                       goto found;
+                               if (select_ok) {
+                                       struct sock *sk2;
+
+                                       sk2 = reuseport_select_sock(sk, hash, skb,
+                                                       sizeof(struct udphdr));
+                                       if (sk2) {
+                                               result = sk2;
+                                               select_ok = false;
+                                               goto found;
+                                       }
                                }
                                matches = 1;
                        }
@@ -563,6 +568,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
        unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
        struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
        int score, badness, matches = 0, reuseport = 0;
+       bool select_ok = true;
        u32 hash = 0;
 
        rcu_read_lock();
@@ -601,14 +607,18 @@ begin:
                        badness = score;
                        reuseport = sk->sk_reuseport;
                        if (reuseport) {
-                               struct sock *sk2;
                                hash = udp_ehashfn(net, daddr, hnum,
                                                   saddr, sport);
-                               sk2 = reuseport_select_sock(sk, hash, skb,
+                               if (select_ok) {
+                                       struct sock *sk2;
+
+                                       sk2 = reuseport_select_sock(sk, hash, skb,
                                                        sizeof(struct udphdr));
-                               if (sk2) {
-                                       result = sk2;
-                                       goto found;
+                                       if (sk2) {
+                                               result = sk2;
+                                               select_ok = false;
+                                               goto found;
+                                       }
                                }
                                matches = 1;
                        }
index bb7dabe2ebbf8aabc624af02c00dbc2b33ad544a..40c897515ddc44d57c434dcd761c2ddc5e7fad50 100644 (file)
@@ -69,6 +69,7 @@ config INET6_ESP
        select CRYPTO_CBC
        select CRYPTO_SHA1
        select CRYPTO_DES
+       select CRYPTO_ECHAINIV
        ---help---
          Support for IPsec ESP.
 
index 517c55b01ba84b55a0004ce9505d14c4a3951cfc..428162155280ca2af782ea9fd9fa26e0d1666d89 100644 (file)
@@ -162,6 +162,9 @@ ipv4_connected:
        fl6.fl6_dport = inet->inet_dport;
        fl6.fl6_sport = inet->inet_sport;
 
+       if (!fl6.flowi6_oif)
+               fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex;
+
        if (!fl6.flowi6_oif && (addr_type&IPV6_ADDR_MULTICAST))
                fl6.flowi6_oif = np->mcast_oif;
 
index 23de98f976d5b7c846e42e9260acbac3b1632c17..a163102f1803e5f9eb4a60c501bd4adcc27bd62b 100644 (file)
@@ -909,6 +909,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
        struct rt6_info *rt;
 #endif
        int err;
+       int flags = 0;
 
        /* The correct way to handle this would be to do
         * ip6_route_get_saddr, and then ip6_route_output; however,
@@ -940,10 +941,13 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
                        dst_release(*dst);
                        *dst = NULL;
                }
+
+               if (fl6->flowi6_oif)
+                       flags |= RT6_LOOKUP_F_IFACE;
        }
 
        if (!*dst)
-               *dst = ip6_route_output(net, sk, fl6);
+               *dst = ip6_route_output_flags(net, sk, fl6, flags);
 
        err = (*dst)->error;
        if (err)
index 3c8834bc822d2b64b8d7f4b33a9042dd279c584a..ed446639219c3ae8832a19a1f944e4dd6ddc6302 100644 (file)
@@ -1183,11 +1183,10 @@ static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table
        return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
 }
 
-struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
-                                   struct flowi6 *fl6)
+struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
+                                        struct flowi6 *fl6, int flags)
 {
        struct dst_entry *dst;
-       int flags = 0;
        bool any_src;
 
        dst = l3mdev_rt6_dst_by_oif(net, fl6);
@@ -1208,7 +1207,7 @@ struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
 
        return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
 }
-EXPORT_SYMBOL(ip6_route_output);
+EXPORT_SYMBOL_GPL(ip6_route_output_flags);
 
 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
 {
index e794ef66a40135f33b5731a1059a6f45d0fc90b7..2066d1c25a11bb18bbe75caf4abe38eeccaa4b61 100644 (file)
@@ -201,14 +201,14 @@ static int ipip6_tunnel_create(struct net_device *dev)
        if ((__force u16)t->parms.i_flags & SIT_ISATAP)
                dev->priv_flags |= IFF_ISATAP;
 
+       dev->rtnl_link_ops = &sit_link_ops;
+
        err = register_netdevice(dev);
        if (err < 0)
                goto out;
 
        ipip6_tunnel_clone_6rd(dev, sitn);
 
-       dev->rtnl_link_ops = &sit_link_ops;
-
        dev_hold(dev);
 
        ipip6_tunnel_link(sitn, t);
index 5d2c2afffe7b2c0790bc00505c518d21eb4b589b..22e28a44e3c88c6e52ff59ea1c7dd8a3e964e701 100644 (file)
@@ -257,6 +257,7 @@ static struct sock *udp6_lib_lookup2(struct net *net,
        struct sock *sk, *result;
        struct hlist_nulls_node *node;
        int score, badness, matches = 0, reuseport = 0;
+       bool select_ok = true;
        u32 hash = 0;
 
 begin:
@@ -270,14 +271,18 @@ begin:
                        badness = score;
                        reuseport = sk->sk_reuseport;
                        if (reuseport) {
-                               struct sock *sk2;
                                hash = udp6_ehashfn(net, daddr, hnum,
                                                    saddr, sport);
-                               sk2 = reuseport_select_sock(sk, hash, skb,
-                                                           sizeof(struct udphdr));
-                               if (sk2) {
-                                       result = sk2;
-                                       goto found;
+                               if (select_ok) {
+                                       struct sock *sk2;
+
+                                       sk2 = reuseport_select_sock(sk, hash, skb,
+                                                       sizeof(struct udphdr));
+                                       if (sk2) {
+                                               result = sk2;
+                                               select_ok = false;
+                                               goto found;
+                                       }
                                }
                                matches = 1;
                        }
@@ -321,6 +326,7 @@ struct sock *__udp6_lib_lookup(struct net *net,
        unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
        struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
        int score, badness, matches = 0, reuseport = 0;
+       bool select_ok = true;
        u32 hash = 0;
 
        rcu_read_lock();
@@ -358,14 +364,18 @@ begin:
                        badness = score;
                        reuseport = sk->sk_reuseport;
                        if (reuseport) {
-                               struct sock *sk2;
                                hash = udp6_ehashfn(net, daddr, hnum,
                                                    saddr, sport);
-                               sk2 = reuseport_select_sock(sk, hash, skb,
+                               if (select_ok) {
+                                       struct sock *sk2;
+
+                                       sk2 = reuseport_select_sock(sk, hash, skb,
                                                        sizeof(struct udphdr));
-                               if (sk2) {
-                                       result = sk2;
-                                       goto found;
+                                       if (sk2) {
+                                               result = sk2;
+                                               select_ok = false;
+                                               goto found;
+                                       }
                                }
                                matches = 1;
                        }
index 3c4caa60c9265dd95f62d7a3aba84d40cdba0492..5728e76ca6d51adabef2cde3e06b0b9184c2d07e 100644 (file)
@@ -134,11 +134,10 @@ int ircomm_param_request(struct ircomm_tty_cb *self, __u8 pi, int flush)
                return -1;
        }
        skb_put(skb, count);
+       pr_debug("%s(), skb->len=%d\n", __func__, skb->len);
 
        spin_unlock_irqrestore(&self->spinlock, flags);
 
-       pr_debug("%s(), skb->len=%d\n", __func__ , skb->len);
-
        if (flush) {
                /* ircomm_tty_do_softint will take care of the rest */
                schedule_work(&self->tqueue);
index ef50a94d3eb726f75d4202a7a15c7dd1f812a3aa..fc3598a922b071503514af87deb603a154e2b33b 100644 (file)
@@ -708,6 +708,9 @@ static int iucv_sock_bind(struct socket *sock, struct sockaddr *addr,
        if (!addr || addr->sa_family != AF_IUCV)
                return -EINVAL;
 
+       if (addr_len < sizeof(struct sockaddr_iucv))
+               return -EINVAL;
+
        lock_sock(sk);
        if (sk->sk_state != IUCV_OPEN) {
                err = -EBADFD;
index f7fc0e00497fd73f858833e2cb54ec3058b177fe..978d3bc31df727468602a7b234d10b5b1e9ba35c 100644 (file)
@@ -1733,7 +1733,6 @@ void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local)
                if (sdata->vif.type != NL80211_IFTYPE_ADHOC)
                        continue;
                sdata->u.ibss.last_scan_completed = jiffies;
-               ieee80211_queue_work(&local->hw, &sdata->work);
        }
        mutex_unlock(&local->iflist_mtx);
 }
index 6bcf0faa4a89dbf38ee125099ad8c328f378331f..8190bf27ebfff8b0a594b7c029130dc9b1b99957 100644 (file)
@@ -248,6 +248,7 @@ static void ieee80211_restart_work(struct work_struct *work)
 
        /* wait for scan work complete */
        flush_workqueue(local->workqueue);
+       flush_work(&local->sched_scan_stopped_work);
 
        WARN(test_bit(SCAN_HW_SCANNING, &local->scanning),
             "%s called with hardware scan in progress\n", __func__);
@@ -256,6 +257,11 @@ static void ieee80211_restart_work(struct work_struct *work)
        list_for_each_entry(sdata, &local->interfaces, list)
                flush_delayed_work(&sdata->dec_tailroom_needed_wk);
        ieee80211_scan_cancel(local);
+
+       /* make sure any new ROC will consider local->in_reconfig */
+       flush_delayed_work(&local->roc_work);
+       flush_work(&local->hw_roc_done);
+
        ieee80211_reconfig(local);
        rtnl_unlock();
 }
index fa28500f28fd9fd8d452602b0407741589f12e36..6f85b6ab8e51015ab9a0520822fc4daff6af92b0 100644 (file)
@@ -1370,17 +1370,6 @@ out:
        sdata_unlock(sdata);
 }
 
-void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local)
-{
-       struct ieee80211_sub_if_data *sdata;
-
-       rcu_read_lock();
-       list_for_each_entry_rcu(sdata, &local->interfaces, list)
-               if (ieee80211_vif_is_mesh(&sdata->vif) &&
-                   ieee80211_sdata_running(sdata))
-                       ieee80211_queue_work(&local->hw, &sdata->work);
-       rcu_read_unlock();
-}
 
 void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata)
 {
index a1596344c3ba1c5fe590767f3c0483310d63121a..4a8019f79fb2cba035a316b0a8d56a7e8f25d614 100644 (file)
@@ -362,14 +362,10 @@ static inline bool mesh_path_sel_is_hwmp(struct ieee80211_sub_if_data *sdata)
        return sdata->u.mesh.mesh_pp_id == IEEE80211_PATH_PROTOCOL_HWMP;
 }
 
-void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local);
-
 void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata);
 void mesh_sync_adjust_tbtt(struct ieee80211_sub_if_data *sdata);
 void ieee80211s_stop(void);
 #else
-static inline void
-ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local) {}
 static inline bool mesh_path_sel_is_hwmp(struct ieee80211_sub_if_data *sdata)
 { return false; }
 static inline void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata)
index 1c342e2592c4ab8af62871a4e44da0f3029140b0..bfbb1acafdd1f004ff4960b2238fbc5343302f38 100644 (file)
@@ -4005,8 +4005,6 @@ static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata)
                if (!ieee80211_hw_check(&sdata->local->hw, CONNECTION_MONITOR))
                        ieee80211_queue_work(&sdata->local->hw,
                                             &sdata->u.mgd.monitor_work);
-               /* and do all the other regular work too */
-               ieee80211_queue_work(&sdata->local->hw, &sdata->work);
        }
 }
 
index 8b2f4eaac2ba65f8e25ae47dc316bf288ef8e0e9..55a9c5b94ce128d06ffb3154173fa2ea62a53f9b 100644 (file)
@@ -252,14 +252,11 @@ static bool ieee80211_recalc_sw_work(struct ieee80211_local *local,
 static void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc,
                                         unsigned long start_time)
 {
-       struct ieee80211_local *local = roc->sdata->local;
-
        if (WARN_ON(roc->notified))
                return;
 
        roc->start_time = start_time;
        roc->started = true;
-       roc->hw_begun = true;
 
        if (roc->mgmt_tx_cookie) {
                if (!WARN_ON(!roc->frame)) {
@@ -274,9 +271,6 @@ static void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc,
        }
 
        roc->notified = true;
-
-       if (!local->ops->remain_on_channel)
-               ieee80211_recalc_sw_work(local, start_time);
 }
 
 static void ieee80211_hw_roc_start(struct work_struct *work)
@@ -291,6 +285,7 @@ static void ieee80211_hw_roc_start(struct work_struct *work)
                if (!roc->started)
                        break;
 
+               roc->hw_begun = true;
                ieee80211_handle_roc_started(roc, local->hw_roc_start_time);
        }
 
@@ -413,6 +408,10 @@ void ieee80211_start_next_roc(struct ieee80211_local *local)
                return;
        }
 
+       /* defer roc if driver is not started (i.e. during reconfig) */
+       if (local->in_reconfig)
+               return;
+
        roc = list_first_entry(&local->roc_list, struct ieee80211_roc_work,
                               list);
 
@@ -534,8 +533,10 @@ ieee80211_coalesce_hw_started_roc(struct ieee80211_local *local,
         * begin, otherwise they'll both be marked properly by the work
         * struct that runs once the driver notifies us of the beginning
         */
-       if (cur_roc->hw_begun)
+       if (cur_roc->hw_begun) {
+               new_roc->hw_begun = true;
                ieee80211_handle_roc_started(new_roc, now);
+       }
 
        return true;
 }
@@ -658,6 +659,7 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local,
                        queued = true;
                        roc->on_channel = tmp->on_channel;
                        ieee80211_handle_roc_started(roc, now);
+                       ieee80211_recalc_sw_work(local, now);
                        break;
                }
 
index a413e52f7691418116d928f684a0fc27b308c2d6..ae980ce8daff6023bd8e5f113dee6a57b13408cc 100644 (file)
@@ -314,6 +314,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
        bool was_scanning = local->scanning;
        struct cfg80211_scan_request *scan_req;
        struct ieee80211_sub_if_data *scan_sdata;
+       struct ieee80211_sub_if_data *sdata;
 
        lockdep_assert_held(&local->mtx);
 
@@ -373,7 +374,16 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
 
        ieee80211_mlme_notify_scan_completed(local);
        ieee80211_ibss_notify_scan_completed(local);
-       ieee80211_mesh_notify_scan_completed(local);
+
+       /* Requeue all the work that might have been ignored while
+        * the scan was in progress; if there was none this will
+        * just be a no-op for the particular interface.
+        */
+       list_for_each_entry_rcu(sdata, &local->interfaces, list) {
+               if (ieee80211_sdata_running(sdata))
+                       ieee80211_queue_work(&sdata->local->hw, &sdata->work);
+       }
+
        if (was_scanning)
                ieee80211_start_next_roc(local);
 }
@@ -1213,6 +1223,14 @@ void ieee80211_sched_scan_stopped(struct ieee80211_hw *hw)
 
        trace_api_sched_scan_stopped(local);
 
+       /*
+        * this shouldn't really happen, so for simplicity
+        * simply ignore it, and let mac80211 reconfigure
+        * the sched scan later on.
+        */
+       if (local->in_reconfig)
+               return;
+
        schedule_work(&local->sched_scan_stopped_work);
 }
 EXPORT_SYMBOL(ieee80211_sched_scan_stopped);
index 4402ad5b27d1530bf4cc9a0d674c6538f73cb568..a4a4f89d3ba01138c033a13a7d023a04a99e3e6d 100644 (file)
@@ -1453,7 +1453,7 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta,
 
        more_data = ieee80211_sta_ps_more_data(sta, ignored_acs, reason, driver_release_tids);
 
-       if (reason == IEEE80211_FRAME_RELEASE_PSPOLL)
+       if (driver_release_tids && reason == IEEE80211_FRAME_RELEASE_PSPOLL)
                driver_release_tids =
                        BIT(find_highest_prio_tid(driver_release_tids));
 
index 5bad05e9af90fc76201f4bd9a54b931ffd5c61bc..6101deb805a830a80acccbe05499ca4f4fc33988 100644 (file)
@@ -51,6 +51,11 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local,
        struct ieee80211_hdr *hdr = (void *)skb->data;
        int ac;
 
+       if (info->flags & IEEE80211_TX_CTL_NO_PS_BUFFER) {
+               ieee80211_free_txskb(&local->hw, skb);
+               return;
+       }
+
        /*
         * This skb 'survived' a round-trip through the driver, and
         * hopefully the driver didn't mangle it too badly. However,
index 3943d4bf289c29b436765d82dd0f241c8b7f5530..58f58bd5202ff5ad99630bc7584a8afeb8c2cf26 100644 (file)
@@ -2043,16 +2043,26 @@ int ieee80211_reconfig(struct ieee80211_local *local)
                 */
                if (sched_scan_req->n_scan_plans > 1 ||
                    __ieee80211_request_sched_scan_start(sched_scan_sdata,
-                                                        sched_scan_req))
+                                                        sched_scan_req)) {
+                       RCU_INIT_POINTER(local->sched_scan_sdata, NULL);
+                       RCU_INIT_POINTER(local->sched_scan_req, NULL);
                        sched_scan_stopped = true;
+               }
        mutex_unlock(&local->mtx);
 
        if (sched_scan_stopped)
                cfg80211_sched_scan_stopped_rtnl(local->hw.wiphy);
 
  wake_up:
-       local->in_reconfig = false;
-       barrier();
+       if (local->in_reconfig) {
+               local->in_reconfig = false;
+               barrier();
+
+               /* Restart deferred ROCs */
+               mutex_lock(&local->mtx);
+               ieee80211_start_next_roc(local);
+               mutex_unlock(&local->mtx);
+       }
 
        if (local->monitors == local->open_count && local->monitors > 0)
                ieee80211_add_virtual_monitor(local);
index 43d8c9896fa3e66ea7f76ee1b7934c3129a46fd3..f0f688db6213930ad568b645080eb171bb30f345 100644 (file)
@@ -164,8 +164,6 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb,
        };
        struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
-       if (e.cidr == 0)
-               return -EINVAL;
        if (adt == IPSET_TEST)
                e.cidr = HOST_MASK;
 
@@ -377,8 +375,6 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb,
        };
        struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
-       if (e.cidr == 0)
-               return -EINVAL;
        if (adt == IPSET_TEST)
                e.cidr = HOST_MASK;
 
index 3cb3cb831591ef79515b4bde66d5db825a732afb..58882de06bd74f254033e0f06521f264c18c8d44 100644 (file)
@@ -66,6 +66,21 @@ EXPORT_SYMBOL_GPL(nf_conntrack_locks);
 __cacheline_aligned_in_smp DEFINE_SPINLOCK(nf_conntrack_expect_lock);
 EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock);
 
+static __read_mostly spinlock_t nf_conntrack_locks_all_lock;
+static __read_mostly bool nf_conntrack_locks_all;
+
+void nf_conntrack_lock(spinlock_t *lock) __acquires(lock)
+{
+       spin_lock(lock);
+       while (unlikely(nf_conntrack_locks_all)) {
+               spin_unlock(lock);
+               spin_lock(&nf_conntrack_locks_all_lock);
+               spin_unlock(&nf_conntrack_locks_all_lock);
+               spin_lock(lock);
+       }
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_lock);
+
 static void nf_conntrack_double_unlock(unsigned int h1, unsigned int h2)
 {
        h1 %= CONNTRACK_LOCKS;
@@ -82,12 +97,12 @@ static bool nf_conntrack_double_lock(struct net *net, unsigned int h1,
        h1 %= CONNTRACK_LOCKS;
        h2 %= CONNTRACK_LOCKS;
        if (h1 <= h2) {
-               spin_lock(&nf_conntrack_locks[h1]);
+               nf_conntrack_lock(&nf_conntrack_locks[h1]);
                if (h1 != h2)
                        spin_lock_nested(&nf_conntrack_locks[h2],
                                         SINGLE_DEPTH_NESTING);
        } else {
-               spin_lock(&nf_conntrack_locks[h2]);
+               nf_conntrack_lock(&nf_conntrack_locks[h2]);
                spin_lock_nested(&nf_conntrack_locks[h1],
                                 SINGLE_DEPTH_NESTING);
        }
@@ -102,16 +117,19 @@ static void nf_conntrack_all_lock(void)
 {
        int i;
 
-       for (i = 0; i < CONNTRACK_LOCKS; i++)
-               spin_lock_nested(&nf_conntrack_locks[i], i);
+       spin_lock(&nf_conntrack_locks_all_lock);
+       nf_conntrack_locks_all = true;
+
+       for (i = 0; i < CONNTRACK_LOCKS; i++) {
+               spin_lock(&nf_conntrack_locks[i]);
+               spin_unlock(&nf_conntrack_locks[i]);
+       }
 }
 
 static void nf_conntrack_all_unlock(void)
 {
-       int i;
-
-       for (i = 0; i < CONNTRACK_LOCKS; i++)
-               spin_unlock(&nf_conntrack_locks[i]);
+       nf_conntrack_locks_all = false;
+       spin_unlock(&nf_conntrack_locks_all_lock);
 }
 
 unsigned int nf_conntrack_htable_size __read_mostly;
@@ -757,7 +775,7 @@ restart:
        hash = hash_bucket(_hash, net);
        for (; i < net->ct.htable_size; i++) {
                lockp = &nf_conntrack_locks[hash % CONNTRACK_LOCKS];
-               spin_lock(lockp);
+               nf_conntrack_lock(lockp);
                if (read_seqcount_retry(&net->ct.generation, sequence)) {
                        spin_unlock(lockp);
                        goto restart;
@@ -1382,7 +1400,7 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
        for (; *bucket < net->ct.htable_size; (*bucket)++) {
                lockp = &nf_conntrack_locks[*bucket % CONNTRACK_LOCKS];
                local_bh_disable();
-               spin_lock(lockp);
+               nf_conntrack_lock(lockp);
                if (*bucket < net->ct.htable_size) {
                        hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) {
                                if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
index bd9d31537905e4e372da427ac29ce5948f2af34c..3b40ec575cd56a7b73c0b25d9c8decdda979d842 100644 (file)
@@ -425,7 +425,7 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
        }
        local_bh_disable();
        for (i = 0; i < net->ct.htable_size; i++) {
-               spin_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
+               nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
                if (i < net->ct.htable_size) {
                        hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode)
                                unhelp(h, me);
index dbb1bb3edb456594184bb211332d1ee799af70ef..355e8552fd5b77682295493a22dc0a03d9338255 100644 (file)
@@ -840,7 +840,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
        for (; cb->args[0] < net->ct.htable_size; cb->args[0]++) {
 restart:
                lockp = &nf_conntrack_locks[cb->args[0] % CONNTRACK_LOCKS];
-               spin_lock(lockp);
+               nf_conntrack_lock(lockp);
                if (cb->args[0] >= net->ct.htable_size) {
                        spin_unlock(lockp);
                        goto out;
index b6605e0008013972cd94500db5bd626cb9bf3064..5eefe4a355c640cf85c8328740a877c6909880ba 100644 (file)
@@ -224,12 +224,12 @@ static int __init nf_tables_netdev_init(void)
 
        nft_register_chain_type(&nft_filter_chain_netdev);
        ret = register_pernet_subsys(&nf_tables_netdev_net_ops);
-       if (ret < 0)
+       if (ret < 0) {
                nft_unregister_chain_type(&nft_filter_chain_netdev);
-
+               return ret;
+       }
        register_netdevice_notifier(&nf_tables_netdev_notifier);
-
-       return ret;
+       return 0;
 }
 
 static void __exit nf_tables_netdev_exit(void)
index 5d010f27ac018d079058000c11acc66d7086b57a..94837d236ab0e9a2fc3baa5d8fe7688a1b399afc 100644 (file)
@@ -307,12 +307,12 @@ static void ctnl_untimeout(struct net *net, struct ctnl_timeout *timeout)
 
        local_bh_disable();
        for (i = 0; i < net->ct.htable_size; i++) {
-               spin_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
+               nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
                if (i < net->ct.htable_size) {
                        hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode)
                                untimeout(h, timeout);
                }
-               spin_unlock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
+               nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
        }
        local_bh_enable();
 }
index 383c17138399aec2a5681580ddd3b6f200f29d67..b78c28ba465ff9c1eed6b6006b6c5e9d9da58072 100644 (file)
@@ -46,16 +46,14 @@ static void nft_byteorder_eval(const struct nft_expr *expr,
                switch (priv->op) {
                case NFT_BYTEORDER_NTOH:
                        for (i = 0; i < priv->len / 8; i++) {
-                               src64 = get_unaligned_be64(&src[i]);
-                               src64 = be64_to_cpu((__force __be64)src64);
+                               src64 = get_unaligned((u64 *)&src[i]);
                                put_unaligned_be64(src64, &dst[i]);
                        }
                        break;
                case NFT_BYTEORDER_HTON:
                        for (i = 0; i < priv->len / 8; i++) {
                                src64 = get_unaligned_be64(&src[i]);
-                               src64 = (__force u64)cpu_to_be64(src64);
-                               put_unaligned_be64(src64, &dst[i]);
+                               put_unaligned(src64, (u64 *)&dst[i]);
                        }
                        break;
                }
index a0eb2161e3efdb3476f692ce242cafaff30e7f18..d4a4619fcebc927acd1bf730e044d29813d096d3 100644 (file)
@@ -127,6 +127,7 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
                               NF_CT_LABELS_MAX_SIZE - size);
                return;
        }
+#endif
        case NFT_CT_BYTES: /* fallthrough */
        case NFT_CT_PKTS: {
                const struct nf_conn_acct *acct = nf_conn_acct_find(ct);
@@ -138,7 +139,6 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
                memcpy(dest, &count, sizeof(count));
                return;
        }
-#endif
        default:
                break;
        }
index b7c43def0dc69e1cd03db238e0174283d895003a..e118397254af026fd23ed3097ddc78c8ee9fc0c4 100644 (file)
@@ -228,7 +228,7 @@ tcpmss_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 {
        struct ipv6hdr *ipv6h = ipv6_hdr(skb);
        u8 nexthdr;
-       __be16 frag_off;
+       __be16 frag_off, oldlen, newlen;
        int tcphoff;
        int ret;
 
@@ -244,7 +244,12 @@ tcpmss_tg6(struct sk_buff *skb, const struct xt_action_param *par)
                return NF_DROP;
        if (ret > 0) {
                ipv6h = ipv6_hdr(skb);
-               ipv6h->payload_len = htons(ntohs(ipv6h->payload_len) + ret);
+               oldlen = ipv6h->payload_len;
+               newlen = htons(ntohs(oldlen) + ret);
+               if (skb->ip_summed == CHECKSUM_COMPLETE)
+                       skb->csum = csum_add(csum_sub(skb->csum, oldlen),
+                                            newlen);
+               ipv6h->payload_len = newlen;
        }
        return XT_CONTINUE;
 }
index 81dc1bb6e0168ed17cd2d60e6dc3a394f0f34e76..f1ffb34e253f4906275d4da99ce26306b94241eb 100644 (file)
@@ -2831,7 +2831,8 @@ static int netlink_dump(struct sock *sk)
         * reasonable static buffer based on the expected largest dump of a
         * single netdev. The outcome is MSG_TRUNC error.
         */
-       skb_reserve(skb, skb_tailroom(skb) - alloc_size);
+       if (!netlink_rx_is_mmaped(sk))
+               skb_reserve(skb, skb_tailroom(skb) - alloc_size);
        netlink_skb_set_owner_r(skb, sk);
 
        len = cb->dump(skb, cb);
index f222885ac0c7397ed08c26106e68157d91267ff6..9481d55ff6cb2dd7a228964fa3d9dea154a5a9af 100644 (file)
@@ -122,44 +122,34 @@ void rds_ib_dev_put(struct rds_ib_device *rds_ibdev)
 static void rds_ib_add_one(struct ib_device *device)
 {
        struct rds_ib_device *rds_ibdev;
-       struct ib_device_attr *dev_attr;
 
        /* Only handle IB (no iWARP) devices */
        if (device->node_type != RDMA_NODE_IB_CA)
                return;
 
-       dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);
-       if (!dev_attr)
-               return;
-
-       if (ib_query_device(device, dev_attr)) {
-               rdsdebug("Query device failed for %s\n", device->name);
-               goto free_attr;
-       }
-
        rds_ibdev = kzalloc_node(sizeof(struct rds_ib_device), GFP_KERNEL,
                                 ibdev_to_node(device));
        if (!rds_ibdev)
-               goto free_attr;
+               return;
 
        spin_lock_init(&rds_ibdev->spinlock);
        atomic_set(&rds_ibdev->refcount, 1);
        INIT_WORK(&rds_ibdev->free_work, rds_ib_dev_free);
 
-       rds_ibdev->max_wrs = dev_attr->max_qp_wr;
-       rds_ibdev->max_sge = min(dev_attr->max_sge, RDS_IB_MAX_SGE);
+       rds_ibdev->max_wrs = device->attrs.max_qp_wr;
+       rds_ibdev->max_sge = min(device->attrs.max_sge, RDS_IB_MAX_SGE);
 
-       rds_ibdev->fmr_max_remaps = dev_attr->max_map_per_fmr?: 32;
-       rds_ibdev->max_1m_fmrs = dev_attr->max_mr ?
-               min_t(unsigned int, (dev_attr->max_mr / 2),
+       rds_ibdev->fmr_max_remaps = device->attrs.max_map_per_fmr?: 32;
+       rds_ibdev->max_1m_fmrs = device->attrs.max_mr ?
+               min_t(unsigned int, (device->attrs.max_mr / 2),
                      rds_ib_fmr_1m_pool_size) : rds_ib_fmr_1m_pool_size;
 
-       rds_ibdev->max_8k_fmrs = dev_attr->max_mr ?
-               min_t(unsigned int, ((dev_attr->max_mr / 2) * RDS_MR_8K_SCALE),
+       rds_ibdev->max_8k_fmrs = device->attrs.max_mr ?
+               min_t(unsigned int, ((device->attrs.max_mr / 2) * RDS_MR_8K_SCALE),
                      rds_ib_fmr_8k_pool_size) : rds_ib_fmr_8k_pool_size;
 
-       rds_ibdev->max_initiator_depth = dev_attr->max_qp_init_rd_atom;
-       rds_ibdev->max_responder_resources = dev_attr->max_qp_rd_atom;
+       rds_ibdev->max_initiator_depth = device->attrs.max_qp_init_rd_atom;
+       rds_ibdev->max_responder_resources = device->attrs.max_qp_rd_atom;
 
        rds_ibdev->dev = device;
        rds_ibdev->pd = ib_alloc_pd(device);
@@ -183,7 +173,7 @@ static void rds_ib_add_one(struct ib_device *device)
        }
 
        rdsdebug("RDS/IB: max_mr = %d, max_wrs = %d, max_sge = %d, fmr_max_remaps = %d, max_1m_fmrs = %d, max_8k_fmrs = %d\n",
-                dev_attr->max_fmr, rds_ibdev->max_wrs, rds_ibdev->max_sge,
+                device->attrs.max_fmr, rds_ibdev->max_wrs, rds_ibdev->max_sge,
                 rds_ibdev->fmr_max_remaps, rds_ibdev->max_1m_fmrs,
                 rds_ibdev->max_8k_fmrs);
 
@@ -202,8 +192,6 @@ static void rds_ib_add_one(struct ib_device *device)
 
 put_dev:
        rds_ib_dev_put(rds_ibdev);
-free_attr:
-       kfree(dev_attr);
 }
 
 /*
index 576f1825fc55769f7242c75c771a50a6e8e5e93b..f4a9fff829e0e5193697ad334065dd423ab4fe0f 100644 (file)
@@ -60,30 +60,20 @@ LIST_HEAD(iw_nodev_conns);
 static void rds_iw_add_one(struct ib_device *device)
 {
        struct rds_iw_device *rds_iwdev;
-       struct ib_device_attr *dev_attr;
 
        /* Only handle iwarp devices */
        if (device->node_type != RDMA_NODE_RNIC)
                return;
 
-       dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);
-       if (!dev_attr)
-               return;
-
-       if (ib_query_device(device, dev_attr)) {
-               rdsdebug("Query device failed for %s\n", device->name);
-               goto free_attr;
-       }
-
        rds_iwdev = kmalloc(sizeof *rds_iwdev, GFP_KERNEL);
        if (!rds_iwdev)
-               goto free_attr;
+               return;
 
        spin_lock_init(&rds_iwdev->spinlock);
 
-       rds_iwdev->dma_local_lkey = !!(dev_attr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY);
-       rds_iwdev->max_wrs = dev_attr->max_qp_wr;
-       rds_iwdev->max_sge = min(dev_attr->max_sge, RDS_IW_MAX_SGE);
+       rds_iwdev->dma_local_lkey = !!(device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY);
+       rds_iwdev->max_wrs = device->attrs.max_qp_wr;
+       rds_iwdev->max_sge = min(device->attrs.max_sge, RDS_IW_MAX_SGE);
 
        rds_iwdev->dev = device;
        rds_iwdev->pd = ib_alloc_pd(device);
@@ -111,8 +101,7 @@ static void rds_iw_add_one(struct ib_device *device)
        list_add_tail(&rds_iwdev->list, &rds_iw_devices);
 
        ib_set_client_data(device, &rds_iw_client, rds_iwdev);
-
-       goto free_attr;
+       return;
 
 err_mr:
        if (rds_iwdev->mr)
@@ -121,8 +110,6 @@ err_pd:
        ib_dealloc_pd(rds_iwdev->pd);
 free_dev:
        kfree(rds_iwdev);
-free_attr:
-       kfree(dev_attr);
 }
 
 static void rds_iw_remove_one(struct ib_device *device, void *client_data)
index f53bf3b6558b094b6e1b379568620157b4ffae17..cf5b69ab18294bb2fa36a00f2a19dbf02d0f3045 100644 (file)
@@ -1095,17 +1095,6 @@ static unsigned int rfkill_fop_poll(struct file *file, poll_table *wait)
        return res;
 }
 
-static bool rfkill_readable(struct rfkill_data *data)
-{
-       bool r;
-
-       mutex_lock(&data->mtx);
-       r = !list_empty(&data->events);
-       mutex_unlock(&data->mtx);
-
-       return r;
-}
-
 static ssize_t rfkill_fop_read(struct file *file, char __user *buf,
                               size_t count, loff_t *pos)
 {
@@ -1122,8 +1111,11 @@ static ssize_t rfkill_fop_read(struct file *file, char __user *buf,
                        goto out;
                }
                mutex_unlock(&data->mtx);
+               /* since we re-check and it just compares pointers,
+                * using !list_empty() without locking isn't a problem
+                */
                ret = wait_event_interruptible(data->read_wait,
-                                              rfkill_readable(data));
+                                              !list_empty(&data->events));
                mutex_lock(&data->mtx);
 
                if (ret)
index f26bdea875c1a8413774e767bb897012d969d2e6..a1cd778240cd7f2b14336c26ba6eec282c1c60b6 100644 (file)
@@ -403,6 +403,8 @@ static struct sk_buff *drr_dequeue(struct Qdisc *sch)
                if (len <= cl->deficit) {
                        cl->deficit -= len;
                        skb = qdisc_dequeue_peeked(cl->qdisc);
+                       if (unlikely(skb == NULL))
+                               goto out;
                        if (cl->qdisc->q.qlen == 0)
                                list_del(&cl->alist);
 
index bf61dfb8e09eea1034c7b9ddd40583752033230c..49d2cc751386f3208c67eed17ec9c3f15b801f50 100644 (file)
@@ -935,15 +935,22 @@ static struct sctp_association *__sctp_lookup_association(
                                        struct sctp_transport **pt)
 {
        struct sctp_transport *t;
+       struct sctp_association *asoc = NULL;
 
+       rcu_read_lock();
        t = sctp_addrs_lookup_transport(net, local, peer);
-       if (!t || t->dead)
-               return NULL;
+       if (!t || !sctp_transport_hold(t))
+               goto out;
 
-       sctp_association_hold(t->asoc);
+       asoc = t->asoc;
+       sctp_association_hold(asoc);
        *pt = t;
 
-       return t->asoc;
+       sctp_transport_put(t);
+
+out:
+       rcu_read_unlock();
+       return asoc;
 }
 
 /* Look up an association. protected by RCU read lock */
@@ -955,9 +962,7 @@ struct sctp_association *sctp_lookup_association(struct net *net,
 {
        struct sctp_association *asoc;
 
-       rcu_read_lock();
        asoc = __sctp_lookup_association(net, laddr, paddr, transportp);
-       rcu_read_unlock();
 
        return asoc;
 }
index 684c5b31563badc1f097434211422f495d356aad..ded7d931a6a5b218c7bbd4481acdea1d2bc29325 100644 (file)
@@ -165,8 +165,6 @@ static void sctp_seq_dump_remote_addrs(struct seq_file *seq, struct sctp_associa
        list_for_each_entry_rcu(transport, &assoc->peer.transport_addr_list,
                        transports) {
                addr = &transport->ipaddr;
-               if (transport->dead)
-                       continue;
 
                af = sctp_get_af_specific(addr->sa.sa_family);
                if (af->cmp_addr(addr, primary)) {
@@ -380,6 +378,8 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
        }
 
        transport = (struct sctp_transport *)v;
+       if (!sctp_transport_hold(transport))
+               return 0;
        assoc = transport->asoc;
        epb = &assoc->base;
        sk = epb->sk;
@@ -412,6 +412,8 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
                sk->sk_rcvbuf);
        seq_printf(seq, "\n");
 
+       sctp_transport_put(transport);
+
        return 0;
 }
 
@@ -489,12 +491,12 @@ static int sctp_remaddr_seq_show(struct seq_file *seq, void *v)
        }
 
        tsp = (struct sctp_transport *)v;
+       if (!sctp_transport_hold(tsp))
+               return 0;
        assoc = tsp->asoc;
 
        list_for_each_entry_rcu(tsp, &assoc->peer.transport_addr_list,
                                transports) {
-               if (tsp->dead)
-                       continue;
                /*
                 * The remote address (ADDR)
                 */
@@ -544,6 +546,8 @@ static int sctp_remaddr_seq_show(struct seq_file *seq, void *v)
                seq_printf(seq, "\n");
        }
 
+       sctp_transport_put(tsp);
+
        return 0;
 }
 
index 2e21384697c2a6a5fd045142bcd9c39992d3867f..b5327bb77458f3ae68e0098c45d00307a56ed2c5 100644 (file)
@@ -259,12 +259,6 @@ void sctp_generate_t3_rtx_event(unsigned long peer)
                goto out_unlock;
        }
 
-       /* Is this transport really dead and just waiting around for
-        * the timer to let go of the reference?
-        */
-       if (transport->dead)
-               goto out_unlock;
-
        /* Run through the state machine.  */
        error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT,
                           SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_T3_RTX),
@@ -380,12 +374,6 @@ void sctp_generate_heartbeat_event(unsigned long data)
                goto out_unlock;
        }
 
-       /* Is this structure just waiting around for us to actually
-        * get destroyed?
-        */
-       if (transport->dead)
-               goto out_unlock;
-
        error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT,
                           SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_HEARTBEAT),
                           asoc->state, asoc->ep, asoc,
index 9bb80ec4c08ff06f6e629078c5a926c3def3ce23..5ca2ebfe0be83882fcb841de6fa8029b6455ef85 100644 (file)
@@ -6636,6 +6636,7 @@ static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs)
 
                        if (cmsgs->srinfo->sinfo_flags &
                            ~(SCTP_UNORDERED | SCTP_ADDR_OVER |
+                             SCTP_SACK_IMMEDIATELY |
                              SCTP_ABORT | SCTP_EOF))
                                return -EINVAL;
                        break;
@@ -6659,6 +6660,7 @@ static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs)
 
                        if (cmsgs->sinfo->snd_flags &
                            ~(SCTP_UNORDERED | SCTP_ADDR_OVER |
+                             SCTP_SACK_IMMEDIATELY |
                              SCTP_ABORT | SCTP_EOF))
                                return -EINVAL;
                        break;
index aab9e3f29755a58eb7584aa763b4b1a46d2e6608..a431c14044a46f98e9b73f66e28f229690ef3c01 100644 (file)
@@ -132,8 +132,6 @@ fail:
  */
 void sctp_transport_free(struct sctp_transport *transport)
 {
-       transport->dead = 1;
-
        /* Try to delete the heartbeat timer.  */
        if (del_timer(&transport->hb_timer))
                sctp_transport_put(transport);
@@ -169,7 +167,7 @@ static void sctp_transport_destroy_rcu(struct rcu_head *head)
  */
 static void sctp_transport_destroy(struct sctp_transport *transport)
 {
-       if (unlikely(!transport->dead)) {
+       if (unlikely(atomic_read(&transport->refcnt))) {
                WARN(1, "Attempt to destroy undead transport %p!\n", transport);
                return;
        }
@@ -296,9 +294,9 @@ void sctp_transport_route(struct sctp_transport *transport,
 }
 
 /* Hold a reference to a transport.  */
-void sctp_transport_hold(struct sctp_transport *transport)
+int sctp_transport_hold(struct sctp_transport *transport)
 {
-       atomic_inc(&transport->refcnt);
+       return atomic_add_unless(&transport->refcnt, 1, 0);
 }
 
 /* Release a reference to a transport and clean up
index 5e4f815c2b34d22fba11ac2443e9d66bc27c779c..2b32fd602669f1fbc1aa96d61ad076fe688713e1 100644 (file)
@@ -771,7 +771,7 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count,
        if (count == 0)
                return 0;
 
-       mutex_lock(&inode->i_mutex); /* protect against multiple concurrent
+       inode_lock(inode); /* protect against multiple concurrent
                              * readers on this file */
  again:
        spin_lock(&queue_lock);
@@ -784,7 +784,7 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count,
        }
        if (rp->q.list.next == &cd->queue) {
                spin_unlock(&queue_lock);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                WARN_ON_ONCE(rp->offset);
                return 0;
        }
@@ -838,7 +838,7 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count,
        }
        if (err == -EAGAIN)
                goto again;
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return err ? err :  count;
 }
 
@@ -909,9 +909,9 @@ static ssize_t cache_write(struct file *filp, const char __user *buf,
        if (!cd->cache_parse)
                goto out;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        ret = cache_downcall(mapping, buf, count, cd);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 out:
        return ret;
 }
index 14f45bf0410c688b25fafedfb6636cfa025ad06c..31789ef3e614484a4d5c75721d4b9f63fc735f6a 100644 (file)
@@ -172,7 +172,7 @@ rpc_close_pipes(struct inode *inode)
        int need_release;
        LIST_HEAD(free_list);
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        spin_lock(&pipe->lock);
        need_release = pipe->nreaders != 0 || pipe->nwriters != 0;
        pipe->nreaders = 0;
@@ -188,7 +188,7 @@ rpc_close_pipes(struct inode *inode)
        cancel_delayed_work_sync(&pipe->queue_timeout);
        rpc_inode_setowner(inode, NULL);
        RPC_I(inode)->pipe = NULL;
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 }
 
 static struct inode *
@@ -221,7 +221,7 @@ rpc_pipe_open(struct inode *inode, struct file *filp)
        int first_open;
        int res = -ENXIO;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        pipe = RPC_I(inode)->pipe;
        if (pipe == NULL)
                goto out;
@@ -237,7 +237,7 @@ rpc_pipe_open(struct inode *inode, struct file *filp)
                pipe->nwriters++;
        res = 0;
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return res;
 }
 
@@ -248,7 +248,7 @@ rpc_pipe_release(struct inode *inode, struct file *filp)
        struct rpc_pipe_msg *msg;
        int last_close;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        pipe = RPC_I(inode)->pipe;
        if (pipe == NULL)
                goto out;
@@ -278,7 +278,7 @@ rpc_pipe_release(struct inode *inode, struct file *filp)
        if (last_close && pipe->ops->release_pipe)
                pipe->ops->release_pipe(inode);
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return 0;
 }
 
@@ -290,7 +290,7 @@ rpc_pipe_read(struct file *filp, char __user *buf, size_t len, loff_t *offset)
        struct rpc_pipe_msg *msg;
        int res = 0;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        pipe = RPC_I(inode)->pipe;
        if (pipe == NULL) {
                res = -EPIPE;
@@ -322,7 +322,7 @@ rpc_pipe_read(struct file *filp, char __user *buf, size_t len, loff_t *offset)
                pipe->ops->destroy_msg(msg);
        }
 out_unlock:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return res;
 }
 
@@ -332,11 +332,11 @@ rpc_pipe_write(struct file *filp, const char __user *buf, size_t len, loff_t *of
        struct inode *inode = file_inode(filp);
        int res;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        res = -EPIPE;
        if (RPC_I(inode)->pipe != NULL)
                res = RPC_I(inode)->pipe->ops->downcall(filp, buf, len);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return res;
 }
 
@@ -349,12 +349,12 @@ rpc_pipe_poll(struct file *filp, struct poll_table_struct *wait)
 
        poll_wait(filp, &rpci->waitq, wait);
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        if (rpci->pipe == NULL)
                mask |= POLLERR | POLLHUP;
        else if (filp->private_data || !list_empty(&rpci->pipe->pipe))
                mask |= POLLIN | POLLRDNORM;
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return mask;
 }
 
@@ -367,10 +367,10 @@ rpc_pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 
        switch (cmd) {
        case FIONREAD:
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                pipe = RPC_I(inode)->pipe;
                if (pipe == NULL) {
-                       mutex_unlock(&inode->i_mutex);
+                       inode_unlock(inode);
                        return -EPIPE;
                }
                spin_lock(&pipe->lock);
@@ -381,7 +381,7 @@ rpc_pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                        len += msg->len - msg->copied;
                }
                spin_unlock(&pipe->lock);
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
                return put_user(len, (int __user *)arg);
        default:
                return -EINVAL;
@@ -617,9 +617,9 @@ int rpc_rmdir(struct dentry *dentry)
 
        parent = dget_parent(dentry);
        dir = d_inode(parent);
-       mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(dir, I_MUTEX_PARENT);
        error = __rpc_rmdir(dir, dentry);
-       mutex_unlock(&dir->i_mutex);
+       inode_unlock(dir);
        dput(parent);
        return error;
 }
@@ -701,9 +701,9 @@ static void rpc_depopulate(struct dentry *parent,
 {
        struct inode *dir = d_inode(parent);
 
-       mutex_lock_nested(&dir->i_mutex, I_MUTEX_CHILD);
+       inode_lock_nested(dir, I_MUTEX_CHILD);
        __rpc_depopulate(parent, files, start, eof);
-       mutex_unlock(&dir->i_mutex);
+       inode_unlock(dir);
 }
 
 static int rpc_populate(struct dentry *parent,
@@ -715,7 +715,7 @@ static int rpc_populate(struct dentry *parent,
        struct dentry *dentry;
        int i, err;
 
-       mutex_lock(&dir->i_mutex);
+       inode_lock(dir);
        for (i = start; i < eof; i++) {
                dentry = __rpc_lookup_create_exclusive(parent, files[i].name);
                err = PTR_ERR(dentry);
@@ -739,11 +739,11 @@ static int rpc_populate(struct dentry *parent,
                if (err != 0)
                        goto out_bad;
        }
-       mutex_unlock(&dir->i_mutex);
+       inode_unlock(dir);
        return 0;
 out_bad:
        __rpc_depopulate(parent, files, start, eof);
-       mutex_unlock(&dir->i_mutex);
+       inode_unlock(dir);
        printk(KERN_WARNING "%s: %s failed to populate directory %pd\n",
                        __FILE__, __func__, parent);
        return err;
@@ -757,7 +757,7 @@ static struct dentry *rpc_mkdir_populate(struct dentry *parent,
        struct inode *dir = d_inode(parent);
        int error;
 
-       mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(dir, I_MUTEX_PARENT);
        dentry = __rpc_lookup_create_exclusive(parent, name);
        if (IS_ERR(dentry))
                goto out;
@@ -770,7 +770,7 @@ static struct dentry *rpc_mkdir_populate(struct dentry *parent,
                        goto err_rmdir;
        }
 out:
-       mutex_unlock(&dir->i_mutex);
+       inode_unlock(dir);
        return dentry;
 err_rmdir:
        __rpc_rmdir(dir, dentry);
@@ -788,11 +788,11 @@ static int rpc_rmdir_depopulate(struct dentry *dentry,
 
        parent = dget_parent(dentry);
        dir = d_inode(parent);
-       mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(dir, I_MUTEX_PARENT);
        if (depopulate != NULL)
                depopulate(dentry);
        error = __rpc_rmdir(dir, dentry);
-       mutex_unlock(&dir->i_mutex);
+       inode_unlock(dir);
        dput(parent);
        return error;
 }
@@ -828,7 +828,7 @@ struct dentry *rpc_mkpipe_dentry(struct dentry *parent, const char *name,
        if (pipe->ops->downcall == NULL)
                umode &= ~S_IWUGO;
 
-       mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(dir, I_MUTEX_PARENT);
        dentry = __rpc_lookup_create_exclusive(parent, name);
        if (IS_ERR(dentry))
                goto out;
@@ -837,7 +837,7 @@ struct dentry *rpc_mkpipe_dentry(struct dentry *parent, const char *name,
        if (err)
                goto out_err;
 out:
-       mutex_unlock(&dir->i_mutex);
+       inode_unlock(dir);
        return dentry;
 out_err:
        dentry = ERR_PTR(err);
@@ -865,9 +865,9 @@ rpc_unlink(struct dentry *dentry)
 
        parent = dget_parent(dentry);
        dir = d_inode(parent);
-       mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+       inode_lock_nested(dir, I_MUTEX_PARENT);
        error = __rpc_rmpipe(dir, dentry);
-       mutex_unlock(&dir->i_mutex);
+       inode_unlock(dir);
        dput(parent);
        return error;
 }
index 2e98f4a243e57d4539b3f4048d54c4b53f3c12d5..37edea6fa92d9685570ad3c1b37cb5fda5c0b9b3 100644 (file)
@@ -1425,3 +1425,4 @@ void xprt_put(struct rpc_xprt *xprt)
        if (atomic_dec_and_test(&xprt->count))
                xprt_destroy(xprt);
 }
+EXPORT_SYMBOL_GPL(xprt_put);
index 33f99d3004f2718206ad6b245a214e20718ac540..dc9f3b513a05f3a2fc9027359f9e1e587f9d243b 100644 (file)
@@ -2,7 +2,7 @@ obj-$(CONFIG_SUNRPC_XPRT_RDMA) += rpcrdma.o
 
 rpcrdma-y := transport.o rpc_rdma.o verbs.o \
        fmr_ops.o frwr_ops.o physical_ops.o \
-       svc_rdma.o svc_rdma_transport.o \
+       svc_rdma.o svc_rdma_backchannel.o svc_rdma_transport.o \
        svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o \
        module.o
 rpcrdma-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel.o
index c6836844bd0eb65ffa7134a6a014d60b5b42af1f..e16567389e28f5eba1359ddf91802d63c4612c26 100644 (file)
@@ -190,12 +190,11 @@ static int
 frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
             struct rpcrdma_create_data_internal *cdata)
 {
-       struct ib_device_attr *devattr = &ia->ri_devattr;
        int depth, delta;
 
        ia->ri_max_frmr_depth =
                        min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
-                             devattr->max_fast_reg_page_list_len);
+                             ia->ri_device->attrs.max_fast_reg_page_list_len);
        dprintk("RPC:       %s: device's max FR page list len = %u\n",
                __func__, ia->ri_max_frmr_depth);
 
@@ -222,8 +221,8 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
        }
 
        ep->rep_attr.cap.max_send_wr *= depth;
-       if (ep->rep_attr.cap.max_send_wr > devattr->max_qp_wr) {
-               cdata->max_requests = devattr->max_qp_wr / depth;
+       if (ep->rep_attr.cap.max_send_wr > ia->ri_device->attrs.max_qp_wr) {
+               cdata->max_requests = ia->ri_device->attrs.max_qp_wr / depth;
                if (!cdata->max_requests)
                        return -EINVAL;
                ep->rep_attr.cap.max_send_wr = cdata->max_requests *
index 1b7051bdbdc863dcd149b397bbc3f84a8872c5b1..c846ca9f1ebaa661f1e4a93893752950d35a4b6a 100644 (file)
@@ -55,6 +55,7 @@ unsigned int svcrdma_ord = RPCRDMA_ORD;
 static unsigned int min_ord = 1;
 static unsigned int max_ord = 4096;
 unsigned int svcrdma_max_requests = RPCRDMA_MAX_REQUESTS;
+unsigned int svcrdma_max_bc_requests = RPCRDMA_MAX_BC_REQUESTS;
 static unsigned int min_max_requests = 4;
 static unsigned int max_max_requests = 16384;
 unsigned int svcrdma_max_req_size = RPCRDMA_MAX_REQ_SIZE;
@@ -71,10 +72,6 @@ atomic_t rdma_stat_rq_prod;
 atomic_t rdma_stat_sq_poll;
 atomic_t rdma_stat_sq_prod;
 
-/* Temporary NFS request map and context caches */
-struct kmem_cache *svc_rdma_map_cachep;
-struct kmem_cache *svc_rdma_ctxt_cachep;
-
 struct workqueue_struct *svc_rdma_wq;
 
 /*
@@ -243,17 +240,16 @@ void svc_rdma_cleanup(void)
        svc_unreg_xprt_class(&svc_rdma_bc_class);
 #endif
        svc_unreg_xprt_class(&svc_rdma_class);
-       kmem_cache_destroy(svc_rdma_map_cachep);
-       kmem_cache_destroy(svc_rdma_ctxt_cachep);
 }
 
 int svc_rdma_init(void)
 {
        dprintk("SVCRDMA Module Init, register RPC RDMA transport\n");
        dprintk("\tsvcrdma_ord      : %d\n", svcrdma_ord);
-       dprintk("\tmax_requests     : %d\n", svcrdma_max_requests);
-       dprintk("\tsq_depth         : %d\n",
+       dprintk("\tmax_requests     : %u\n", svcrdma_max_requests);
+       dprintk("\tsq_depth         : %u\n",
                svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT);
+       dprintk("\tmax_bc_requests  : %u\n", svcrdma_max_bc_requests);
        dprintk("\tmax_inline       : %d\n", svcrdma_max_req_size);
 
        svc_rdma_wq = alloc_workqueue("svc_rdma", 0, 0);
@@ -264,39 +260,10 @@ int svc_rdma_init(void)
                svcrdma_table_header =
                        register_sysctl_table(svcrdma_root_table);
 
-       /* Create the temporary map cache */
-       svc_rdma_map_cachep = kmem_cache_create("svc_rdma_map_cache",
-                                               sizeof(struct svc_rdma_req_map),
-                                               0,
-                                               SLAB_HWCACHE_ALIGN,
-                                               NULL);
-       if (!svc_rdma_map_cachep) {
-               printk(KERN_INFO "Could not allocate map cache.\n");
-               goto err0;
-       }
-
-       /* Create the temporary context cache */
-       svc_rdma_ctxt_cachep =
-               kmem_cache_create("svc_rdma_ctxt_cache",
-                                 sizeof(struct svc_rdma_op_ctxt),
-                                 0,
-                                 SLAB_HWCACHE_ALIGN,
-                                 NULL);
-       if (!svc_rdma_ctxt_cachep) {
-               printk(KERN_INFO "Could not allocate WR ctxt cache.\n");
-               goto err1;
-       }
-
        /* Register RDMA with the SVC transport switch */
        svc_reg_xprt_class(&svc_rdma_class);
 #if defined(CONFIG_SUNRPC_BACKCHANNEL)
        svc_reg_xprt_class(&svc_rdma_bc_class);
 #endif
        return 0;
- err1:
-       kmem_cache_destroy(svc_rdma_map_cachep);
- err0:
-       unregister_sysctl_table(svcrdma_table_header);
-       destroy_workqueue(svc_rdma_wq);
-       return -ENOMEM;
 }
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
new file mode 100644 (file)
index 0000000..65a7c23
--- /dev/null
@@ -0,0 +1,371 @@
+/*
+ * Copyright (c) 2015 Oracle.  All rights reserved.
+ *
+ * Support for backward direction RPCs on RPC/RDMA (server-side).
+ */
+
+#include <linux/sunrpc/svc_rdma.h>
+#include "xprt_rdma.h"
+
+#define RPCDBG_FACILITY        RPCDBG_SVCXPRT
+
+#undef SVCRDMA_BACKCHANNEL_DEBUG
+
+int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, struct rpcrdma_msg *rmsgp,
+                            struct xdr_buf *rcvbuf)
+{
+       struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+       struct kvec *dst, *src = &rcvbuf->head[0];
+       struct rpc_rqst *req;
+       unsigned long cwnd;
+       u32 credits;
+       size_t len;
+       __be32 xid;
+       __be32 *p;
+       int ret;
+
+       p = (__be32 *)src->iov_base;
+       len = src->iov_len;
+       xid = rmsgp->rm_xid;
+
+#ifdef SVCRDMA_BACKCHANNEL_DEBUG
+       pr_info("%s: xid=%08x, length=%zu\n",
+               __func__, be32_to_cpu(xid), len);
+       pr_info("%s: RPC/RDMA: %*ph\n",
+               __func__, (int)RPCRDMA_HDRLEN_MIN, rmsgp);
+       pr_info("%s:      RPC: %*ph\n",
+               __func__, (int)len, p);
+#endif
+
+       ret = -EAGAIN;
+       if (src->iov_len < 24)
+               goto out_shortreply;
+
+       spin_lock_bh(&xprt->transport_lock);
+       req = xprt_lookup_rqst(xprt, xid);
+       if (!req)
+               goto out_notfound;
+
+       dst = &req->rq_private_buf.head[0];
+       memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(struct xdr_buf));
+       if (dst->iov_len < len)
+               goto out_unlock;
+       memcpy(dst->iov_base, p, len);
+
+       credits = be32_to_cpu(rmsgp->rm_credit);
+       if (credits == 0)
+               credits = 1;    /* don't deadlock */
+       else if (credits > r_xprt->rx_buf.rb_bc_max_requests)
+               credits = r_xprt->rx_buf.rb_bc_max_requests;
+
+       cwnd = xprt->cwnd;
+       xprt->cwnd = credits << RPC_CWNDSHIFT;
+       if (xprt->cwnd > cwnd)
+               xprt_release_rqst_cong(req->rq_task);
+
+       ret = 0;
+       xprt_complete_rqst(req->rq_task, rcvbuf->len);
+       rcvbuf->len = 0;
+
+out_unlock:
+       spin_unlock_bh(&xprt->transport_lock);
+out:
+       return ret;
+
+out_shortreply:
+       dprintk("svcrdma: short bc reply: xprt=%p, len=%zu\n",
+               xprt, src->iov_len);
+       goto out;
+
+out_notfound:
+       dprintk("svcrdma: unrecognized bc reply: xprt=%p, xid=%08x\n",
+               xprt, be32_to_cpu(xid));
+
+       goto out_unlock;
+}
+
+/* Send a backwards direction RPC call.
+ *
+ * Caller holds the connection's mutex and has already marshaled
+ * the RPC/RDMA request.
+ *
+ * This is similar to svc_rdma_reply, but takes an rpc_rqst
+ * instead, does not support chunks, and avoids blocking memory
+ * allocation.
+ *
+ * XXX: There is still an opportunity to block in svc_rdma_send()
+ * if there are no SQ entries to post the Send. This may occur if
+ * the adapter has a small maximum SQ depth.
+ */
+static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
+                             struct rpc_rqst *rqst)
+{
+       struct xdr_buf *sndbuf = &rqst->rq_snd_buf;
+       struct svc_rdma_op_ctxt *ctxt;
+       struct svc_rdma_req_map *vec;
+       struct ib_send_wr send_wr;
+       int ret;
+
+       vec = svc_rdma_get_req_map(rdma);
+       ret = svc_rdma_map_xdr(rdma, sndbuf, vec);
+       if (ret)
+               goto out_err;
+
+       /* Post a recv buffer to handle the reply for this request. */
+       ret = svc_rdma_post_recv(rdma, GFP_NOIO);
+       if (ret) {
+               pr_err("svcrdma: Failed to post bc receive buffer, err=%d.\n",
+                      ret);
+               pr_err("svcrdma: closing transport %p.\n", rdma);
+               set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
+               ret = -ENOTCONN;
+               goto out_err;
+       }
+
+       ctxt = svc_rdma_get_context(rdma);
+       ctxt->pages[0] = virt_to_page(rqst->rq_buffer);
+       ctxt->count = 1;
+
+       ctxt->wr_op = IB_WR_SEND;
+       ctxt->direction = DMA_TO_DEVICE;
+       ctxt->sge[0].lkey = rdma->sc_pd->local_dma_lkey;
+       ctxt->sge[0].length = sndbuf->len;
+       ctxt->sge[0].addr =
+           ib_dma_map_page(rdma->sc_cm_id->device, ctxt->pages[0], 0,
+                           sndbuf->len, DMA_TO_DEVICE);
+       if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr)) {
+               ret = -EIO;
+               goto out_unmap;
+       }
+       atomic_inc(&rdma->sc_dma_used);
+
+       memset(&send_wr, 0, sizeof(send_wr));
+       send_wr.wr_id = (unsigned long)ctxt;
+       send_wr.sg_list = ctxt->sge;
+       send_wr.num_sge = 1;
+       send_wr.opcode = IB_WR_SEND;
+       send_wr.send_flags = IB_SEND_SIGNALED;
+
+       ret = svc_rdma_send(rdma, &send_wr);
+       if (ret) {
+               ret = -EIO;
+               goto out_unmap;
+       }
+
+out_err:
+       svc_rdma_put_req_map(rdma, vec);
+       dprintk("svcrdma: %s returns %d\n", __func__, ret);
+       return ret;
+
+out_unmap:
+       svc_rdma_unmap_dma(ctxt);
+       svc_rdma_put_context(ctxt, 1);
+       goto out_err;
+}
+
+/* Server-side transport endpoint wants a whole page for its send
+ * buffer. The client RPC code constructs the RPC header in this
+ * buffer before it invokes ->send_request.
+ *
+ * Returns NULL if there was a temporary allocation failure.
+ */
+static void *
+xprt_rdma_bc_allocate(struct rpc_task *task, size_t size)
+{
+       struct rpc_rqst *rqst = task->tk_rqstp;
+       struct svc_xprt *sxprt = rqst->rq_xprt->bc_xprt;
+       struct svcxprt_rdma *rdma;
+       struct page *page;
+
+       rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt);
+
+       /* Prevent an infinite loop: try to make this case work */
+       if (size > PAGE_SIZE)
+               WARN_ONCE(1, "svcrdma: large bc buffer request (size %zu)\n",
+                         size);
+
+       page = alloc_page(RPCRDMA_DEF_GFP);
+       if (!page)
+               return NULL;
+
+       return page_address(page);
+}
+
+static void
+xprt_rdma_bc_free(void *buffer)
+{
+       /* No-op: ctxt and page have already been freed. */
+}
+
+static int
+rpcrdma_bc_send_request(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst)
+{
+       struct rpc_xprt *xprt = rqst->rq_xprt;
+       struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+       struct rpcrdma_msg *headerp = (struct rpcrdma_msg *)rqst->rq_buffer;
+       int rc;
+
+       /* Space in the send buffer for an RPC/RDMA header is reserved
+        * via xprt->tsh_size.
+        */
+       headerp->rm_xid = rqst->rq_xid;
+       headerp->rm_vers = rpcrdma_version;
+       headerp->rm_credit = cpu_to_be32(r_xprt->rx_buf.rb_bc_max_requests);
+       headerp->rm_type = rdma_msg;
+       headerp->rm_body.rm_chunks[0] = xdr_zero;
+       headerp->rm_body.rm_chunks[1] = xdr_zero;
+       headerp->rm_body.rm_chunks[2] = xdr_zero;
+
+#ifdef SVCRDMA_BACKCHANNEL_DEBUG
+       pr_info("%s: %*ph\n", __func__, 64, rqst->rq_buffer);
+#endif
+
+       rc = svc_rdma_bc_sendto(rdma, rqst);
+       if (rc)
+               goto drop_connection;
+       return rc;
+
+drop_connection:
+       dprintk("svcrdma: failed to send bc call\n");
+       xprt_disconnect_done(xprt);
+       return -ENOTCONN;
+}
+
+/* Send an RPC call on the passive end of a transport
+ * connection.
+ */
+static int
+xprt_rdma_bc_send_request(struct rpc_task *task)
+{
+       struct rpc_rqst *rqst = task->tk_rqstp;
+       struct svc_xprt *sxprt = rqst->rq_xprt->bc_xprt;
+       struct svcxprt_rdma *rdma;
+       int ret;
+
+       dprintk("svcrdma: sending bc call with xid: %08x\n",
+               be32_to_cpu(rqst->rq_xid));
+
+       if (!mutex_trylock(&sxprt->xpt_mutex)) {
+               rpc_sleep_on(&sxprt->xpt_bc_pending, task, NULL);
+               if (!mutex_trylock(&sxprt->xpt_mutex))
+                       return -EAGAIN;
+               rpc_wake_up_queued_task(&sxprt->xpt_bc_pending, task);
+       }
+
+       ret = -ENOTCONN;
+       rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt);
+       if (!test_bit(XPT_DEAD, &sxprt->xpt_flags))
+               ret = rpcrdma_bc_send_request(rdma, rqst);
+
+       mutex_unlock(&sxprt->xpt_mutex);
+
+       if (ret < 0)
+               return ret;
+       return 0;
+}
+
+static void
+xprt_rdma_bc_close(struct rpc_xprt *xprt)
+{
+       dprintk("svcrdma: %s: xprt %p\n", __func__, xprt);
+}
+
+static void
+xprt_rdma_bc_put(struct rpc_xprt *xprt)
+{
+       dprintk("svcrdma: %s: xprt %p\n", __func__, xprt);
+
+       xprt_free(xprt);
+       module_put(THIS_MODULE);
+}
+
+static struct rpc_xprt_ops xprt_rdma_bc_procs = {
+       .reserve_xprt           = xprt_reserve_xprt_cong,
+       .release_xprt           = xprt_release_xprt_cong,
+       .alloc_slot             = xprt_alloc_slot,
+       .release_request        = xprt_release_rqst_cong,
+       .buf_alloc              = xprt_rdma_bc_allocate,
+       .buf_free               = xprt_rdma_bc_free,
+       .send_request           = xprt_rdma_bc_send_request,
+       .set_retrans_timeout    = xprt_set_retrans_timeout_def,
+       .close                  = xprt_rdma_bc_close,
+       .destroy                = xprt_rdma_bc_put,
+       .print_stats            = xprt_rdma_print_stats
+};
+
+static const struct rpc_timeout xprt_rdma_bc_timeout = {
+       .to_initval = 60 * HZ,
+       .to_maxval = 60 * HZ,
+};
+
+/* It shouldn't matter if the number of backchannel session slots
+ * doesn't match the number of RPC/RDMA credits. That just means
+ * one or the other will have extra slots that aren't used.
+ */
+static struct rpc_xprt *
+xprt_setup_rdma_bc(struct xprt_create *args)
+{
+       struct rpc_xprt *xprt;
+       struct rpcrdma_xprt *new_xprt;
+
+       if (args->addrlen > sizeof(xprt->addr)) {
+               dprintk("RPC:       %s: address too large\n", __func__);
+               return ERR_PTR(-EBADF);
+       }
+
+       xprt = xprt_alloc(args->net, sizeof(*new_xprt),
+                         RPCRDMA_MAX_BC_REQUESTS,
+                         RPCRDMA_MAX_BC_REQUESTS);
+       if (!xprt) {
+               dprintk("RPC:       %s: couldn't allocate rpc_xprt\n",
+                       __func__);
+               return ERR_PTR(-ENOMEM);
+       }
+
+       xprt->timeout = &xprt_rdma_bc_timeout;
+       xprt_set_bound(xprt);
+       xprt_set_connected(xprt);
+       xprt->bind_timeout = RPCRDMA_BIND_TO;
+       xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO;
+       xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO;
+
+       xprt->prot = XPRT_TRANSPORT_BC_RDMA;
+       xprt->tsh_size = RPCRDMA_HDRLEN_MIN / sizeof(__be32);
+       xprt->ops = &xprt_rdma_bc_procs;
+
+       memcpy(&xprt->addr, args->dstaddr, args->addrlen);
+       xprt->addrlen = args->addrlen;
+       xprt_rdma_format_addresses(xprt, (struct sockaddr *)&xprt->addr);
+       xprt->resvport = 0;
+
+       xprt->max_payload = xprt_rdma_max_inline_read;
+
+       new_xprt = rpcx_to_rdmax(xprt);
+       new_xprt->rx_buf.rb_bc_max_requests = xprt->max_reqs;
+
+       xprt_get(xprt);
+       args->bc_xprt->xpt_bc_xprt = xprt;
+       xprt->bc_xprt = args->bc_xprt;
+
+       if (!try_module_get(THIS_MODULE))
+               goto out_fail;
+
+       /* Final put for backchannel xprt is in __svc_rdma_free */
+       xprt_get(xprt);
+       return xprt;
+
+out_fail:
+       xprt_rdma_free_addresses(xprt);
+       args->bc_xprt->xpt_bc_xprt = NULL;
+       xprt_put(xprt);
+       xprt_free(xprt);
+       return ERR_PTR(-EINVAL);
+}
+
+struct xprt_class xprt_rdma_bc = {
+       .list                   = LIST_HEAD_INIT(xprt_rdma_bc.list),
+       .name                   = "rdma backchannel",
+       .owner                  = THIS_MODULE,
+       .ident                  = XPRT_TRANSPORT_BC_RDMA,
+       .setup                  = xprt_setup_rdma_bc,
+};
index ff4f01e527ecc08a1480ecba8f00d41a90a76571..c8b8a8b4181eafa75de0d673040b9f927fa74d9d 100644 (file)
@@ -144,6 +144,7 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
 
                head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no];
                head->arg.page_len += len;
+
                head->arg.len += len;
                if (!pg_off)
                        head->count++;
@@ -160,8 +161,7 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
                        goto err;
                atomic_inc(&xprt->sc_dma_used);
 
-               /* The lkey here is either a local dma lkey or a dma_mr lkey */
-               ctxt->sge[pno].lkey = xprt->sc_dma_lkey;
+               ctxt->sge[pno].lkey = xprt->sc_pd->local_dma_lkey;
                ctxt->sge[pno].length = len;
                ctxt->count++;
 
@@ -567,6 +567,38 @@ static int rdma_read_complete(struct svc_rqst *rqstp,
        return ret;
 }
 
+/* By convention, backchannel calls arrive via rdma_msg type
+ * messages, and never populate the chunk lists. This makes
+ * the RPC/RDMA header small and fixed in size, so it is
+ * straightforward to check the RPC header's direction field.
+ */
+static bool
+svc_rdma_is_backchannel_reply(struct svc_xprt *xprt, struct rpcrdma_msg *rmsgp)
+{
+       __be32 *p = (__be32 *)rmsgp;
+
+       if (!xprt->xpt_bc_xprt)
+               return false;
+
+       if (rmsgp->rm_type != rdma_msg)
+               return false;
+       if (rmsgp->rm_body.rm_chunks[0] != xdr_zero)
+               return false;
+       if (rmsgp->rm_body.rm_chunks[1] != xdr_zero)
+               return false;
+       if (rmsgp->rm_body.rm_chunks[2] != xdr_zero)
+               return false;
+
+       /* sanity */
+       if (p[7] != rmsgp->rm_xid)
+               return false;
+       /* call direction */
+       if (p[8] == cpu_to_be32(RPC_CALL))
+               return false;
+
+       return true;
+}
+
 /*
  * Set up the rqstp thread context to point to the RQ buffer. If
  * necessary, pull additional data from the client with an RDMA_READ
@@ -632,6 +664,15 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
                goto close_out;
        }
 
+       if (svc_rdma_is_backchannel_reply(xprt, rmsgp)) {
+               ret = svc_rdma_handle_bc_reply(xprt->xpt_bc_xprt, rmsgp,
+                                              &rqstp->rq_arg);
+               svc_rdma_put_context(ctxt, 0);
+               if (ret)
+                       goto repost;
+               return ret;
+       }
+
        /* Read read-list data. */
        ret = rdma_read_chunks(rdma_xprt, rmsgp, rqstp, ctxt);
        if (ret > 0) {
@@ -668,4 +709,15 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
        set_bit(XPT_CLOSE, &xprt->xpt_flags);
 defer:
        return 0;
+
+repost:
+       ret = svc_rdma_post_recv(rdma_xprt, GFP_KERNEL);
+       if (ret) {
+               pr_err("svcrdma: could not post a receive buffer, err=%d.\n",
+                      ret);
+               pr_err("svcrdma: closing transport %p.\n", rdma_xprt);
+               set_bit(XPT_CLOSE, &rdma_xprt->sc_xprt.xpt_flags);
+               ret = -ENOTCONN;
+       }
+       return ret;
 }
index 969a1ab75fc3c5fb8011157e4f57e8d08f560b42..df57f3ce6cd2cc3cae5fa2a709e62ceae2828a26 100644 (file)
@@ -50,9 +50,9 @@
 
 #define RPCDBG_FACILITY        RPCDBG_SVCXPRT
 
-static int map_xdr(struct svcxprt_rdma *xprt,
-                  struct xdr_buf *xdr,
-                  struct svc_rdma_req_map *vec)
+int svc_rdma_map_xdr(struct svcxprt_rdma *xprt,
+                    struct xdr_buf *xdr,
+                    struct svc_rdma_req_map *vec)
 {
        int sge_no;
        u32 sge_bytes;
@@ -62,7 +62,7 @@ static int map_xdr(struct svcxprt_rdma *xprt,
 
        if (xdr->len !=
            (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len)) {
-               pr_err("svcrdma: map_xdr: XDR buffer length error\n");
+               pr_err("svcrdma: %s: XDR buffer length error\n", __func__);
                return -EIO;
        }
 
@@ -97,9 +97,9 @@ static int map_xdr(struct svcxprt_rdma *xprt,
                sge_no++;
        }
 
-       dprintk("svcrdma: map_xdr: sge_no %d page_no %d "
+       dprintk("svcrdma: %s: sge_no %d page_no %d "
                "page_base %u page_len %u head_len %zu tail_len %zu\n",
-               sge_no, page_no, xdr->page_base, xdr->page_len,
+               __func__, sge_no, page_no, xdr->page_base, xdr->page_len,
                xdr->head[0].iov_len, xdr->tail[0].iov_len);
 
        vec->count = sge_no;
@@ -265,7 +265,7 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
                                         sge[sge_no].addr))
                        goto err;
                atomic_inc(&xprt->sc_dma_used);
-               sge[sge_no].lkey = xprt->sc_dma_lkey;
+               sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey;
                ctxt->count++;
                sge_off = 0;
                sge_no++;
@@ -465,7 +465,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
        int ret;
 
        /* Post a recv buffer to handle another request. */
-       ret = svc_rdma_post_recv(rdma);
+       ret = svc_rdma_post_recv(rdma, GFP_KERNEL);
        if (ret) {
                printk(KERN_INFO
                       "svcrdma: could not post a receive buffer, err=%d."
@@ -480,7 +480,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
        ctxt->count = 1;
 
        /* Prepare the SGE for the RPCRDMA Header */
-       ctxt->sge[0].lkey = rdma->sc_dma_lkey;
+       ctxt->sge[0].lkey = rdma->sc_pd->local_dma_lkey;
        ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp);
        ctxt->sge[0].addr =
            ib_dma_map_page(rdma->sc_cm_id->device, page, 0,
@@ -504,7 +504,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
                                         ctxt->sge[sge_no].addr))
                        goto err;
                atomic_inc(&rdma->sc_dma_used);
-               ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey;
+               ctxt->sge[sge_no].lkey = rdma->sc_pd->local_dma_lkey;
                ctxt->sge[sge_no].length = sge_bytes;
        }
        if (byte_count != 0) {
@@ -591,14 +591,17 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
        /* Build an req vec for the XDR */
        ctxt = svc_rdma_get_context(rdma);
        ctxt->direction = DMA_TO_DEVICE;
-       vec = svc_rdma_get_req_map();
-       ret = map_xdr(rdma, &rqstp->rq_res, vec);
+       vec = svc_rdma_get_req_map(rdma);
+       ret = svc_rdma_map_xdr(rdma, &rqstp->rq_res, vec);
        if (ret)
                goto err0;
        inline_bytes = rqstp->rq_res.len;
 
        /* Create the RDMA response header */
-       res_page = alloc_page(GFP_KERNEL | __GFP_NOFAIL);
+       ret = -ENOMEM;
+       res_page = alloc_page(GFP_KERNEL);
+       if (!res_page)
+               goto err0;
        rdma_resp = page_address(res_page);
        reply_ary = svc_rdma_get_reply_array(rdma_argp);
        if (reply_ary)
@@ -630,14 +633,14 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
 
        ret = send_reply(rdma, rqstp, res_page, rdma_resp, ctxt, vec,
                         inline_bytes);
-       svc_rdma_put_req_map(vec);
+       svc_rdma_put_req_map(rdma, vec);
        dprintk("svcrdma: send_reply returns %d\n", ret);
        return ret;
 
  err1:
        put_page(res_page);
  err0:
-       svc_rdma_put_req_map(vec);
+       svc_rdma_put_req_map(rdma, vec);
        svc_rdma_put_context(ctxt, 0);
        return ret;
 }
index b348b4adef29a48246709cc7f32cf576865753eb..5763825d09bf776bfa89f0007be1805203d96adf 100644 (file)
@@ -153,18 +153,76 @@ static void svc_rdma_bc_free(struct svc_xprt *xprt)
 }
 #endif /* CONFIG_SUNRPC_BACKCHANNEL */
 
-struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
+static struct svc_rdma_op_ctxt *alloc_ctxt(struct svcxprt_rdma *xprt,
+                                          gfp_t flags)
 {
        struct svc_rdma_op_ctxt *ctxt;
 
-       ctxt = kmem_cache_alloc(svc_rdma_ctxt_cachep,
-                               GFP_KERNEL | __GFP_NOFAIL);
-       ctxt->xprt = xprt;
-       INIT_LIST_HEAD(&ctxt->dto_q);
+       ctxt = kmalloc(sizeof(*ctxt), flags);
+       if (ctxt) {
+               ctxt->xprt = xprt;
+               INIT_LIST_HEAD(&ctxt->free);
+               INIT_LIST_HEAD(&ctxt->dto_q);
+       }
+       return ctxt;
+}
+
+static bool svc_rdma_prealloc_ctxts(struct svcxprt_rdma *xprt)
+{
+       unsigned int i;
+
+       /* Each RPC/RDMA credit can consume a number of send
+        * and receive WQEs. One ctxt is allocated for each.
+        */
+       i = xprt->sc_sq_depth + xprt->sc_rq_depth;
+
+       while (i--) {
+               struct svc_rdma_op_ctxt *ctxt;
+
+               ctxt = alloc_ctxt(xprt, GFP_KERNEL);
+               if (!ctxt) {
+                       dprintk("svcrdma: No memory for RDMA ctxt\n");
+                       return false;
+               }
+               list_add(&ctxt->free, &xprt->sc_ctxts);
+       }
+       return true;
+}
+
+struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
+{
+       struct svc_rdma_op_ctxt *ctxt = NULL;
+
+       spin_lock_bh(&xprt->sc_ctxt_lock);
+       xprt->sc_ctxt_used++;
+       if (list_empty(&xprt->sc_ctxts))
+               goto out_empty;
+
+       ctxt = list_first_entry(&xprt->sc_ctxts,
+                               struct svc_rdma_op_ctxt, free);
+       list_del_init(&ctxt->free);
+       spin_unlock_bh(&xprt->sc_ctxt_lock);
+
+out:
        ctxt->count = 0;
        ctxt->frmr = NULL;
-       atomic_inc(&xprt->sc_ctxt_used);
        return ctxt;
+
+out_empty:
+       /* Either pre-allocation missed the mark, or send
+        * queue accounting is broken.
+        */
+       spin_unlock_bh(&xprt->sc_ctxt_lock);
+
+       ctxt = alloc_ctxt(xprt, GFP_NOIO);
+       if (ctxt)
+               goto out;
+
+       spin_lock_bh(&xprt->sc_ctxt_lock);
+       xprt->sc_ctxt_used--;
+       spin_unlock_bh(&xprt->sc_ctxt_lock);
+       WARN_ONCE(1, "svcrdma: empty RDMA ctxt list?\n");
+       return NULL;
 }
 
 void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
@@ -174,11 +232,11 @@ void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
        for (i = 0; i < ctxt->count && ctxt->sge[i].length; i++) {
                /*
                 * Unmap the DMA addr in the SGE if the lkey matches
-                * the sc_dma_lkey, otherwise, ignore it since it is
+                * the local_dma_lkey, otherwise, ignore it since it is
                 * an FRMR lkey and will be unmapped later when the
                 * last WR that uses it completes.
                 */
-               if (ctxt->sge[i].lkey == xprt->sc_dma_lkey) {
+               if (ctxt->sge[i].lkey == xprt->sc_pd->local_dma_lkey) {
                        atomic_dec(&xprt->sc_dma_used);
                        ib_dma_unmap_page(xprt->sc_cm_id->device,
                                            ctxt->sge[i].addr,
@@ -190,35 +248,108 @@ void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
 
 void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
 {
-       struct svcxprt_rdma *xprt;
+       struct svcxprt_rdma *xprt = ctxt->xprt;
        int i;
 
-       xprt = ctxt->xprt;
        if (free_pages)
                for (i = 0; i < ctxt->count; i++)
                        put_page(ctxt->pages[i]);
 
-       kmem_cache_free(svc_rdma_ctxt_cachep, ctxt);
-       atomic_dec(&xprt->sc_ctxt_used);
+       spin_lock_bh(&xprt->sc_ctxt_lock);
+       xprt->sc_ctxt_used--;
+       list_add(&ctxt->free, &xprt->sc_ctxts);
+       spin_unlock_bh(&xprt->sc_ctxt_lock);
 }
 
-/*
- * Temporary NFS req mappings are shared across all transport
- * instances. These are short lived and should be bounded by the number
- * of concurrent server threads * depth of the SQ.
- */
-struct svc_rdma_req_map *svc_rdma_get_req_map(void)
+static void svc_rdma_destroy_ctxts(struct svcxprt_rdma *xprt)
+{
+       while (!list_empty(&xprt->sc_ctxts)) {
+               struct svc_rdma_op_ctxt *ctxt;
+
+               ctxt = list_first_entry(&xprt->sc_ctxts,
+                                       struct svc_rdma_op_ctxt, free);
+               list_del(&ctxt->free);
+               kfree(ctxt);
+       }
+}
+
+static struct svc_rdma_req_map *alloc_req_map(gfp_t flags)
 {
        struct svc_rdma_req_map *map;
-       map = kmem_cache_alloc(svc_rdma_map_cachep,
-                              GFP_KERNEL | __GFP_NOFAIL);
+
+       map = kmalloc(sizeof(*map), flags);
+       if (map)
+               INIT_LIST_HEAD(&map->free);
+       return map;
+}
+
+static bool svc_rdma_prealloc_maps(struct svcxprt_rdma *xprt)
+{
+       unsigned int i;
+
+       /* One for each receive buffer on this connection. */
+       i = xprt->sc_max_requests;
+
+       while (i--) {
+               struct svc_rdma_req_map *map;
+
+               map = alloc_req_map(GFP_KERNEL);
+               if (!map) {
+                       dprintk("svcrdma: No memory for request map\n");
+                       return false;
+               }
+               list_add(&map->free, &xprt->sc_maps);
+       }
+       return true;
+}
+
+struct svc_rdma_req_map *svc_rdma_get_req_map(struct svcxprt_rdma *xprt)
+{
+       struct svc_rdma_req_map *map = NULL;
+
+       spin_lock(&xprt->sc_map_lock);
+       if (list_empty(&xprt->sc_maps))
+               goto out_empty;
+
+       map = list_first_entry(&xprt->sc_maps,
+                              struct svc_rdma_req_map, free);
+       list_del_init(&map->free);
+       spin_unlock(&xprt->sc_map_lock);
+
+out:
        map->count = 0;
        return map;
+
+out_empty:
+       spin_unlock(&xprt->sc_map_lock);
+
+       /* Pre-allocation amount was incorrect */
+       map = alloc_req_map(GFP_NOIO);
+       if (map)
+               goto out;
+
+       WARN_ONCE(1, "svcrdma: empty request map list?\n");
+       return NULL;
+}
+
+void svc_rdma_put_req_map(struct svcxprt_rdma *xprt,
+                         struct svc_rdma_req_map *map)
+{
+       spin_lock(&xprt->sc_map_lock);
+       list_add(&map->free, &xprt->sc_maps);
+       spin_unlock(&xprt->sc_map_lock);
 }
 
-void svc_rdma_put_req_map(struct svc_rdma_req_map *map)
+static void svc_rdma_destroy_maps(struct svcxprt_rdma *xprt)
 {
-       kmem_cache_free(svc_rdma_map_cachep, map);
+       while (!list_empty(&xprt->sc_maps)) {
+               struct svc_rdma_req_map *map;
+
+               map = list_first_entry(&xprt->sc_maps,
+                                      struct svc_rdma_req_map, free);
+               list_del(&map->free);
+               kfree(map);
+       }
 }
 
 /* ib_cq event handler */
@@ -386,46 +517,44 @@ static void rq_cq_reap(struct svcxprt_rdma *xprt)
 static void process_context(struct svcxprt_rdma *xprt,
                            struct svc_rdma_op_ctxt *ctxt)
 {
+       struct svc_rdma_op_ctxt *read_hdr;
+       int free_pages = 0;
+
        svc_rdma_unmap_dma(ctxt);
 
        switch (ctxt->wr_op) {
        case IB_WR_SEND:
-               if (ctxt->frmr)
-                       pr_err("svcrdma: SEND: ctxt->frmr != NULL\n");
-               svc_rdma_put_context(ctxt, 1);
+               free_pages = 1;
                break;
 
        case IB_WR_RDMA_WRITE:
-               if (ctxt->frmr)
-                       pr_err("svcrdma: WRITE: ctxt->frmr != NULL\n");
-               svc_rdma_put_context(ctxt, 0);
                break;
 
        case IB_WR_RDMA_READ:
        case IB_WR_RDMA_READ_WITH_INV:
                svc_rdma_put_frmr(xprt, ctxt->frmr);
-               if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
-                       struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
-                       if (read_hdr) {
-                               spin_lock_bh(&xprt->sc_rq_dto_lock);
-                               set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
-                               list_add_tail(&read_hdr->dto_q,
-                                             &xprt->sc_read_complete_q);
-                               spin_unlock_bh(&xprt->sc_rq_dto_lock);
-                       } else {
-                               pr_err("svcrdma: ctxt->read_hdr == NULL\n");
-                       }
-                       svc_xprt_enqueue(&xprt->sc_xprt);
-               }
+
+               if (!test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags))
+                       break;
+
+               read_hdr = ctxt->read_hdr;
                svc_rdma_put_context(ctxt, 0);
-               break;
+
+               spin_lock_bh(&xprt->sc_rq_dto_lock);
+               set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
+               list_add_tail(&read_hdr->dto_q,
+                             &xprt->sc_read_complete_q);
+               spin_unlock_bh(&xprt->sc_rq_dto_lock);
+               svc_xprt_enqueue(&xprt->sc_xprt);
+               return;
 
        default:
-               printk(KERN_ERR "svcrdma: unexpected completion type, "
-                      "opcode=%d\n",
-                      ctxt->wr_op);
+               dprintk("svcrdma: unexpected completion opcode=%d\n",
+                       ctxt->wr_op);
                break;
        }
+
+       svc_rdma_put_context(ctxt, free_pages);
 }
 
 /*
@@ -523,19 +652,15 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
        INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
        INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q);
        INIT_LIST_HEAD(&cma_xprt->sc_frmr_q);
+       INIT_LIST_HEAD(&cma_xprt->sc_ctxts);
+       INIT_LIST_HEAD(&cma_xprt->sc_maps);
        init_waitqueue_head(&cma_xprt->sc_send_wait);
 
        spin_lock_init(&cma_xprt->sc_lock);
        spin_lock_init(&cma_xprt->sc_rq_dto_lock);
        spin_lock_init(&cma_xprt->sc_frmr_q_lock);
-
-       cma_xprt->sc_ord = svcrdma_ord;
-
-       cma_xprt->sc_max_req_size = svcrdma_max_req_size;
-       cma_xprt->sc_max_requests = svcrdma_max_requests;
-       cma_xprt->sc_sq_depth = svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT;
-       atomic_set(&cma_xprt->sc_sq_count, 0);
-       atomic_set(&cma_xprt->sc_ctxt_used, 0);
+       spin_lock_init(&cma_xprt->sc_ctxt_lock);
+       spin_lock_init(&cma_xprt->sc_map_lock);
 
        if (listener)
                set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);
@@ -543,7 +668,7 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
        return cma_xprt;
 }
 
-int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
+int svc_rdma_post_recv(struct svcxprt_rdma *xprt, gfp_t flags)
 {
        struct ib_recv_wr recv_wr, *bad_recv_wr;
        struct svc_rdma_op_ctxt *ctxt;
@@ -561,7 +686,9 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
                        pr_err("svcrdma: Too many sges (%d)\n", sge_no);
                        goto err_put_ctxt;
                }
-               page = alloc_page(GFP_KERNEL | __GFP_NOFAIL);
+               page = alloc_page(flags);
+               if (!page)
+                       goto err_put_ctxt;
                ctxt->pages[sge_no] = page;
                pa = ib_dma_map_page(xprt->sc_cm_id->device,
                                     page, 0, PAGE_SIZE,
@@ -571,7 +698,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
                atomic_inc(&xprt->sc_dma_used);
                ctxt->sge[sge_no].addr = pa;
                ctxt->sge[sge_no].length = PAGE_SIZE;
-               ctxt->sge[sge_no].lkey = xprt->sc_dma_lkey;
+               ctxt->sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey;
                ctxt->count = sge_no + 1;
                buflen += PAGE_SIZE;
        }
@@ -886,11 +1013,9 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
        struct rdma_conn_param conn_param;
        struct ib_cq_init_attr cq_attr = {};
        struct ib_qp_init_attr qp_attr;
-       struct ib_device_attr devattr;
-       int uninitialized_var(dma_mr_acc);
-       int need_dma_mr = 0;
-       int ret;
-       int i;
+       struct ib_device *dev;
+       unsigned int i;
+       int ret = 0;
 
        listen_rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt);
        clear_bit(XPT_CONN, &xprt->xpt_flags);
@@ -910,37 +1035,42 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
        dprintk("svcrdma: newxprt from accept queue = %p, cm_id=%p\n",
                newxprt, newxprt->sc_cm_id);
 
-       ret = ib_query_device(newxprt->sc_cm_id->device, &devattr);
-       if (ret) {
-               dprintk("svcrdma: could not query device attributes on "
-                       "device %p, rc=%d\n", newxprt->sc_cm_id->device, ret);
-               goto errout;
-       }
+       dev = newxprt->sc_cm_id->device;
 
        /* Qualify the transport resource defaults with the
         * capabilities of this particular device */
-       newxprt->sc_max_sge = min((size_t)devattr.max_sge,
+       newxprt->sc_max_sge = min((size_t)dev->attrs.max_sge,
                                  (size_t)RPCSVC_MAXPAGES);
-       newxprt->sc_max_sge_rd = min_t(size_t, devattr.max_sge_rd,
+       newxprt->sc_max_sge_rd = min_t(size_t, dev->attrs.max_sge_rd,
                                       RPCSVC_MAXPAGES);
-       newxprt->sc_max_requests = min((size_t)devattr.max_qp_wr,
-                                  (size_t)svcrdma_max_requests);
-       newxprt->sc_sq_depth = RPCRDMA_SQ_DEPTH_MULT * newxprt->sc_max_requests;
+       newxprt->sc_max_req_size = svcrdma_max_req_size;
+       newxprt->sc_max_requests = min_t(u32, dev->attrs.max_qp_wr,
+                                        svcrdma_max_requests);
+       newxprt->sc_max_bc_requests = min_t(u32, dev->attrs.max_qp_wr,
+                                           svcrdma_max_bc_requests);
+       newxprt->sc_rq_depth = newxprt->sc_max_requests +
+                              newxprt->sc_max_bc_requests;
+       newxprt->sc_sq_depth = RPCRDMA_SQ_DEPTH_MULT * newxprt->sc_rq_depth;
+
+       if (!svc_rdma_prealloc_ctxts(newxprt))
+               goto errout;
+       if (!svc_rdma_prealloc_maps(newxprt))
+               goto errout;
 
        /*
         * Limit ORD based on client limit, local device limit, and
         * configured svcrdma limit.
         */
-       newxprt->sc_ord = min_t(size_t, devattr.max_qp_rd_atom, newxprt->sc_ord);
+       newxprt->sc_ord = min_t(size_t, dev->attrs.max_qp_rd_atom, newxprt->sc_ord);
        newxprt->sc_ord = min_t(size_t, svcrdma_ord, newxprt->sc_ord);
 
-       newxprt->sc_pd = ib_alloc_pd(newxprt->sc_cm_id->device);
+       newxprt->sc_pd = ib_alloc_pd(dev);
        if (IS_ERR(newxprt->sc_pd)) {
                dprintk("svcrdma: error creating PD for connect request\n");
                goto errout;
        }
        cq_attr.cqe = newxprt->sc_sq_depth;
-       newxprt->sc_sq_cq = ib_create_cq(newxprt->sc_cm_id->device,
+       newxprt->sc_sq_cq = ib_create_cq(dev,
                                         sq_comp_handler,
                                         cq_event_handler,
                                         newxprt,
@@ -949,8 +1079,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
                dprintk("svcrdma: error creating SQ CQ for connect request\n");
                goto errout;
        }
-       cq_attr.cqe = newxprt->sc_max_requests;
-       newxprt->sc_rq_cq = ib_create_cq(newxprt->sc_cm_id->device,
+       cq_attr.cqe = newxprt->sc_rq_depth;
+       newxprt->sc_rq_cq = ib_create_cq(dev,
                                         rq_comp_handler,
                                         cq_event_handler,
                                         newxprt,
@@ -964,7 +1094,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
        qp_attr.event_handler = qp_event_handler;
        qp_attr.qp_context = &newxprt->sc_xprt;
        qp_attr.cap.max_send_wr = newxprt->sc_sq_depth;
-       qp_attr.cap.max_recv_wr = newxprt->sc_max_requests;
+       qp_attr.cap.max_recv_wr = newxprt->sc_rq_depth;
        qp_attr.cap.max_send_sge = newxprt->sc_max_sge;
        qp_attr.cap.max_recv_sge = newxprt->sc_max_sge;
        qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
@@ -978,7 +1108,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
                "    cap.max_send_sge = %d\n"
                "    cap.max_recv_sge = %d\n",
                newxprt->sc_cm_id, newxprt->sc_pd,
-               newxprt->sc_cm_id->device, newxprt->sc_pd->device,
+               dev, newxprt->sc_pd->device,
                qp_attr.cap.max_send_wr,
                qp_attr.cap.max_recv_wr,
                qp_attr.cap.max_send_sge,
@@ -1014,9 +1144,9 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
         *      of an RDMA_READ. IB does not.
         */
        newxprt->sc_reader = rdma_read_chunk_lcl;
-       if (devattr.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
+       if (dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
                newxprt->sc_frmr_pg_list_len =
-                       devattr.max_fast_reg_page_list_len;
+                       dev->attrs.max_fast_reg_page_list_len;
                newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG;
                newxprt->sc_reader = rdma_read_chunk_frmr;
        }
@@ -1024,44 +1154,16 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
        /*
         * Determine if a DMA MR is required and if so, what privs are required
         */
-       if (!rdma_protocol_iwarp(newxprt->sc_cm_id->device,
-                                newxprt->sc_cm_id->port_num) &&
-           !rdma_ib_or_roce(newxprt->sc_cm_id->device,
-                            newxprt->sc_cm_id->port_num))
+       if (!rdma_protocol_iwarp(dev, newxprt->sc_cm_id->port_num) &&
+           !rdma_ib_or_roce(dev, newxprt->sc_cm_id->port_num))
                goto errout;
 
-       if (!(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG) ||
-           !(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) {
-               need_dma_mr = 1;
-               dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
-               if (rdma_protocol_iwarp(newxprt->sc_cm_id->device,
-                                       newxprt->sc_cm_id->port_num) &&
-                   !(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG))
-                       dma_mr_acc |= IB_ACCESS_REMOTE_WRITE;
-       }
-
-       if (rdma_protocol_iwarp(newxprt->sc_cm_id->device,
-                               newxprt->sc_cm_id->port_num))
+       if (rdma_protocol_iwarp(dev, newxprt->sc_cm_id->port_num))
                newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_READ_W_INV;
 
-       /* Create the DMA MR if needed, otherwise, use the DMA LKEY */
-       if (need_dma_mr) {
-               /* Register all of physical memory */
-               newxprt->sc_phys_mr =
-                       ib_get_dma_mr(newxprt->sc_pd, dma_mr_acc);
-               if (IS_ERR(newxprt->sc_phys_mr)) {
-                       dprintk("svcrdma: Failed to create DMA MR ret=%d\n",
-                               ret);
-                       goto errout;
-               }
-               newxprt->sc_dma_lkey = newxprt->sc_phys_mr->lkey;
-       } else
-               newxprt->sc_dma_lkey =
-                       newxprt->sc_cm_id->device->local_dma_lkey;
-
        /* Post receive buffers */
-       for (i = 0; i < newxprt->sc_max_requests; i++) {
-               ret = svc_rdma_post_recv(newxprt);
+       for (i = 0; i < newxprt->sc_rq_depth; i++) {
+               ret = svc_rdma_post_recv(newxprt, GFP_KERNEL);
                if (ret) {
                        dprintk("svcrdma: failure posting receive buffers\n");
                        goto errout;
@@ -1160,12 +1262,14 @@ static void __svc_rdma_free(struct work_struct *work)
 {
        struct svcxprt_rdma *rdma =
                container_of(work, struct svcxprt_rdma, sc_work);
-       dprintk("svcrdma: svc_rdma_free(%p)\n", rdma);
+       struct svc_xprt *xprt = &rdma->sc_xprt;
+
+       dprintk("svcrdma: %s(%p)\n", __func__, rdma);
 
        /* We should only be called from kref_put */
-       if (atomic_read(&rdma->sc_xprt.xpt_ref.refcount) != 0)
+       if (atomic_read(&xprt->xpt_ref.refcount) != 0)
                pr_err("svcrdma: sc_xprt still in use? (%d)\n",
-                      atomic_read(&rdma->sc_xprt.xpt_ref.refcount));
+                      atomic_read(&xprt->xpt_ref.refcount));
 
        /*
         * Destroy queued, but not processed read completions. Note
@@ -1193,15 +1297,22 @@ static void __svc_rdma_free(struct work_struct *work)
        }
 
        /* Warn if we leaked a resource or under-referenced */
-       if (atomic_read(&rdma->sc_ctxt_used) != 0)
+       if (rdma->sc_ctxt_used != 0)
                pr_err("svcrdma: ctxt still in use? (%d)\n",
-                      atomic_read(&rdma->sc_ctxt_used));
+                      rdma->sc_ctxt_used);
        if (atomic_read(&rdma->sc_dma_used) != 0)
                pr_err("svcrdma: dma still in use? (%d)\n",
                       atomic_read(&rdma->sc_dma_used));
 
-       /* De-allocate fastreg mr */
+       /* Final put of backchannel client transport */
+       if (xprt->xpt_bc_xprt) {
+               xprt_put(xprt->xpt_bc_xprt);
+               xprt->xpt_bc_xprt = NULL;
+       }
+
        rdma_dealloc_frmr_q(rdma);
+       svc_rdma_destroy_ctxts(rdma);
+       svc_rdma_destroy_maps(rdma);
 
        /* Destroy the QP if present (not a listener) */
        if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
@@ -1213,9 +1324,6 @@ static void __svc_rdma_free(struct work_struct *work)
        if (rdma->sc_rq_cq && !IS_ERR(rdma->sc_rq_cq))
                ib_destroy_cq(rdma->sc_rq_cq);
 
-       if (rdma->sc_phys_mr && !IS_ERR(rdma->sc_phys_mr))
-               ib_dereg_mr(rdma->sc_phys_mr);
-
        if (rdma->sc_pd && !IS_ERR(rdma->sc_pd))
                ib_dealloc_pd(rdma->sc_pd);
 
@@ -1321,7 +1429,9 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
        int length;
        int ret;
 
-       p = alloc_page(GFP_KERNEL | __GFP_NOFAIL);
+       p = alloc_page(GFP_KERNEL);
+       if (!p)
+               return;
        va = page_address(p);
 
        /* XDR encode error */
@@ -1341,7 +1451,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
                return;
        }
        atomic_inc(&xprt->sc_dma_used);
-       ctxt->sge[0].lkey = xprt->sc_dma_lkey;
+       ctxt->sge[0].lkey = xprt->sc_pd->local_dma_lkey;
        ctxt->sge[0].length = length;
 
        /* Prepare SEND WR */
index 740bddcf3488fe9c2e6db48f0b7967909a4caca0..b1b009f10ea375a3f63148973e58e486df1aa282 100644 (file)
@@ -63,7 +63,7 @@
  */
 
 static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
-static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
+unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
 static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
 static unsigned int xprt_rdma_inline_write_padding;
 static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR;
@@ -143,12 +143,7 @@ static struct ctl_table sunrpc_table[] = {
 
 #endif
 
-#define RPCRDMA_BIND_TO                (60U * HZ)
-#define RPCRDMA_INIT_REEST_TO  (5U * HZ)
-#define RPCRDMA_MAX_REEST_TO   (30U * HZ)
-#define RPCRDMA_IDLE_DISC_TO   (5U * 60 * HZ)
-
-static struct rpc_xprt_ops xprt_rdma_procs;    /* forward reference */
+static struct rpc_xprt_ops xprt_rdma_procs;    /*forward reference */
 
 static void
 xprt_rdma_format_addresses4(struct rpc_xprt *xprt, struct sockaddr *sap)
@@ -174,7 +169,7 @@ xprt_rdma_format_addresses6(struct rpc_xprt *xprt, struct sockaddr *sap)
        xprt->address_strings[RPC_DISPLAY_NETID] = RPCBIND_NETID_RDMA6;
 }
 
-static void
+void
 xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap)
 {
        char buf[128];
@@ -203,7 +198,7 @@ xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap)
        xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma";
 }
 
-static void
+void
 xprt_rdma_free_addresses(struct rpc_xprt *xprt)
 {
        unsigned int i;
@@ -499,7 +494,7 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size)
        if (req == NULL)
                return NULL;
 
-       flags = GFP_NOIO | __GFP_NOWARN;
+       flags = RPCRDMA_DEF_GFP;
        if (RPC_IS_SWAPPER(task))
                flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;
 
@@ -642,7 +637,7 @@ drop_connection:
        return -ENOTCONN;       /* implies disconnect */
 }
 
-static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
+void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
 {
        struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
        long idle_time = 0;
@@ -743,6 +738,11 @@ void xprt_rdma_cleanup(void)
 
        rpcrdma_destroy_wq();
        frwr_destroy_recovery_wq();
+
+       rc = xprt_unregister_transport(&xprt_rdma_bc);
+       if (rc)
+               dprintk("RPC:       %s: xprt_unregister(bc) returned %i\n",
+                       __func__, rc);
 }
 
 int xprt_rdma_init(void)
@@ -766,6 +766,14 @@ int xprt_rdma_init(void)
                return rc;
        }
 
+       rc = xprt_register_transport(&xprt_rdma_bc);
+       if (rc) {
+               xprt_unregister_transport(&xprt_rdma);
+               rpcrdma_destroy_wq();
+               frwr_destroy_recovery_wq();
+               return rc;
+       }
+
        dprintk("RPCRDMA Module Init, register RPC RDMA transport\n");
 
        dprintk("Defaults:\n");
index 732c71ce5dcab6758da0fe7661b877bfd8fa77ad..878f1bfb1db98f6972e641130d36a80cdf97faf8 100644 (file)
@@ -462,7 +462,6 @@ int
 rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
 {
        struct rpcrdma_ia *ia = &xprt->rx_ia;
-       struct ib_device_attr *devattr = &ia->ri_devattr;
        int rc;
 
        ia->ri_dma_mr = NULL;
@@ -482,16 +481,10 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
                goto out2;
        }
 
-       rc = ib_query_device(ia->ri_device, devattr);
-       if (rc) {
-               dprintk("RPC:       %s: ib_query_device failed %d\n",
-                       __func__, rc);
-               goto out3;
-       }
-
        if (memreg == RPCRDMA_FRMR) {
-               if (!(devattr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) ||
-                   (devattr->max_fast_reg_page_list_len == 0)) {
+               if (!(ia->ri_device->attrs.device_cap_flags &
+                               IB_DEVICE_MEM_MGT_EXTENSIONS) ||
+                   (ia->ri_device->attrs.max_fast_reg_page_list_len == 0)) {
                        dprintk("RPC:       %s: FRMR registration "
                                "not supported by HCA\n", __func__);
                        memreg = RPCRDMA_MTHCAFMR;
@@ -566,24 +559,23 @@ int
 rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
                                struct rpcrdma_create_data_internal *cdata)
 {
-       struct ib_device_attr *devattr = &ia->ri_devattr;
        struct ib_cq *sendcq, *recvcq;
        struct ib_cq_init_attr cq_attr = {};
        unsigned int max_qp_wr;
        int rc, err;
 
-       if (devattr->max_sge < RPCRDMA_MAX_IOVS) {
+       if (ia->ri_device->attrs.max_sge < RPCRDMA_MAX_IOVS) {
                dprintk("RPC:       %s: insufficient sge's available\n",
                        __func__);
                return -ENOMEM;
        }
 
-       if (devattr->max_qp_wr <= RPCRDMA_BACKWARD_WRS) {
+       if (ia->ri_device->attrs.max_qp_wr <= RPCRDMA_BACKWARD_WRS) {
                dprintk("RPC:       %s: insufficient wqe's available\n",
                        __func__);
                return -ENOMEM;
        }
-       max_qp_wr = devattr->max_qp_wr - RPCRDMA_BACKWARD_WRS;
+       max_qp_wr = ia->ri_device->attrs.max_qp_wr - RPCRDMA_BACKWARD_WRS;
 
        /* check provider's send/recv wr limits */
        if (cdata->max_requests > max_qp_wr)
@@ -668,11 +660,11 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
 
        /* Client offers RDMA Read but does not initiate */
        ep->rep_remote_cma.initiator_depth = 0;
-       if (devattr->max_qp_rd_atom > 32)       /* arbitrary but <= 255 */
+       if (ia->ri_device->attrs.max_qp_rd_atom > 32)   /* arbitrary but <= 255 */
                ep->rep_remote_cma.responder_resources = 32;
        else
                ep->rep_remote_cma.responder_resources =
-                                               devattr->max_qp_rd_atom;
+                                               ia->ri_device->attrs.max_qp_rd_atom;
 
        ep->rep_remote_cma.retry_count = 7;
        ep->rep_remote_cma.flow_control = 0;
index 728101ddc44bfc4b3b6247c8d13f3222cb0f1386..38fe11b0987528c3d345676202db487cae269a87 100644 (file)
 #define RDMA_RESOLVE_TIMEOUT   (5000)  /* 5 seconds */
 #define RDMA_CONNECT_RETRY_MAX (2)     /* retries if no listener backlog */
 
+#define RPCRDMA_BIND_TO                (60U * HZ)
+#define RPCRDMA_INIT_REEST_TO  (5U * HZ)
+#define RPCRDMA_MAX_REEST_TO   (30U * HZ)
+#define RPCRDMA_IDLE_DISC_TO   (5U * 60 * HZ)
+
 /*
  * Interface Adapter -- one per transport instance
  */
@@ -68,7 +73,6 @@ struct rpcrdma_ia {
        struct completion       ri_done;
        int                     ri_async_rc;
        unsigned int            ri_max_frmr_depth;
-       struct ib_device_attr   ri_devattr;
        struct ib_qp_attr       ri_qp_attr;
        struct ib_qp_init_attr  ri_qp_init_attr;
 };
@@ -142,6 +146,8 @@ rdmab_to_msg(struct rpcrdma_regbuf *rb)
        return (struct rpcrdma_msg *)rb->rg_base;
 }
 
+#define RPCRDMA_DEF_GFP                (GFP_NOIO | __GFP_NOWARN)
+
 /*
  * struct rpcrdma_rep -- this structure encapsulates state required to recv
  * and complete a reply, asychronously. It needs several pieces of
@@ -309,6 +315,8 @@ struct rpcrdma_buffer {
        u32                     rb_bc_srv_max_requests;
        spinlock_t              rb_reqslock;    /* protect rb_allreqs */
        struct list_head        rb_allreqs;
+
+       u32                     rb_bc_max_requests;
 };
 #define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia)
 
@@ -516,6 +524,10 @@ int rpcrdma_marshal_req(struct rpc_rqst *);
 
 /* RPC/RDMA module init - xprtrdma/transport.c
  */
+extern unsigned int xprt_rdma_max_inline_read;
+void xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap);
+void xprt_rdma_free_addresses(struct rpc_xprt *xprt);
+void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq);
 int xprt_rdma_init(void);
 void xprt_rdma_cleanup(void);
 
@@ -531,11 +543,6 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *);
 void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int);
 #endif /* CONFIG_SUNRPC_BACKCHANNEL */
 
-/* Temporary NFS request map cache. Created in svc_rdma.c  */
-extern struct kmem_cache *svc_rdma_map_cachep;
-/* WR context cache. Created in svc_rdma.c  */
-extern struct kmem_cache *svc_rdma_ctxt_cachep;
-/* Workqueue created in svc_rdma.c */
-extern struct workqueue_struct *svc_rdma_wq;
+extern struct xprt_class xprt_rdma_bc;
 
 #endif                         /* _LINUX_SUNRPC_XPRT_RDMA_H */
index ebc661d3b6e37edb55d7c0a719ed9937a9f27a0d..47f7da58a7f0e93e3ffd632396a5a8db65c7f7c3 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/list.h>
 #include <linux/workqueue.h>
 #include <linux/if_vlan.h>
+#include <linux/rtnetlink.h>
 #include <net/ip_fib.h>
 #include <net/switchdev.h>
 
@@ -567,7 +568,6 @@ int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj,
 }
 EXPORT_SYMBOL_GPL(switchdev_port_obj_dump);
 
-static DEFINE_MUTEX(switchdev_mutex);
 static RAW_NOTIFIER_HEAD(switchdev_notif_chain);
 
 /**
@@ -582,9 +582,9 @@ int register_switchdev_notifier(struct notifier_block *nb)
 {
        int err;
 
-       mutex_lock(&switchdev_mutex);
+       rtnl_lock();
        err = raw_notifier_chain_register(&switchdev_notif_chain, nb);
-       mutex_unlock(&switchdev_mutex);
+       rtnl_unlock();
        return err;
 }
 EXPORT_SYMBOL_GPL(register_switchdev_notifier);
@@ -600,9 +600,9 @@ int unregister_switchdev_notifier(struct notifier_block *nb)
 {
        int err;
 
-       mutex_lock(&switchdev_mutex);
+       rtnl_lock();
        err = raw_notifier_chain_unregister(&switchdev_notif_chain, nb);
-       mutex_unlock(&switchdev_mutex);
+       rtnl_unlock();
        return err;
 }
 EXPORT_SYMBOL_GPL(unregister_switchdev_notifier);
@@ -616,16 +616,17 @@ EXPORT_SYMBOL_GPL(unregister_switchdev_notifier);
  *     Call all network notifier blocks. This should be called by driver
  *     when it needs to propagate hardware event.
  *     Return values are same as for atomic_notifier_call_chain().
+ *     rtnl_lock must be held.
  */
 int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
                             struct switchdev_notifier_info *info)
 {
        int err;
 
+       ASSERT_RTNL();
+
        info->dev = dev;
-       mutex_lock(&switchdev_mutex);
        err = raw_notifier_call_chain(&switchdev_notif_chain, val, info);
-       mutex_unlock(&switchdev_mutex);
        return err;
 }
 EXPORT_SYMBOL_GPL(call_switchdev_notifiers);
index 350cca33ee0a64b08ce6135aa130011263eba136..69ee2eeef968851192035cb166410f1f37e7722f 100644 (file)
@@ -289,15 +289,14 @@ static void tipc_subscrb_rcv_cb(struct net *net, int conid,
                                struct sockaddr_tipc *addr, void *usr_data,
                                void *buf, size_t len)
 {
-       struct tipc_subscriber *subscriber = usr_data;
+       struct tipc_subscriber *subscrb = usr_data;
        struct tipc_subscription *sub = NULL;
        struct tipc_net *tn = net_generic(net, tipc_net_id);
 
-       tipc_subscrp_create(net, (struct tipc_subscr *)buf, subscriber, &sub);
-       if (sub)
-               tipc_nametbl_subscribe(sub);
-       else
-               tipc_conn_terminate(tn->topsrv, subscriber->conid);
+       if (tipc_subscrp_create(net, (struct tipc_subscr *)buf, subscrb, &sub))
+               return tipc_conn_terminate(tn->topsrv, subscrb->conid);
+
+       tipc_nametbl_subscribe(sub);
 }
 
 /* Handle one request to establish a new subscriber */
index c5bf5ef2bf89476b6d7ab766b8591e6af2e869fd..49d5093eb0553a4416af09784b0e012f58cbd124 100644 (file)
@@ -2339,6 +2339,7 @@ again:
 
                        if (signal_pending(current)) {
                                err = sock_intr_errno(timeo);
+                               scm_destroy(&scm);
                                goto out;
                        }
 
index 3b0ce1c484a3dd33f2e2c81edeeadb79a48e2dcc..547ceecc052310f3db3be04d827dff22d567dc1e 100644 (file)
@@ -231,20 +231,22 @@ static const struct ieee80211_regdomain world_regdom = {
                /* IEEE 802.11b/g, channels 1..11 */
                REG_RULE(2412-10, 2462+10, 40, 6, 20, 0),
                /* IEEE 802.11b/g, channels 12..13. */
-               REG_RULE(2467-10, 2472+10, 40, 6, 20,
-                       NL80211_RRF_NO_IR),
+               REG_RULE(2467-10, 2472+10, 20, 6, 20,
+                       NL80211_RRF_NO_IR | NL80211_RRF_AUTO_BW),
                /* IEEE 802.11 channel 14 - Only JP enables
                 * this and for 802.11b only */
                REG_RULE(2484-10, 2484+10, 20, 6, 20,
                        NL80211_RRF_NO_IR |
                        NL80211_RRF_NO_OFDM),
                /* IEEE 802.11a, channel 36..48 */
-               REG_RULE(5180-10, 5240+10, 160, 6, 20,
-                        NL80211_RRF_NO_IR),
+               REG_RULE(5180-10, 5240+10, 80, 6, 20,
+                        NL80211_RRF_NO_IR |
+                        NL80211_RRF_AUTO_BW),
 
                /* IEEE 802.11a, channel 52..64 - DFS required */
-               REG_RULE(5260-10, 5320+10, 160, 6, 20,
+               REG_RULE(5260-10, 5320+10, 80, 6, 20,
                        NL80211_RRF_NO_IR |
+                       NL80211_RRF_AUTO_BW |
                        NL80211_RRF_DFS),
 
                /* IEEE 802.11a, channel 100..144 - DFS required */
@@ -2745,7 +2747,7 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd)
        const struct ieee80211_power_rule *power_rule = NULL;
        char bw[32], cac_time[32];
 
-       pr_info("  (start_freq - end_freq @ bandwidth), (max_antenna_gain, max_eirp), (dfs_cac_time)\n");
+       pr_debug("  (start_freq - end_freq @ bandwidth), (max_antenna_gain, max_eirp), (dfs_cac_time)\n");
 
        for (i = 0; i < rd->n_reg_rules; i++) {
                reg_rule = &rd->reg_rules[i];
@@ -2772,7 +2774,7 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd)
                 * in certain regions
                 */
                if (power_rule->max_antenna_gain)
-                       pr_info("  (%d KHz - %d KHz @ %s), (%d mBi, %d mBm), (%s)\n",
+                       pr_debug("  (%d KHz - %d KHz @ %s), (%d mBi, %d mBm), (%s)\n",
                                freq_range->start_freq_khz,
                                freq_range->end_freq_khz,
                                bw,
@@ -2780,7 +2782,7 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd)
                                power_rule->max_eirp,
                                cac_time);
                else
-                       pr_info("  (%d KHz - %d KHz @ %s), (N/A, %d mBm), (%s)\n",
+                       pr_debug("  (%d KHz - %d KHz @ %s), (N/A, %d mBm), (%s)\n",
                                freq_range->start_freq_khz,
                                freq_range->end_freq_khz,
                                bw,
@@ -2813,35 +2815,35 @@ static void print_regdomain(const struct ieee80211_regdomain *rd)
                        struct cfg80211_registered_device *rdev;
                        rdev = cfg80211_rdev_by_wiphy_idx(lr->wiphy_idx);
                        if (rdev) {
-                               pr_info("Current regulatory domain updated by AP to: %c%c\n",
+                               pr_debug("Current regulatory domain updated by AP to: %c%c\n",
                                        rdev->country_ie_alpha2[0],
                                        rdev->country_ie_alpha2[1]);
                        } else
-                               pr_info("Current regulatory domain intersected:\n");
+                               pr_debug("Current regulatory domain intersected:\n");
                } else
-                       pr_info("Current regulatory domain intersected:\n");
+                       pr_debug("Current regulatory domain intersected:\n");
        } else if (is_world_regdom(rd->alpha2)) {
-               pr_info("World regulatory domain updated:\n");
+               pr_debug("World regulatory domain updated:\n");
        } else {
                if (is_unknown_alpha2(rd->alpha2))
-                       pr_info("Regulatory domain changed to driver built-in settings (unknown country)\n");
+                       pr_debug("Regulatory domain changed to driver built-in settings (unknown country)\n");
                else {
                        if (reg_request_cell_base(lr))
-                               pr_info("Regulatory domain changed to country: %c%c by Cell Station\n",
+                               pr_debug("Regulatory domain changed to country: %c%c by Cell Station\n",
                                        rd->alpha2[0], rd->alpha2[1]);
                        else
-                               pr_info("Regulatory domain changed to country: %c%c\n",
+                               pr_debug("Regulatory domain changed to country: %c%c\n",
                                        rd->alpha2[0], rd->alpha2[1]);
                }
        }
 
-       pr_info(" DFS Master region: %s", reg_dfs_region_str(rd->dfs_region));
+       pr_debug(" DFS Master region: %s", reg_dfs_region_str(rd->dfs_region));
        print_rd_rules(rd);
 }
 
 static void print_regdomain_info(const struct ieee80211_regdomain *rd)
 {
-       pr_info("Regulatory domain: %c%c\n", rd->alpha2[0], rd->alpha2[1]);
+       pr_debug("Regulatory domain: %c%c\n", rd->alpha2[0], rd->alpha2[1]);
        print_rd_rules(rd);
 }
 
@@ -2862,7 +2864,8 @@ static int reg_set_rd_user(const struct ieee80211_regdomain *rd,
                return -EALREADY;
 
        if (!is_valid_rd(rd)) {
-               pr_err("Invalid regulatory domain detected:\n");
+               pr_err("Invalid regulatory domain detected: %c%c\n",
+                      rd->alpha2[0], rd->alpha2[1]);
                print_regdomain_info(rd);
                return -EINVAL;
        }
@@ -2898,7 +2901,8 @@ static int reg_set_rd_driver(const struct ieee80211_regdomain *rd,
                return -EALREADY;
 
        if (!is_valid_rd(rd)) {
-               pr_err("Invalid regulatory domain detected:\n");
+               pr_err("Invalid regulatory domain detected: %c%c\n",
+                      rd->alpha2[0], rd->alpha2[1]);
                print_regdomain_info(rd);
                return -EINVAL;
        }
@@ -2956,7 +2960,8 @@ static int reg_set_rd_country_ie(const struct ieee80211_regdomain *rd,
         */
 
        if (!is_valid_rd(rd)) {
-               pr_err("Invalid regulatory domain detected:\n");
+               pr_err("Invalid regulatory domain detected: %c%c\n",
+                      rd->alpha2[0], rd->alpha2[1]);
                print_regdomain_info(rd);
                return -EINVAL;
        }
index e080746e1a6b528a218b416b5344368cd6db7065..48958d3cec9e38473ff3296f7f6bf4d5db41612a 100644 (file)
@@ -594,7 +594,8 @@ static int ignore_undef_symbol(struct elf_info *info, const char *symname)
                if (strncmp(symname, "_restgpr0_", sizeof("_restgpr0_") - 1) == 0 ||
                    strncmp(symname, "_savegpr0_", sizeof("_savegpr0_") - 1) == 0 ||
                    strncmp(symname, "_restvr_", sizeof("_restvr_") - 1) == 0 ||
-                   strncmp(symname, "_savevr_", sizeof("_savevr_") - 1) == 0)
+                   strncmp(symname, "_savevr_", sizeof("_savevr_") - 1) == 0 ||
+                   strcmp(symname, ".TOC.") == 0)
                        return 1;
        /* Do not ignore this symbol */
        return 0;
diff --git a/scripts/prune-kernel b/scripts/prune-kernel
new file mode 100755 (executable)
index 0000000..ab5034e
--- /dev/null
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+# because I use CONFIG_LOCALVERSION_AUTO, not the same version again and
+# again, /boot and /lib/modules/ eventually fill up.
+# Dumb script to purge that stuff:
+
+for f in "$@"
+do
+        if rpm -qf "/lib/modules/$f" >/dev/null; then
+                echo "keeping $f (installed from rpm)"
+        elif [ $(uname -r) = "$f" ]; then
+                echo "keeping $f (running kernel) "
+        else
+                echo "removing $f"
+                rm -f "/boot/initramfs-$f.img" "/boot/System.map-$f"
+                rm -f "/boot/vmlinuz-$f"   "/boot/config-$f"
+                rm -rf "/lib/modules/$f"
+                new-kernel-pkg --remove $f
+        fi
+done
index 16622aef9bdea83bee67e5e0080c7b9a17d240ae..28414b0207ce58f1fdd8503da0f61a39ba5c7cfe 100644 (file)
@@ -99,7 +99,7 @@ struct dentry *securityfs_create_file(const char *name, umode_t mode,
 
        dir = d_inode(parent);
 
-       mutex_lock(&dir->i_mutex);
+       inode_lock(dir);
        dentry = lookup_one_len(name, parent, strlen(name));
        if (IS_ERR(dentry))
                goto out;
@@ -129,14 +129,14 @@ struct dentry *securityfs_create_file(const char *name, umode_t mode,
        }
        d_instantiate(dentry, inode);
        dget(dentry);
-       mutex_unlock(&dir->i_mutex);
+       inode_unlock(dir);
        return dentry;
 
 out1:
        dput(dentry);
        dentry = ERR_PTR(error);
 out:
-       mutex_unlock(&dir->i_mutex);
+       inode_unlock(dir);
        simple_release_fs(&mount, &mount_count);
        return dentry;
 }
@@ -195,7 +195,7 @@ void securityfs_remove(struct dentry *dentry)
        if (!parent || d_really_is_negative(parent))
                return;
 
-       mutex_lock(&d_inode(parent)->i_mutex);
+       inode_lock(d_inode(parent));
        if (simple_positive(dentry)) {
                if (d_is_dir(dentry))
                        simple_rmdir(d_inode(parent), dentry);
@@ -203,7 +203,7 @@ void securityfs_remove(struct dentry *dentry)
                        simple_unlink(d_inode(parent), dentry);
                dput(dentry);
        }
-       mutex_unlock(&d_inode(parent)->i_mutex);
+       inode_unlock(d_inode(parent));
        simple_release_fs(&mount, &mount_count);
 }
 EXPORT_SYMBOL_GPL(securityfs_remove);
index c21f09bf8b99210f3c3dcbff4497e1194848db2c..9d96551d0196cb3e7136ace6e747f9504c07fc7a 100644 (file)
@@ -121,7 +121,7 @@ static void ima_check_last_writer(struct integrity_iint_cache *iint,
        if (!(mode & FMODE_WRITE))
                return;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        if (atomic_read(&inode->i_writecount) == 1) {
                if ((iint->version != inode->i_version) ||
                    (iint->flags & IMA_NEW_FILE)) {
@@ -130,7 +130,7 @@ static void ima_check_last_writer(struct integrity_iint_cache *iint,
                                ima_update_xattr(iint, file);
                }
        }
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
 }
 
 /**
@@ -186,7 +186,7 @@ static int process_measurement(struct file *file, int mask, int function,
        if (action & IMA_FILE_APPRAISE)
                function = FILE_CHECK;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        if (action) {
                iint = integrity_inode_get(inode);
@@ -250,7 +250,7 @@ out_free:
        if (pathbuf)
                __putname(pathbuf);
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        if ((rc && must_appraise) && (ima_appraise & IMA_APPRAISE_ENFORCE))
                return -EACCES;
        return 0;
index 07a87311055c5b50916c87f2117f4c70b0321cfd..09ef276c4bdcaf8e0c419df61d1cd7b07bf5253c 100644 (file)
@@ -430,7 +430,8 @@ static int __key_instantiate_and_link(struct key *key,
 
                        /* and link it into the destination keyring */
                        if (keyring) {
-                               set_bit(KEY_FLAG_KEEP, &key->flags);
+                               if (test_bit(KEY_FLAG_KEEP, &keyring->flags))
+                                       set_bit(KEY_FLAG_KEEP, &key->flags);
 
                                __key_link(key, _edit);
                        }
index 732c1c77dccd8be483d382e621569e4008b4db0c..1b1fd27de632692dc307e5a72cf65898f0e245b3 100644 (file)
@@ -380,9 +380,9 @@ static int sel_open_policy(struct inode *inode, struct file *filp)
                goto err;
 
        if (i_size_read(inode) != security_policydb_len()) {
-               mutex_lock(&inode->i_mutex);
+               inode_lock(inode);
                i_size_write(inode, security_policydb_len());
-               mutex_unlock(&inode->i_mutex);
+               inode_unlock(inode);
        }
 
        rc = security_read_policy(&plm->data, &plm->len);
index e3e949126a5639c6a4a623750ec35922704c5110..a2a1e24becc6b8f2f6288a8853b02e6dd884becd 100644 (file)
@@ -97,11 +97,11 @@ config SND_PCM_TIMER
        bool "PCM timer interface" if EXPERT
        default y
        help
-         If you disable this option, pcm timer will be inavailable, so
-         those stubs used pcm timer (e.g. dmix, dsnoop & co) may work
+         If you disable this option, pcm timer will be unavailable, so
+         those stubs that use pcm timer (e.g. dmix, dsnoop & co) may work
          incorrectlly.
 
-         For some embedded device, we may disable it to reduce memory
+         For some embedded devices, we may disable it to reduce memory
          footprint, about 20KB on x86_64 platform.
 
 config SND_SEQUENCER_OSS
index 18b8dc45bb8f77ea0c2ce16e79305c7872f057bc..7fac3cae8abd0a232a3d7a1c2563999495df6ec2 100644 (file)
 #include <sound/compress_offload.h>
 #include <sound/compress_driver.h>
 
+/* struct snd_compr_codec_caps overflows the ioctl bit size for some
+ * architectures, so we need to disable the relevant ioctls.
+ */
+#if _IOC_SIZEBITS < 14
+#define COMPR_CODEC_CAPS_OVERFLOW
+#endif
+
 /* TODO:
  * - add substream support for multiple devices in case of
  *     SND_DYNAMIC_MINORS is not used
@@ -440,6 +447,7 @@ out:
        return retval;
 }
 
+#ifndef COMPR_CODEC_CAPS_OVERFLOW
 static int
 snd_compr_get_codec_caps(struct snd_compr_stream *stream, unsigned long arg)
 {
@@ -463,6 +471,7 @@ out:
        kfree(caps);
        return retval;
 }
+#endif /* !COMPR_CODEC_CAPS_OVERFLOW */
 
 /* revisit this with snd_pcm_preallocate_xxx */
 static int snd_compr_allocate_buffer(struct snd_compr_stream *stream,
@@ -801,9 +810,11 @@ static long snd_compr_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
        case _IOC_NR(SNDRV_COMPRESS_GET_CAPS):
                retval = snd_compr_get_caps(stream, arg);
                break;
+#ifndef COMPR_CODEC_CAPS_OVERFLOW
        case _IOC_NR(SNDRV_COMPRESS_GET_CODEC_CAPS):
                retval = snd_compr_get_codec_caps(stream, arg);
                break;
+#endif
        case _IOC_NR(SNDRV_COMPRESS_SET_PARAMS):
                retval = snd_compr_set_params(stream, arg);
                break;
index 196a6fe100ca8f40cc7f9b01eb00dc81c713ba93..a85d45595d02a265f1f8e1a4cd36c812d8cd08f0 100644 (file)
@@ -1405,6 +1405,8 @@ static int snd_ctl_tlv_ioctl(struct snd_ctl_file *file,
                return -EFAULT;
        if (tlv.length < sizeof(unsigned int) * 2)
                return -EINVAL;
+       if (!tlv.numid)
+               return -EINVAL;
        down_read(&card->controls_rwsem);
        kctl = snd_ctl_find_numid(card, tlv.numid);
        if (kctl == NULL) {
index f845ecf7e172935f938bc52ecfe9c95c7bac4e11..656d9a9032dc2165d1f30f4ef193250a07bec555 100644 (file)
@@ -90,7 +90,7 @@ static int snd_hrtimer_start(struct snd_timer *t)
        struct snd_hrtimer *stime = t->private_data;
 
        atomic_set(&stime->running, 0);
-       hrtimer_cancel(&stime->hrt);
+       hrtimer_try_to_cancel(&stime->hrt);
        hrtimer_start(&stime->hrt, ns_to_ktime(t->sticks * resolution),
                      HRTIMER_MODE_REL);
        atomic_set(&stime->running, 1);
@@ -101,6 +101,7 @@ static int snd_hrtimer_stop(struct snd_timer *t)
 {
        struct snd_hrtimer *stime = t->private_data;
        atomic_set(&stime->running, 0);
+       hrtimer_try_to_cancel(&stime->hrt);
        return 0;
 }
 
index 0e73d03b30e3f0d712b26f65453346e419caa96d..ebc9fdfe64df618088a8d55efff8b5312af3385d 100644 (file)
@@ -835,7 +835,8 @@ static int choose_rate(struct snd_pcm_substream *substream,
        return snd_pcm_hw_param_near(substream, params, SNDRV_PCM_HW_PARAM_RATE, best_rate, NULL);
 }
 
-static int snd_pcm_oss_change_params(struct snd_pcm_substream *substream)
+static int snd_pcm_oss_change_params(struct snd_pcm_substream *substream,
+                                    bool trylock)
 {
        struct snd_pcm_runtime *runtime = substream->runtime;
        struct snd_pcm_hw_params *params, *sparams;
@@ -849,7 +850,10 @@ static int snd_pcm_oss_change_params(struct snd_pcm_substream *substream)
        struct snd_mask sformat_mask;
        struct snd_mask mask;
 
-       if (mutex_lock_interruptible(&runtime->oss.params_lock))
+       if (trylock) {
+               if (!(mutex_trylock(&runtime->oss.params_lock)))
+                       return -EAGAIN;
+       } else if (mutex_lock_interruptible(&runtime->oss.params_lock))
                return -EINTR;
        sw_params = kzalloc(sizeof(*sw_params), GFP_KERNEL);
        params = kmalloc(sizeof(*params), GFP_KERNEL);
@@ -1092,7 +1096,7 @@ static int snd_pcm_oss_get_active_substream(struct snd_pcm_oss_file *pcm_oss_fil
                if (asubstream == NULL)
                        asubstream = substream;
                if (substream->runtime->oss.params) {
-                       err = snd_pcm_oss_change_params(substream);
+                       err = snd_pcm_oss_change_params(substream, false);
                        if (err < 0)
                                return err;
                }
@@ -1132,7 +1136,7 @@ static int snd_pcm_oss_make_ready(struct snd_pcm_substream *substream)
                return 0;
        runtime = substream->runtime;
        if (runtime->oss.params) {
-               err = snd_pcm_oss_change_params(substream);
+               err = snd_pcm_oss_change_params(substream, false);
                if (err < 0)
                        return err;
        }
@@ -2163,7 +2167,7 @@ static int snd_pcm_oss_get_space(struct snd_pcm_oss_file *pcm_oss_file, int stre
        runtime = substream->runtime;
 
        if (runtime->oss.params &&
-           (err = snd_pcm_oss_change_params(substream)) < 0)
+           (err = snd_pcm_oss_change_params(substream, false)) < 0)
                return err;
 
        info.fragsize = runtime->oss.period_bytes;
@@ -2804,7 +2808,12 @@ static int snd_pcm_oss_mmap(struct file *file, struct vm_area_struct *area)
                return -EIO;
        
        if (runtime->oss.params) {
-               if ((err = snd_pcm_oss_change_params(substream)) < 0)
+               /* use mutex_trylock() for params_lock for avoiding a deadlock
+                * between mmap_sem and params_lock taken by
+                * copy_from/to_user() in snd_pcm_oss_write/read()
+                */
+               err = snd_pcm_oss_change_params(substream, true);
+               if (err < 0)
                        return err;
        }
 #ifdef CONFIG_SND_PCM_OSS_PLUGINS
index b48b434444ed0e0239936887a891d15da0054e00..9630e9f72b7ba2ad77f50a516ecc559c0c200975 100644 (file)
@@ -255,10 +255,15 @@ static int snd_pcm_ioctl_hw_params_compat(struct snd_pcm_substream *substream,
        if (! (runtime = substream->runtime))
                return -ENOTTY;
 
-       /* only fifo_size is different, so just copy all */
-       data = memdup_user(data32, sizeof(*data32));
-       if (IS_ERR(data))
-               return PTR_ERR(data);
+       data = kmalloc(sizeof(*data), GFP_KERNEL);
+       if (!data)
+               return -ENOMEM;
+
+       /* only fifo_size (RO from userspace) is different, so just copy all */
+       if (copy_from_user(data, data32, sizeof(*data32))) {
+               err = -EFAULT;
+               goto error;
+       }
 
        if (refine)
                err = snd_pcm_hw_refine(substream, data);
index a7759846fbaadff0c9493760ce7b14eff7ca8ad9..795437b1008200cd534f9a46ad0272f3dbc20ca4 100644 (file)
@@ -942,31 +942,36 @@ static long snd_rawmidi_kernel_read1(struct snd_rawmidi_substream *substream,
        unsigned long flags;
        long result = 0, count1;
        struct snd_rawmidi_runtime *runtime = substream->runtime;
+       unsigned long appl_ptr;
 
+       spin_lock_irqsave(&runtime->lock, flags);
        while (count > 0 && runtime->avail) {
                count1 = runtime->buffer_size - runtime->appl_ptr;
                if (count1 > count)
                        count1 = count;
-               spin_lock_irqsave(&runtime->lock, flags);
                if (count1 > (int)runtime->avail)
                        count1 = runtime->avail;
+
+               /* update runtime->appl_ptr before unlocking for userbuf */
+               appl_ptr = runtime->appl_ptr;
+               runtime->appl_ptr += count1;
+               runtime->appl_ptr %= runtime->buffer_size;
+               runtime->avail -= count1;
+
                if (kernelbuf)
-                       memcpy(kernelbuf + result, runtime->buffer + runtime->appl_ptr, count1);
+                       memcpy(kernelbuf + result, runtime->buffer + appl_ptr, count1);
                if (userbuf) {
                        spin_unlock_irqrestore(&runtime->lock, flags);
                        if (copy_to_user(userbuf + result,
-                                        runtime->buffer + runtime->appl_ptr, count1)) {
+                                        runtime->buffer + appl_ptr, count1)) {
                                return result > 0 ? result : -EFAULT;
                        }
                        spin_lock_irqsave(&runtime->lock, flags);
                }
-               runtime->appl_ptr += count1;
-               runtime->appl_ptr %= runtime->buffer_size;
-               runtime->avail -= count1;
-               spin_unlock_irqrestore(&runtime->lock, flags);
                result += count1;
                count -= count1;
        }
+       spin_unlock_irqrestore(&runtime->lock, flags);
        return result;
 }
 
@@ -1055,23 +1060,16 @@ int snd_rawmidi_transmit_empty(struct snd_rawmidi_substream *substream)
 EXPORT_SYMBOL(snd_rawmidi_transmit_empty);
 
 /**
- * snd_rawmidi_transmit_peek - copy data from the internal buffer
+ * __snd_rawmidi_transmit_peek - copy data from the internal buffer
  * @substream: the rawmidi substream
  * @buffer: the buffer pointer
  * @count: data size to transfer
  *
- * Copies data from the internal output buffer to the given buffer.
- *
- * Call this in the interrupt handler when the midi output is ready,
- * and call snd_rawmidi_transmit_ack() after the transmission is
- * finished.
- *
- * Return: The size of copied data, or a negative error code on failure.
+ * This is a variant of snd_rawmidi_transmit_peek() without spinlock.
  */
-int snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream,
+int __snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream,
                              unsigned char *buffer, int count)
 {
-       unsigned long flags;
        int result, count1;
        struct snd_rawmidi_runtime *runtime = substream->runtime;
 
@@ -1081,7 +1079,6 @@ int snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream,
                return -EINVAL;
        }
        result = 0;
-       spin_lock_irqsave(&runtime->lock, flags);
        if (runtime->avail >= runtime->buffer_size) {
                /* warning: lowlevel layer MUST trigger down the hardware */
                goto __skip;
@@ -1106,25 +1103,47 @@ int snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream,
                }
        }
       __skip:
+       return result;
+}
+EXPORT_SYMBOL(__snd_rawmidi_transmit_peek);
+
+/**
+ * snd_rawmidi_transmit_peek - copy data from the internal buffer
+ * @substream: the rawmidi substream
+ * @buffer: the buffer pointer
+ * @count: data size to transfer
+ *
+ * Copies data from the internal output buffer to the given buffer.
+ *
+ * Call this in the interrupt handler when the midi output is ready,
+ * and call snd_rawmidi_transmit_ack() after the transmission is
+ * finished.
+ *
+ * Return: The size of copied data, or a negative error code on failure.
+ */
+int snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream,
+                             unsigned char *buffer, int count)
+{
+       struct snd_rawmidi_runtime *runtime = substream->runtime;
+       int result;
+       unsigned long flags;
+
+       spin_lock_irqsave(&runtime->lock, flags);
+       result = __snd_rawmidi_transmit_peek(substream, buffer, count);
        spin_unlock_irqrestore(&runtime->lock, flags);
        return result;
 }
 EXPORT_SYMBOL(snd_rawmidi_transmit_peek);
 
 /**
- * snd_rawmidi_transmit_ack - acknowledge the transmission
+ * __snd_rawmidi_transmit_ack - acknowledge the transmission
  * @substream: the rawmidi substream
  * @count: the transferred count
  *
- * Advances the hardware pointer for the internal output buffer with
- * the given size and updates the condition.
- * Call after the transmission is finished.
- *
- * Return: The advanced size if successful, or a negative error code on failure.
+ * This is a variant of __snd_rawmidi_transmit_ack() without spinlock.
  */
-int snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream, int count)
+int __snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream, int count)
 {
-       unsigned long flags;
        struct snd_rawmidi_runtime *runtime = substream->runtime;
 
        if (runtime->buffer == NULL) {
@@ -1132,7 +1151,6 @@ int snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream, int count)
                          "snd_rawmidi_transmit_ack: output is not active!!!\n");
                return -EINVAL;
        }
-       spin_lock_irqsave(&runtime->lock, flags);
        snd_BUG_ON(runtime->avail + count > runtime->buffer_size);
        runtime->hw_ptr += count;
        runtime->hw_ptr %= runtime->buffer_size;
@@ -1142,9 +1160,32 @@ int snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream, int count)
                if (runtime->drain || snd_rawmidi_ready(substream))
                        wake_up(&runtime->sleep);
        }
-       spin_unlock_irqrestore(&runtime->lock, flags);
        return count;
 }
+EXPORT_SYMBOL(__snd_rawmidi_transmit_ack);
+
+/**
+ * snd_rawmidi_transmit_ack - acknowledge the transmission
+ * @substream: the rawmidi substream
+ * @count: the transferred count
+ *
+ * Advances the hardware pointer for the internal output buffer with
+ * the given size and updates the condition.
+ * Call after the transmission is finished.
+ *
+ * Return: The advanced size if successful, or a negative error code on failure.
+ */
+int snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream, int count)
+{
+       struct snd_rawmidi_runtime *runtime = substream->runtime;
+       int result;
+       unsigned long flags;
+
+       spin_lock_irqsave(&runtime->lock, flags);
+       result = __snd_rawmidi_transmit_ack(substream, count);
+       spin_unlock_irqrestore(&runtime->lock, flags);
+       return result;
+}
 EXPORT_SYMBOL(snd_rawmidi_transmit_ack);
 
 /**
@@ -1160,12 +1201,22 @@ EXPORT_SYMBOL(snd_rawmidi_transmit_ack);
 int snd_rawmidi_transmit(struct snd_rawmidi_substream *substream,
                         unsigned char *buffer, int count)
 {
+       struct snd_rawmidi_runtime *runtime = substream->runtime;
+       int result;
+       unsigned long flags;
+
+       spin_lock_irqsave(&runtime->lock, flags);
        if (!substream->opened)
-               return -EBADFD;
-       count = snd_rawmidi_transmit_peek(substream, buffer, count);
-       if (count < 0)
-               return count;
-       return snd_rawmidi_transmit_ack(substream, count);
+               result = -EBADFD;
+       else {
+               count = __snd_rawmidi_transmit_peek(substream, buffer, count);
+               if (count <= 0)
+                       result = count;
+               else
+                       result = __snd_rawmidi_transmit_ack(substream, count);
+       }
+       spin_unlock_irqrestore(&runtime->lock, flags);
+       return result;
 }
 EXPORT_SYMBOL(snd_rawmidi_transmit);
 
@@ -1177,8 +1228,9 @@ static long snd_rawmidi_kernel_write1(struct snd_rawmidi_substream *substream,
        unsigned long flags;
        long count1, result;
        struct snd_rawmidi_runtime *runtime = substream->runtime;
+       unsigned long appl_ptr;
 
-       if (snd_BUG_ON(!kernelbuf && !userbuf))
+       if (!kernelbuf && !userbuf)
                return -EINVAL;
        if (snd_BUG_ON(!runtime->buffer))
                return -EINVAL;
@@ -1197,12 +1249,19 @@ static long snd_rawmidi_kernel_write1(struct snd_rawmidi_substream *substream,
                        count1 = count;
                if (count1 > (long)runtime->avail)
                        count1 = runtime->avail;
+
+               /* update runtime->appl_ptr before unlocking for userbuf */
+               appl_ptr = runtime->appl_ptr;
+               runtime->appl_ptr += count1;
+               runtime->appl_ptr %= runtime->buffer_size;
+               runtime->avail -= count1;
+
                if (kernelbuf)
-                       memcpy(runtime->buffer + runtime->appl_ptr,
+                       memcpy(runtime->buffer + appl_ptr,
                               kernelbuf + result, count1);
                else if (userbuf) {
                        spin_unlock_irqrestore(&runtime->lock, flags);
-                       if (copy_from_user(runtime->buffer + runtime->appl_ptr,
+                       if (copy_from_user(runtime->buffer + appl_ptr,
                                           userbuf + result, count1)) {
                                spin_lock_irqsave(&runtime->lock, flags);
                                result = result > 0 ? result : -EFAULT;
@@ -1210,9 +1269,6 @@ static long snd_rawmidi_kernel_write1(struct snd_rawmidi_substream *substream,
                        }
                        spin_lock_irqsave(&runtime->lock, flags);
                }
-               runtime->appl_ptr += count1;
-               runtime->appl_ptr %= runtime->buffer_size;
-               runtime->avail -= count1;
                result += count1;
                count -= count1;
        }
index b1221b29728e115ad95e50d4638de3e3949bb9da..6779e82b46dd7060bd982137ca75cfcdbbfa989e 100644 (file)
@@ -202,7 +202,7 @@ snd_seq_oss_open(struct file *file, int level)
 
        dp->index = i;
        if (i >= SNDRV_SEQ_OSS_MAX_CLIENTS) {
-               pr_err("ALSA: seq_oss: too many applications\n");
+               pr_debug("ALSA: seq_oss: too many applications\n");
                rc = -ENOMEM;
                goto _error;
        }
index 0f3b38184fe58da2809f9fa4fa9c79a83967c59e..b16dbef041747e1762c3285875a5573ac119b031 100644 (file)
@@ -308,7 +308,7 @@ snd_seq_oss_synth_cleanup(struct seq_oss_devinfo *dp)
        struct seq_oss_synth *rec;
        struct seq_oss_synthinfo *info;
 
-       if (snd_BUG_ON(dp->max_synthdev >= SNDRV_SEQ_OSS_MAX_SYNTH_DEVS))
+       if (snd_BUG_ON(dp->max_synthdev > SNDRV_SEQ_OSS_MAX_SYNTH_DEVS))
                return;
        for (i = 0; i < dp->max_synthdev; i++) {
                info = &dp->synths[i];
index 13cfa815732db759935f2daaf4c997c73016eb8c..58e79e02f2174e29f409cf129030af87d240dfce 100644 (file)
@@ -678,6 +678,9 @@ static int deliver_to_subscribers(struct snd_seq_client *client,
        else
                down_read(&grp->list_mutex);
        list_for_each_entry(subs, &grp->list_head, src_list) {
+               /* both ports ready? */
+               if (atomic_read(&subs->ref_count) != 2)
+                       continue;
                event->dest = subs->info.dest;
                if (subs->info.flags & SNDRV_SEQ_PORT_SUBS_TIMESTAMP)
                        /* convert time according to flag with subscription */
index 81f7c109dc46e1b8ef9b4a2a67477fdf72940ef6..65175902a68a841650ac818f8fde713eca60c5b5 100644 (file)
@@ -49,11 +49,12 @@ static int snd_seq_call_port_info_ioctl(struct snd_seq_client *client, unsigned
        struct snd_seq_port_info *data;
        mm_segment_t fs;
 
-       data = memdup_user(data32, sizeof(*data32));
-       if (IS_ERR(data))
-               return PTR_ERR(data);
+       data = kmalloc(sizeof(*data), GFP_KERNEL);
+       if (!data)
+               return -ENOMEM;
 
-       if (get_user(data->flags, &data32->flags) ||
+       if (copy_from_user(data, data32, sizeof(*data32)) ||
+           get_user(data->flags, &data32->flags) ||
            get_user(data->time_queue, &data32->time_queue))
                goto error;
        data->kernel = NULL;
index 55170a20ae7237246f5560e14c5b5066bb52ba35..921fb2bd8fadb37adf167d6bf52dd97270f005a4 100644 (file)
@@ -173,10 +173,6 @@ struct snd_seq_client_port *snd_seq_create_port(struct snd_seq_client *client,
 }
 
 /* */
-enum group_type {
-       SRC_LIST, DEST_LIST
-};
-
 static int subscribe_port(struct snd_seq_client *client,
                          struct snd_seq_client_port *port,
                          struct snd_seq_port_subs_info *grp,
@@ -203,6 +199,20 @@ static struct snd_seq_client_port *get_client_port(struct snd_seq_addr *addr,
        return NULL;
 }
 
+static void delete_and_unsubscribe_port(struct snd_seq_client *client,
+                                       struct snd_seq_client_port *port,
+                                       struct snd_seq_subscribers *subs,
+                                       bool is_src, bool ack);
+
+static inline struct snd_seq_subscribers *
+get_subscriber(struct list_head *p, bool is_src)
+{
+       if (is_src)
+               return list_entry(p, struct snd_seq_subscribers, src_list);
+       else
+               return list_entry(p, struct snd_seq_subscribers, dest_list);
+}
+
 /*
  * remove all subscribers on the list
  * this is called from port_delete, for each src and dest list.
@@ -210,7 +220,7 @@ static struct snd_seq_client_port *get_client_port(struct snd_seq_addr *addr,
 static void clear_subscriber_list(struct snd_seq_client *client,
                                  struct snd_seq_client_port *port,
                                  struct snd_seq_port_subs_info *grp,
-                                 int grptype)
+                                 int is_src)
 {
        struct list_head *p, *n;
 
@@ -219,15 +229,13 @@ static void clear_subscriber_list(struct snd_seq_client *client,
                struct snd_seq_client *c;
                struct snd_seq_client_port *aport;
 
-               if (grptype == SRC_LIST) {
-                       subs = list_entry(p, struct snd_seq_subscribers, src_list);
+               subs = get_subscriber(p, is_src);
+               if (is_src)
                        aport = get_client_port(&subs->info.dest, &c);
-               } else {
-                       subs = list_entry(p, struct snd_seq_subscribers, dest_list);
+               else
                        aport = get_client_port(&subs->info.sender, &c);
-               }
-               list_del(p);
-               unsubscribe_port(client, port, grp, &subs->info, 0);
+               delete_and_unsubscribe_port(client, port, subs, is_src, false);
+
                if (!aport) {
                        /* looks like the connected port is being deleted.
                         * we decrease the counter, and when both ports are deleted
@@ -235,21 +243,14 @@ static void clear_subscriber_list(struct snd_seq_client *client,
                         */
                        if (atomic_dec_and_test(&subs->ref_count))
                                kfree(subs);
-               } else {
-                       /* ok we got the connected port */
-                       struct snd_seq_port_subs_info *agrp;
-                       agrp = (grptype == SRC_LIST) ? &aport->c_dest : &aport->c_src;
-                       down_write(&agrp->list_mutex);
-                       if (grptype == SRC_LIST)
-                               list_del(&subs->dest_list);
-                       else
-                               list_del(&subs->src_list);
-                       up_write(&agrp->list_mutex);
-                       unsubscribe_port(c, aport, agrp, &subs->info, 1);
-                       kfree(subs);
-                       snd_seq_port_unlock(aport);
-                       snd_seq_client_unlock(c);
+                       continue;
                }
+
+               /* ok we got the connected port */
+               delete_and_unsubscribe_port(c, aport, subs, !is_src, true);
+               kfree(subs);
+               snd_seq_port_unlock(aport);
+               snd_seq_client_unlock(c);
        }
 }
 
@@ -262,8 +263,8 @@ static int port_delete(struct snd_seq_client *client,
        snd_use_lock_sync(&port->use_lock); 
 
        /* clear subscribers info */
-       clear_subscriber_list(client, port, &port->c_src, SRC_LIST);
-       clear_subscriber_list(client, port, &port->c_dest, DEST_LIST);
+       clear_subscriber_list(client, port, &port->c_src, true);
+       clear_subscriber_list(client, port, &port->c_dest, false);
 
        if (port->private_free)
                port->private_free(port->private_data);
@@ -479,85 +480,120 @@ static int match_subs_info(struct snd_seq_port_subscribe *r,
        return 0;
 }
 
-
-/* connect two ports */
-int snd_seq_port_connect(struct snd_seq_client *connector,
-                        struct snd_seq_client *src_client,
-                        struct snd_seq_client_port *src_port,
-                        struct snd_seq_client *dest_client,
-                        struct snd_seq_client_port *dest_port,
-                        struct snd_seq_port_subscribe *info)
+static int check_and_subscribe_port(struct snd_seq_client *client,
+                                   struct snd_seq_client_port *port,
+                                   struct snd_seq_subscribers *subs,
+                                   bool is_src, bool exclusive, bool ack)
 {
-       struct snd_seq_port_subs_info *src = &src_port->c_src;
-       struct snd_seq_port_subs_info *dest = &dest_port->c_dest;
-       struct snd_seq_subscribers *subs, *s;
-       int err, src_called = 0;
-       unsigned long flags;
-       int exclusive;
+       struct snd_seq_port_subs_info *grp;
+       struct list_head *p;
+       struct snd_seq_subscribers *s;
+       int err;
 
-       subs = kzalloc(sizeof(*subs), GFP_KERNEL);
-       if (! subs)
-               return -ENOMEM;
-
-       subs->info = *info;
-       atomic_set(&subs->ref_count, 2);
-
-       down_write(&src->list_mutex);
-       down_write_nested(&dest->list_mutex, SINGLE_DEPTH_NESTING);
-
-       exclusive = info->flags & SNDRV_SEQ_PORT_SUBS_EXCLUSIVE ? 1 : 0;
+       grp = is_src ? &port->c_src : &port->c_dest;
        err = -EBUSY;
+       down_write(&grp->list_mutex);
        if (exclusive) {
-               if (! list_empty(&src->list_head) || ! list_empty(&dest->list_head))
+               if (!list_empty(&grp->list_head))
                        goto __error;
        } else {
-               if (src->exclusive || dest->exclusive)
+               if (grp->exclusive)
                        goto __error;
                /* check whether already exists */
-               list_for_each_entry(s, &src->list_head, src_list) {
-                       if (match_subs_info(info, &s->info))
-                               goto __error;
-               }
-               list_for_each_entry(s, &dest->list_head, dest_list) {
-                       if (match_subs_info(info, &s->info))
+               list_for_each(p, &grp->list_head) {
+                       s = get_subscriber(p, is_src);
+                       if (match_subs_info(&subs->info, &s->info))
                                goto __error;
                }
        }
 
-       if ((err = subscribe_port(src_client, src_port, src, info,
-                                 connector->number != src_client->number)) < 0)
-               goto __error;
-       src_called = 1;
-
-       if ((err = subscribe_port(dest_client, dest_port, dest, info,
-                                 connector->number != dest_client->number)) < 0)
+       err = subscribe_port(client, port, grp, &subs->info, ack);
+       if (err < 0) {
+               grp->exclusive = 0;
                goto __error;
+       }
 
        /* add to list */
-       write_lock_irqsave(&src->list_lock, flags);
-       // write_lock(&dest->list_lock); // no other lock yet
-       list_add_tail(&subs->src_list, &src->list_head);
-       list_add_tail(&subs->dest_list, &dest->list_head);
-       // write_unlock(&dest->list_lock); // no other lock yet
-       write_unlock_irqrestore(&src->list_lock, flags);
+       write_lock_irq(&grp->list_lock);
+       if (is_src)
+               list_add_tail(&subs->src_list, &grp->list_head);
+       else
+               list_add_tail(&subs->dest_list, &grp->list_head);
+       grp->exclusive = exclusive;
+       atomic_inc(&subs->ref_count);
+       write_unlock_irq(&grp->list_lock);
+       err = 0;
+
+ __error:
+       up_write(&grp->list_mutex);
+       return err;
+}
 
-       src->exclusive = dest->exclusive = exclusive;
+static void delete_and_unsubscribe_port(struct snd_seq_client *client,
+                                       struct snd_seq_client_port *port,
+                                       struct snd_seq_subscribers *subs,
+                                       bool is_src, bool ack)
+{
+       struct snd_seq_port_subs_info *grp;
+
+       grp = is_src ? &port->c_src : &port->c_dest;
+       down_write(&grp->list_mutex);
+       write_lock_irq(&grp->list_lock);
+       if (is_src)
+               list_del(&subs->src_list);
+       else
+               list_del(&subs->dest_list);
+       grp->exclusive = 0;
+       write_unlock_irq(&grp->list_lock);
+       up_write(&grp->list_mutex);
+
+       unsubscribe_port(client, port, grp, &subs->info, ack);
+}
+
+/* connect two ports */
+int snd_seq_port_connect(struct snd_seq_client *connector,
+                        struct snd_seq_client *src_client,
+                        struct snd_seq_client_port *src_port,
+                        struct snd_seq_client *dest_client,
+                        struct snd_seq_client_port *dest_port,
+                        struct snd_seq_port_subscribe *info)
+{
+       struct snd_seq_subscribers *subs;
+       bool exclusive;
+       int err;
+
+       subs = kzalloc(sizeof(*subs), GFP_KERNEL);
+       if (!subs)
+               return -ENOMEM;
+
+       subs->info = *info;
+       atomic_set(&subs->ref_count, 0);
+       INIT_LIST_HEAD(&subs->src_list);
+       INIT_LIST_HEAD(&subs->dest_list);
+
+       exclusive = !!(info->flags & SNDRV_SEQ_PORT_SUBS_EXCLUSIVE);
+
+       err = check_and_subscribe_port(src_client, src_port, subs, true,
+                                      exclusive,
+                                      connector->number != src_client->number);
+       if (err < 0)
+               goto error;
+       err = check_and_subscribe_port(dest_client, dest_port, subs, false,
+                                      exclusive,
+                                      connector->number != dest_client->number);
+       if (err < 0)
+               goto error_dest;
 
-       up_write(&dest->list_mutex);
-       up_write(&src->list_mutex);
        return 0;
 
__error:
-       if (src_called)
-               unsubscribe_port(src_client, src_port, src, info,
-                                connector->number != src_client->number);
error_dest:
+       delete_and_unsubscribe_port(src_client, src_port, subs, true,
+                                   connector->number != src_client->number);
+ error:
        kfree(subs);
-       up_write(&dest->list_mutex);
-       up_write(&src->list_mutex);
        return err;
 }
 
-
 /* remove the connection */
 int snd_seq_port_disconnect(struct snd_seq_client *connector,
                            struct snd_seq_client *src_client,
@@ -567,37 +603,28 @@ int snd_seq_port_disconnect(struct snd_seq_client *connector,
                            struct snd_seq_port_subscribe *info)
 {
        struct snd_seq_port_subs_info *src = &src_port->c_src;
-       struct snd_seq_port_subs_info *dest = &dest_port->c_dest;
        struct snd_seq_subscribers *subs;
        int err = -ENOENT;
-       unsigned long flags;
 
        down_write(&src->list_mutex);
-       down_write_nested(&dest->list_mutex, SINGLE_DEPTH_NESTING);
-
        /* look for the connection */
        list_for_each_entry(subs, &src->list_head, src_list) {
                if (match_subs_info(info, &subs->info)) {
-                       write_lock_irqsave(&src->list_lock, flags);
-                       // write_lock(&dest->list_lock);  // no lock yet
-                       list_del(&subs->src_list);
-                       list_del(&subs->dest_list);
-                       // write_unlock(&dest->list_lock);
-                       write_unlock_irqrestore(&src->list_lock, flags);
-                       src->exclusive = dest->exclusive = 0;
-                       unsubscribe_port(src_client, src_port, src, info,
-                                        connector->number != src_client->number);
-                       unsubscribe_port(dest_client, dest_port, dest, info,
-                                        connector->number != dest_client->number);
-                       kfree(subs);
+                       atomic_dec(&subs->ref_count); /* mark as not ready */
                        err = 0;
                        break;
                }
        }
-
-       up_write(&dest->list_mutex);
        up_write(&src->list_mutex);
-       return err;
+       if (err < 0)
+               return err;
+
+       delete_and_unsubscribe_port(src_client, src_port, subs, true,
+                                   connector->number != src_client->number);
+       delete_and_unsubscribe_port(dest_client, dest_port, subs, false,
+                                   connector->number != dest_client->number);
+       kfree(subs);
+       return 0;
 }
 
 
index 82b220c769c131ecd05fea96fd0692c12d56642f..293104926098f7074001b6f6a4248d2a09ddd360 100644 (file)
@@ -90,6 +90,9 @@ void snd_seq_timer_delete(struct snd_seq_timer **tmr)
 
 void snd_seq_timer_defaults(struct snd_seq_timer * tmr)
 {
+       unsigned long flags;
+
+       spin_lock_irqsave(&tmr->lock, flags);
        /* setup defaults */
        tmr->ppq = 96;          /* 96 PPQ */
        tmr->tempo = 500000;    /* 120 BPM */
@@ -105,21 +108,25 @@ void snd_seq_timer_defaults(struct snd_seq_timer * tmr)
        tmr->preferred_resolution = seq_default_timer_resolution;
 
        tmr->skew = tmr->skew_base = SKEW_BASE;
+       spin_unlock_irqrestore(&tmr->lock, flags);
 }
 
-void snd_seq_timer_reset(struct snd_seq_timer * tmr)
+static void seq_timer_reset(struct snd_seq_timer *tmr)
 {
-       unsigned long flags;
-
-       spin_lock_irqsave(&tmr->lock, flags);
-
        /* reset time & songposition */
        tmr->cur_time.tv_sec = 0;
        tmr->cur_time.tv_nsec = 0;
 
        tmr->tick.cur_tick = 0;
        tmr->tick.fraction = 0;
+}
+
+void snd_seq_timer_reset(struct snd_seq_timer *tmr)
+{
+       unsigned long flags;
 
+       spin_lock_irqsave(&tmr->lock, flags);
+       seq_timer_reset(tmr);
        spin_unlock_irqrestore(&tmr->lock, flags);
 }
 
@@ -138,8 +145,11 @@ static void snd_seq_timer_interrupt(struct snd_timer_instance *timeri,
        tmr = q->timer;
        if (tmr == NULL)
                return;
-       if (!tmr->running)
+       spin_lock_irqsave(&tmr->lock, flags);
+       if (!tmr->running) {
+               spin_unlock_irqrestore(&tmr->lock, flags);
                return;
+       }
 
        resolution *= ticks;
        if (tmr->skew != tmr->skew_base) {
@@ -148,8 +158,6 @@ static void snd_seq_timer_interrupt(struct snd_timer_instance *timeri,
                        (((resolution & 0xffff) * tmr->skew) >> 16);
        }
 
-       spin_lock_irqsave(&tmr->lock, flags);
-
        /* update timer */
        snd_seq_inc_time_nsec(&tmr->cur_time, resolution);
 
@@ -296,26 +304,30 @@ int snd_seq_timer_open(struct snd_seq_queue *q)
        t->callback = snd_seq_timer_interrupt;
        t->callback_data = q;
        t->flags |= SNDRV_TIMER_IFLG_AUTO;
+       spin_lock_irq(&tmr->lock);
        tmr->timeri = t;
+       spin_unlock_irq(&tmr->lock);
        return 0;
 }
 
 int snd_seq_timer_close(struct snd_seq_queue *q)
 {
        struct snd_seq_timer *tmr;
+       struct snd_timer_instance *t;
        
        tmr = q->timer;
        if (snd_BUG_ON(!tmr))
                return -EINVAL;
-       if (tmr->timeri) {
-               snd_timer_stop(tmr->timeri);
-               snd_timer_close(tmr->timeri);
-               tmr->timeri = NULL;
-       }
+       spin_lock_irq(&tmr->lock);
+       t = tmr->timeri;
+       tmr->timeri = NULL;
+       spin_unlock_irq(&tmr->lock);
+       if (t)
+               snd_timer_close(t);
        return 0;
 }
 
-int snd_seq_timer_stop(struct snd_seq_timer * tmr)
+static int seq_timer_stop(struct snd_seq_timer *tmr)
 {
        if (! tmr->timeri)
                return -EINVAL;
@@ -326,6 +338,17 @@ int snd_seq_timer_stop(struct snd_seq_timer * tmr)
        return 0;
 }
 
+int snd_seq_timer_stop(struct snd_seq_timer *tmr)
+{
+       unsigned long flags;
+       int err;
+
+       spin_lock_irqsave(&tmr->lock, flags);
+       err = seq_timer_stop(tmr);
+       spin_unlock_irqrestore(&tmr->lock, flags);
+       return err;
+}
+
 static int initialize_timer(struct snd_seq_timer *tmr)
 {
        struct snd_timer *t;
@@ -358,13 +381,13 @@ static int initialize_timer(struct snd_seq_timer *tmr)
        return 0;
 }
 
-int snd_seq_timer_start(struct snd_seq_timer * tmr)
+static int seq_timer_start(struct snd_seq_timer *tmr)
 {
        if (! tmr->timeri)
                return -EINVAL;
        if (tmr->running)
-               snd_seq_timer_stop(tmr);
-       snd_seq_timer_reset(tmr);
+               seq_timer_stop(tmr);
+       seq_timer_reset(tmr);
        if (initialize_timer(tmr) < 0)
                return -EINVAL;
        snd_timer_start(tmr->timeri, tmr->ticks);
@@ -373,14 +396,25 @@ int snd_seq_timer_start(struct snd_seq_timer * tmr)
        return 0;
 }
 
-int snd_seq_timer_continue(struct snd_seq_timer * tmr)
+int snd_seq_timer_start(struct snd_seq_timer *tmr)
+{
+       unsigned long flags;
+       int err;
+
+       spin_lock_irqsave(&tmr->lock, flags);
+       err = seq_timer_start(tmr);
+       spin_unlock_irqrestore(&tmr->lock, flags);
+       return err;
+}
+
+static int seq_timer_continue(struct snd_seq_timer *tmr)
 {
        if (! tmr->timeri)
                return -EINVAL;
        if (tmr->running)
                return -EBUSY;
        if (! tmr->initialized) {
-               snd_seq_timer_reset(tmr);
+               seq_timer_reset(tmr);
                if (initialize_timer(tmr) < 0)
                        return -EINVAL;
        }
@@ -390,11 +424,24 @@ int snd_seq_timer_continue(struct snd_seq_timer * tmr)
        return 0;
 }
 
+int snd_seq_timer_continue(struct snd_seq_timer *tmr)
+{
+       unsigned long flags;
+       int err;
+
+       spin_lock_irqsave(&tmr->lock, flags);
+       err = seq_timer_continue(tmr);
+       spin_unlock_irqrestore(&tmr->lock, flags);
+       return err;
+}
+
 /* return current 'real' time. use timeofday() to get better granularity. */
 snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr)
 {
        snd_seq_real_time_t cur_time;
+       unsigned long flags;
 
+       spin_lock_irqsave(&tmr->lock, flags);
        cur_time = tmr->cur_time;
        if (tmr->running) { 
                struct timeval tm;
@@ -410,7 +457,7 @@ snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr)
                }
                snd_seq_sanity_real_time(&cur_time);
        }
-                
+       spin_unlock_irqrestore(&tmr->lock, flags);
        return cur_time;        
 }
 
index 3da2d48610b3a91e4b93b0967b1fac5afebe41f7..c82ed3e70506db65adcbd48f6bdd55bd9628633d 100644 (file)
@@ -155,21 +155,26 @@ static void snd_virmidi_output_trigger(struct snd_rawmidi_substream *substream,
        struct snd_virmidi *vmidi = substream->runtime->private_data;
        int count, res;
        unsigned char buf[32], *pbuf;
+       unsigned long flags;
 
        if (up) {
                vmidi->trigger = 1;
                if (vmidi->seq_mode == SNDRV_VIRMIDI_SEQ_DISPATCH &&
                    !(vmidi->rdev->flags & SNDRV_VIRMIDI_SUBSCRIBE)) {
-                       snd_rawmidi_transmit_ack(substream, substream->runtime->buffer_size - substream->runtime->avail);
-                       return;         /* ignored */
+                       while (snd_rawmidi_transmit(substream, buf,
+                                                   sizeof(buf)) > 0) {
+                               /* ignored */
+                       }
+                       return;
                }
                if (vmidi->event.type != SNDRV_SEQ_EVENT_NONE) {
                        if (snd_seq_kernel_client_dispatch(vmidi->client, &vmidi->event, in_atomic(), 0) < 0)
                                return;
                        vmidi->event.type = SNDRV_SEQ_EVENT_NONE;
                }
+               spin_lock_irqsave(&substream->runtime->lock, flags);
                while (1) {
-                       count = snd_rawmidi_transmit_peek(substream, buf, sizeof(buf));
+                       count = __snd_rawmidi_transmit_peek(substream, buf, sizeof(buf));
                        if (count <= 0)
                                break;
                        pbuf = buf;
@@ -179,16 +184,18 @@ static void snd_virmidi_output_trigger(struct snd_rawmidi_substream *substream,
                                        snd_midi_event_reset_encode(vmidi->parser);
                                        continue;
                                }
-                               snd_rawmidi_transmit_ack(substream, res);
+                               __snd_rawmidi_transmit_ack(substream, res);
                                pbuf += res;
                                count -= res;
                                if (vmidi->event.type != SNDRV_SEQ_EVENT_NONE) {
                                        if (snd_seq_kernel_client_dispatch(vmidi->client, &vmidi->event, in_atomic(), 0) < 0)
-                                               return;
+                                               goto out;
                                        vmidi->event.type = SNDRV_SEQ_EVENT_NONE;
                                }
                        }
                }
+       out:
+               spin_unlock_irqrestore(&substream->runtime->lock, flags);
        } else {
                vmidi->trigger = 0;
        }
@@ -254,9 +261,13 @@ static int snd_virmidi_output_open(struct snd_rawmidi_substream *substream)
  */
 static int snd_virmidi_input_close(struct snd_rawmidi_substream *substream)
 {
+       struct snd_virmidi_dev *rdev = substream->rmidi->private_data;
        struct snd_virmidi *vmidi = substream->runtime->private_data;
-       snd_midi_event_free(vmidi->parser);
+
+       write_lock_irq(&rdev->filelist_lock);
        list_del(&vmidi->list);
+       write_unlock_irq(&rdev->filelist_lock);
+       snd_midi_event_free(vmidi->parser);
        substream->runtime->private_data = NULL;
        kfree(vmidi);
        return 0;
index cb25aded53499cdf86ceb954e2a4ffdca4913595..9b513a05765a10985d556b101918ed3783b9ae60 100644 (file)
@@ -65,6 +65,7 @@ struct snd_timer_user {
        int qtail;
        int qused;
        int queue_size;
+       bool disconnected;
        struct snd_timer_read *queue;
        struct snd_timer_tread *tqueue;
        spinlock_t qlock;
@@ -290,6 +291,9 @@ int snd_timer_open(struct snd_timer_instance **ti,
                mutex_unlock(&register_mutex);
                return -ENOMEM;
        }
+       /* take a card refcount for safe disconnection */
+       if (timer->card)
+               get_device(&timer->card->card_dev);
        timeri->slave_class = tid->dev_sclass;
        timeri->slave_id = slave_id;
        if (list_empty(&timer->open_list_head) && timer->hw.open)
@@ -359,6 +363,9 @@ int snd_timer_close(struct snd_timer_instance *timeri)
                }
                spin_unlock(&timer->lock);
                spin_unlock_irq(&slave_active_lock);
+               /* release a card refcount for safe disconnection */
+               if (timer->card)
+                       put_device(&timer->card->card_dev);
                mutex_unlock(&register_mutex);
        }
  out:
@@ -444,6 +451,10 @@ static int snd_timer_start_slave(struct snd_timer_instance *timeri)
        unsigned long flags;
 
        spin_lock_irqsave(&slave_active_lock, flags);
+       if (timeri->flags & SNDRV_TIMER_IFLG_RUNNING) {
+               spin_unlock_irqrestore(&slave_active_lock, flags);
+               return -EBUSY;
+       }
        timeri->flags |= SNDRV_TIMER_IFLG_RUNNING;
        if (timeri->master && timeri->timer) {
                spin_lock(&timeri->timer->lock);
@@ -468,18 +479,28 @@ int snd_timer_start(struct snd_timer_instance *timeri, unsigned int ticks)
                return -EINVAL;
        if (timeri->flags & SNDRV_TIMER_IFLG_SLAVE) {
                result = snd_timer_start_slave(timeri);
-               snd_timer_notify1(timeri, SNDRV_TIMER_EVENT_START);
+               if (result >= 0)
+                       snd_timer_notify1(timeri, SNDRV_TIMER_EVENT_START);
                return result;
        }
        timer = timeri->timer;
        if (timer == NULL)
                return -EINVAL;
+       if (timer->card && timer->card->shutdown)
+               return -ENODEV;
        spin_lock_irqsave(&timer->lock, flags);
+       if (timeri->flags & (SNDRV_TIMER_IFLG_RUNNING |
+                            SNDRV_TIMER_IFLG_START)) {
+               result = -EBUSY;
+               goto unlock;
+       }
        timeri->ticks = timeri->cticks = ticks;
        timeri->pticks = 0;
        result = snd_timer_start1(timer, timeri, ticks);
+ unlock:
        spin_unlock_irqrestore(&timer->lock, flags);
-       snd_timer_notify1(timeri, SNDRV_TIMER_EVENT_START);
+       if (result >= 0)
+               snd_timer_notify1(timeri, SNDRV_TIMER_EVENT_START);
        return result;
 }
 
@@ -493,6 +514,10 @@ static int _snd_timer_stop(struct snd_timer_instance *timeri, int event)
 
        if (timeri->flags & SNDRV_TIMER_IFLG_SLAVE) {
                spin_lock_irqsave(&slave_active_lock, flags);
+               if (!(timeri->flags & SNDRV_TIMER_IFLG_RUNNING)) {
+                       spin_unlock_irqrestore(&slave_active_lock, flags);
+                       return -EBUSY;
+               }
                timeri->flags &= ~SNDRV_TIMER_IFLG_RUNNING;
                list_del_init(&timeri->ack_list);
                list_del_init(&timeri->active_list);
@@ -503,8 +528,17 @@ static int _snd_timer_stop(struct snd_timer_instance *timeri, int event)
        if (!timer)
                return -EINVAL;
        spin_lock_irqsave(&timer->lock, flags);
+       if (!(timeri->flags & (SNDRV_TIMER_IFLG_RUNNING |
+                              SNDRV_TIMER_IFLG_START))) {
+               spin_unlock_irqrestore(&timer->lock, flags);
+               return -EBUSY;
+       }
        list_del_init(&timeri->ack_list);
        list_del_init(&timeri->active_list);
+       if (timer->card && timer->card->shutdown) {
+               spin_unlock_irqrestore(&timer->lock, flags);
+               return 0;
+       }
        if ((timeri->flags & SNDRV_TIMER_IFLG_RUNNING) &&
            !(--timer->running)) {
                timer->hw.stop(timer);
@@ -565,11 +599,18 @@ int snd_timer_continue(struct snd_timer_instance *timeri)
        timer = timeri->timer;
        if (! timer)
                return -EINVAL;
+       if (timer->card && timer->card->shutdown)
+               return -ENODEV;
        spin_lock_irqsave(&timer->lock, flags);
+       if (timeri->flags & SNDRV_TIMER_IFLG_RUNNING) {
+               result = -EBUSY;
+               goto unlock;
+       }
        if (!timeri->cticks)
                timeri->cticks = 1;
        timeri->pticks = 0;
        result = snd_timer_start1(timer, timeri, timer->sticks);
+ unlock:
        spin_unlock_irqrestore(&timer->lock, flags);
        snd_timer_notify1(timeri, SNDRV_TIMER_EVENT_CONTINUE);
        return result;
@@ -628,6 +669,9 @@ static void snd_timer_tasklet(unsigned long arg)
        unsigned long resolution, ticks;
        unsigned long flags;
 
+       if (timer->card && timer->card->shutdown)
+               return;
+
        spin_lock_irqsave(&timer->lock, flags);
        /* now process all callbacks */
        while (!list_empty(&timer->sack_list_head)) {
@@ -668,6 +712,9 @@ void snd_timer_interrupt(struct snd_timer * timer, unsigned long ticks_left)
        if (timer == NULL)
                return;
 
+       if (timer->card && timer->card->shutdown)
+               return;
+
        spin_lock_irqsave(&timer->lock, flags);
 
        /* remember the current resolution */
@@ -697,8 +744,8 @@ void snd_timer_interrupt(struct snd_timer * timer, unsigned long ticks_left)
                        ti->cticks = ti->ticks;
                } else {
                        ti->flags &= ~SNDRV_TIMER_IFLG_RUNNING;
-                       if (--timer->running)
-                               list_del_init(&ti->active_list);
+                       --timer->running;
+                       list_del_init(&ti->active_list);
                }
                if ((timer->hw.flags & SNDRV_TIMER_HW_TASKLET) ||
                    (ti->flags & SNDRV_TIMER_IFLG_FAST))
@@ -881,8 +928,15 @@ static int snd_timer_dev_register(struct snd_device *dev)
 static int snd_timer_dev_disconnect(struct snd_device *device)
 {
        struct snd_timer *timer = device->device_data;
+       struct snd_timer_instance *ti;
+
        mutex_lock(&register_mutex);
        list_del_init(&timer->device_list);
+       /* wake up pending sleepers */
+       list_for_each_entry(ti, &timer->open_list_head, open_list) {
+               if (ti->disconnect)
+                       ti->disconnect(ti);
+       }
        mutex_unlock(&register_mutex);
        return 0;
 }
@@ -893,6 +947,8 @@ void snd_timer_notify(struct snd_timer *timer, int event, struct timespec *tstam
        unsigned long resolution = 0;
        struct snd_timer_instance *ti, *ts;
 
+       if (timer->card && timer->card->shutdown)
+               return;
        if (! (timer->hw.flags & SNDRV_TIMER_HW_SLAVE))
                return;
        if (snd_BUG_ON(event < SNDRV_TIMER_EVENT_MSTART ||
@@ -1002,11 +1058,21 @@ static int snd_timer_s_stop(struct snd_timer * timer)
        return 0;
 }
 
+static int snd_timer_s_close(struct snd_timer *timer)
+{
+       struct snd_timer_system_private *priv;
+
+       priv = (struct snd_timer_system_private *)timer->private_data;
+       del_timer_sync(&priv->tlist);
+       return 0;
+}
+
 static struct snd_timer_hardware snd_timer_system =
 {
        .flags =        SNDRV_TIMER_HW_FIRST | SNDRV_TIMER_HW_TASKLET,
        .resolution =   1000000000L / HZ,
        .ticks =        10000000L,
+       .close =        snd_timer_s_close,
        .start =        snd_timer_s_start,
        .stop =         snd_timer_s_stop
 };
@@ -1051,6 +1117,8 @@ static void snd_timer_proc_read(struct snd_info_entry *entry,
 
        mutex_lock(&register_mutex);
        list_for_each_entry(timer, &snd_timer_list, device_list) {
+               if (timer->card && timer->card->shutdown)
+                       continue;
                switch (timer->tmr_class) {
                case SNDRV_TIMER_CLASS_GLOBAL:
                        snd_iprintf(buffer, "G%i: ", timer->tmr_device);
@@ -1185,6 +1253,14 @@ static void snd_timer_user_ccallback(struct snd_timer_instance *timeri,
        wake_up(&tu->qchange_sleep);
 }
 
+static void snd_timer_user_disconnect(struct snd_timer_instance *timeri)
+{
+       struct snd_timer_user *tu = timeri->callback_data;
+
+       tu->disconnected = true;
+       wake_up(&tu->qchange_sleep);
+}
+
 static void snd_timer_user_tinterrupt(struct snd_timer_instance *timeri,
                                      unsigned long resolution,
                                      unsigned long ticks)
@@ -1558,6 +1634,7 @@ static int snd_timer_user_tselect(struct file *file,
                        ? snd_timer_user_tinterrupt : snd_timer_user_interrupt;
                tu->timeri->ccallback = snd_timer_user_ccallback;
                tu->timeri->callback_data = (void *)tu;
+               tu->timeri->disconnect = snd_timer_user_disconnect;
        }
 
       __err:
@@ -1876,6 +1953,10 @@ static ssize_t snd_timer_user_read(struct file *file, char __user *buffer,
 
                        remove_wait_queue(&tu->qchange_sleep, &wait);
 
+                       if (tu->disconnected) {
+                               err = -ENODEV;
+                               break;
+                       }
                        if (signal_pending(current)) {
                                err = -ERESTARTSYS;
                                break;
@@ -1925,6 +2006,8 @@ static unsigned int snd_timer_user_poll(struct file *file, poll_table * wait)
        mask = 0;
        if (tu->qused)
                mask |= POLLIN | POLLRDNORM;
+       if (tu->disconnected)
+               mask |= POLLERR;
 
        return mask;
 }
index 75b74850c00535ee320f3827e094d9d4da1e30e3..bde33308f0d6bc697035a627aef2cc24523b195c 100644 (file)
@@ -87,7 +87,7 @@ MODULE_PARM_DESC(pcm_substreams, "PCM substreams # (1-128) for dummy driver.");
 module_param(fake_buffer, bool, 0444);
 MODULE_PARM_DESC(fake_buffer, "Fake buffer allocations.");
 #ifdef CONFIG_HIGH_RES_TIMERS
-module_param(hrtimer, bool, 0644);
+module_param(hrtimer, bool, 0444);
 MODULE_PARM_DESC(hrtimer, "Use hrtimer as the timer source.");
 #endif
 
index 926e5dcbb66a1a3ec523482ceb5e199f8c455b64..5022c9b97ddfc007bc6119ab6a0cb41fafb2c16d 100644 (file)
@@ -47,14 +47,16 @@ static const unsigned int bridgeco_freq_table[] = {
        [6] = 0x07,
 };
 
-static unsigned int
-get_formation_index(unsigned int rate)
+static int
+get_formation_index(unsigned int rate, unsigned int *index)
 {
        unsigned int i;
 
        for (i = 0; i < ARRAY_SIZE(snd_bebob_rate_table); i++) {
-               if (snd_bebob_rate_table[i] == rate)
-                       return i;
+               if (snd_bebob_rate_table[i] == rate) {
+                       *index = i;
+                       return 0;
+               }
        }
        return -EINVAL;
 }
@@ -425,7 +427,9 @@ make_both_connections(struct snd_bebob *bebob, unsigned int rate)
                goto end;
 
        /* confirm params for both streams */
-       index = get_formation_index(rate);
+       err = get_formation_index(rate, &index);
+       if (err < 0)
+               goto end;
        pcm_channels = bebob->tx_stream_formations[index].pcm;
        midi_channels = bebob->tx_stream_formations[index].midi;
        err = amdtp_am824_set_parameters(&bebob->tx_stream, rate,
index c50177fb469f5f67974c853b109c07a68ef2524d..f6854dbd7d8d0eed41b5158c25b20e03211ccac5 100644 (file)
@@ -306,7 +306,7 @@ out_master_del:
 out_err:
        kfree(acomp);
        bus->audio_component = NULL;
-       dev_err(dev, "failed to add i915 component master (%d)\n", ret);
+       dev_info(dev, "failed to add i915 component master (%d)\n", ret);
 
        return ret;
 }
index 0216475fc759e23442a9f2ab7775c6966458b2cd..37adcc6cbe6bd852a290cc95ea58cc05863e88ba 100644 (file)
@@ -3,6 +3,7 @@
 config SND_WSS_LIB
         tristate
         select SND_PCM
+       select SND_TIMER
 
 config SND_SB_COMMON
         tristate
@@ -42,6 +43,7 @@ config SND_AD1816A
        select SND_OPL3_LIB
        select SND_MPU401_UART
        select SND_PCM
+       select SND_TIMER
        help
          Say Y here to include support for Analog Devices SoundPort
          AD1816A or compatible sound chips.
@@ -209,6 +211,7 @@ config SND_GUSCLASSIC
        tristate "Gravis UltraSound Classic"
        select SND_RAWMIDI
        select SND_PCM
+       select SND_TIMER
        help
          Say Y here to include support for Gravis UltraSound Classic
          soundcards.
@@ -221,6 +224,7 @@ config SND_GUSEXTREME
        select SND_OPL3_LIB
        select SND_MPU401_UART
        select SND_PCM
+       select SND_TIMER
        help
          Say Y here to include support for Gravis UltraSound Extreme
          soundcards.
index 656ce39bddbc531c27d5a15b6d07e899e17f1cd6..8f6594a7d37f7b940ded27f6a8a0395ea8fa1442 100644 (file)
@@ -155,6 +155,7 @@ config SND_AZT3328
        select SND_PCM
        select SND_RAWMIDI
        select SND_AC97_CODEC
+       select SND_TIMER
        depends on ZONE_DMA
        help
          Say Y here to include support for Aztech AZF3328 (PCI168)
@@ -463,6 +464,7 @@ config SND_EMU10K1
        select SND_HWDEP
        select SND_RAWMIDI
        select SND_AC97_CODEC
+       select SND_TIMER
        depends on ZONE_DMA
        help
          Say Y to include support for Sound Blaster PCI 512, Live!,
@@ -889,6 +891,7 @@ config SND_YMFPCI
        select SND_OPL3_LIB
        select SND_MPU401_UART
        select SND_AC97_CODEC
+       select SND_TIMER
        help
          Say Y here to include support for Yamaha PCI audio chips -
          YMF724, YMF724F, YMF740, YMF740C, YMF744, YMF754.
index 28e2f8b42f5e8e4e7f3aaaaa88f45d6834b02591..891453451543102994b7912584f00483b2c472f0 100644 (file)
@@ -1141,6 +1141,14 @@ static int snd_emu10k1_emu1010_init(struct snd_emu10k1 *emu)
                emu->emu1010.firmware_thread =
                        kthread_create(emu1010_firmware_thread, emu,
                                       "emu1010_firmware");
+               if (IS_ERR(emu->emu1010.firmware_thread)) {
+                       err = PTR_ERR(emu->emu1010.firmware_thread);
+                       emu->emu1010.firmware_thread = NULL;
+                       dev_info(emu->card->dev,
+                                       "emu1010: Creating thread failed\n");
+                       return err;
+               }
+
                wake_up_process(emu->emu1010.firmware_thread);
        }
 
index 70671ad65d24565fb6c8522bd69444817aac2933..6efadbfb3fe3511b4726541cdd9ebd4aae6cd65d 100644 (file)
@@ -174,14 +174,40 @@ static inline bool codec_probed(struct hda_codec *codec)
        return device_attach(hda_codec_dev(codec)) > 0 && codec->preset;
 }
 
-/* try to auto-load and bind the codec module */
-static void codec_bind_module(struct hda_codec *codec)
+/* try to auto-load codec module */
+static void request_codec_module(struct hda_codec *codec)
 {
 #ifdef MODULE
        char modalias[32];
+       const char *mod = NULL;
+
+       switch (codec->probe_id) {
+       case HDA_CODEC_ID_GENERIC_HDMI:
+#if IS_MODULE(CONFIG_SND_HDA_CODEC_HDMI)
+               mod = "snd-hda-codec-hdmi";
+#endif
+               break;
+       case HDA_CODEC_ID_GENERIC:
+#if IS_MODULE(CONFIG_SND_HDA_GENERIC)
+               mod = "snd-hda-codec-generic";
+#endif
+               break;
+       default:
+               snd_hdac_codec_modalias(&codec->core, modalias, sizeof(modalias));
+               mod = modalias;
+               break;
+       }
+
+       if (mod)
+               request_module(mod);
+#endif /* MODULE */
+}
 
-       snd_hdac_codec_modalias(&codec->core, modalias, sizeof(modalias));
-       request_module(modalias);
+/* try to auto-load and bind the codec module */
+static void codec_bind_module(struct hda_codec *codec)
+{
+#ifdef MODULE
+       request_codec_module(codec);
        if (codec_probed(codec))
                return;
 #endif
@@ -218,17 +244,13 @@ static int codec_bind_generic(struct hda_codec *codec)
 
        if (is_likely_hdmi_codec(codec)) {
                codec->probe_id = HDA_CODEC_ID_GENERIC_HDMI;
-#if IS_MODULE(CONFIG_SND_HDA_CODEC_HDMI)
-               request_module("snd-hda-codec-hdmi");
-#endif
+               request_codec_module(codec);
                if (codec_probed(codec))
                        return 0;
        }
 
        codec->probe_id = HDA_CODEC_ID_GENERIC;
-#if IS_MODULE(CONFIG_SND_HDA_GENERIC)
-       request_module("snd-hda-codec-generic");
-#endif
+       request_codec_module(codec);
        if (codec_probed(codec))
                return 0;
        return -ENODEV;
index c0bef11afa7e2cc01138a46708f4d6e9cd4bd529..4045dca3d699edd13f06aa9ab7c095948c932a55 100644 (file)
@@ -90,6 +90,8 @@ enum {
 #define NVIDIA_HDA_ENABLE_COHBIT      0x01
 
 /* Defines for Intel SCH HDA snoop control */
+#define INTEL_HDA_CGCTL         0x48
+#define INTEL_HDA_CGCTL_MISCBDCGE        (0x1 << 6)
 #define INTEL_SCH_HDA_DEVC      0x78
 #define INTEL_SCH_HDA_DEVC_NOSNOOP       (0x1<<11)
 
@@ -534,10 +536,21 @@ static void hda_intel_init_chip(struct azx *chip, bool full_reset)
 {
        struct hdac_bus *bus = azx_bus(chip);
        struct pci_dev *pci = chip->pci;
+       u32 val;
 
        if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL)
                snd_hdac_set_codec_wakeup(bus, true);
+       if (IS_BROXTON(pci)) {
+               pci_read_config_dword(pci, INTEL_HDA_CGCTL, &val);
+               val = val & ~INTEL_HDA_CGCTL_MISCBDCGE;
+               pci_write_config_dword(pci, INTEL_HDA_CGCTL, val);
+       }
        azx_init_chip(chip, full_reset);
+       if (IS_BROXTON(pci)) {
+               pci_read_config_dword(pci, INTEL_HDA_CGCTL, &val);
+               val = val | INTEL_HDA_CGCTL_MISCBDCGE;
+               pci_write_config_dword(pci, INTEL_HDA_CGCTL, val);
+       }
        if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL)
                snd_hdac_set_codec_wakeup(bus, false);
 
@@ -2078,9 +2091,11 @@ static int azx_probe_continue(struct azx *chip)
                         * for other chips, still continue probing as other
                         * codecs can be on the same link.
                         */
-                       if (CONTROLLER_IN_GPU(pci))
+                       if (CONTROLLER_IN_GPU(pci)) {
+                               dev_err(chip->card->dev,
+                                       "HSW/BDW HD-audio HDMI/DP requires binding with gfx driver\n");
                                goto out_free;
-                       else
+                       else
                                goto skip_i915;
                }
 
@@ -2149,9 +2164,17 @@ i915_power_fail:
 static void azx_remove(struct pci_dev *pci)
 {
        struct snd_card *card = pci_get_drvdata(pci);
+       struct azx *chip;
+       struct hda_intel *hda;
+
+       if (card) {
+               /* flush the pending probing work */
+               chip = card->private_data;
+               hda = container_of(chip, struct hda_intel, chip);
+               flush_work(&hda->probe_work);
 
-       if (card)
                snd_card_free(card);
+       }
 }
 
 static void azx_shutdown(struct pci_dev *pci)
index a12ae8ac091451261a2ba613543677fabedb8cd0..c1c855a6c0af8199d03b04419d16d3494507ddeb 100644 (file)
@@ -614,6 +614,7 @@ enum {
        CS4208_MAC_AUTO,
        CS4208_MBA6,
        CS4208_MBP11,
+       CS4208_MACMINI,
        CS4208_GPIO0,
 };
 
@@ -621,6 +622,7 @@ static const struct hda_model_fixup cs4208_models[] = {
        { .id = CS4208_GPIO0, .name = "gpio0" },
        { .id = CS4208_MBA6, .name = "mba6" },
        { .id = CS4208_MBP11, .name = "mbp11" },
+       { .id = CS4208_MACMINI, .name = "macmini" },
        {}
 };
 
@@ -632,6 +634,7 @@ static const struct snd_pci_quirk cs4208_fixup_tbl[] = {
 /* codec SSID matching */
 static const struct snd_pci_quirk cs4208_mac_fixup_tbl[] = {
        SND_PCI_QUIRK(0x106b, 0x5e00, "MacBookPro 11,2", CS4208_MBP11),
+       SND_PCI_QUIRK(0x106b, 0x6c00, "MacMini 7,1", CS4208_MACMINI),
        SND_PCI_QUIRK(0x106b, 0x7100, "MacBookAir 6,1", CS4208_MBA6),
        SND_PCI_QUIRK(0x106b, 0x7200, "MacBookAir 6,2", CS4208_MBA6),
        SND_PCI_QUIRK(0x106b, 0x7b00, "MacBookPro 12,1", CS4208_MBP11),
@@ -666,6 +669,24 @@ static void cs4208_fixup_mac(struct hda_codec *codec,
        snd_hda_apply_fixup(codec, action);
 }
 
+/* MacMini 7,1 has the inverted jack detection */
+static void cs4208_fixup_macmini(struct hda_codec *codec,
+                                const struct hda_fixup *fix, int action)
+{
+       static const struct hda_pintbl pincfgs[] = {
+               { 0x18, 0x00ab9150 }, /* mic (audio-in) jack: disable detect */
+               { 0x21, 0x004be140 }, /* SPDIF: disable detect */
+               { }
+       };
+
+       if (action == HDA_FIXUP_ACT_PRE_PROBE) {
+               /* HP pin (0x10) has an inverted detection */
+               codec->inv_jack_detect = 1;
+               /* disable the bogus Mic and SPDIF jack detections */
+               snd_hda_apply_pincfgs(codec, pincfgs);
+       }
+}
+
 static int cs4208_spdif_sw_put(struct snd_kcontrol *kcontrol,
                               struct snd_ctl_elem_value *ucontrol)
 {
@@ -709,6 +730,12 @@ static const struct hda_fixup cs4208_fixups[] = {
                .chained = true,
                .chain_id = CS4208_GPIO0,
        },
+       [CS4208_MACMINI] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = cs4208_fixup_macmini,
+               .chained = true,
+               .chain_id = CS4208_GPIO0,
+       },
        [CS4208_GPIO0] = {
                .type = HDA_FIXUP_FUNC,
                .v.func = cs4208_fixup_gpio0,
index 426a29a1c19bbda5841bd9d4574bf66e83cb8b76..1f52b55d77c92d7114ca375d79a43e8f798cc580 100644 (file)
@@ -3653,6 +3653,7 @@ HDA_CODEC_ENTRY(0x10de0070, "GPU 70 HDMI/DP",     patch_nvhdmi),
 HDA_CODEC_ENTRY(0x10de0071, "GPU 71 HDMI/DP",  patch_nvhdmi),
 HDA_CODEC_ENTRY(0x10de0072, "GPU 72 HDMI/DP",  patch_nvhdmi),
 HDA_CODEC_ENTRY(0x10de007d, "GPU 7d HDMI/DP",  patch_nvhdmi),
+HDA_CODEC_ENTRY(0x10de0083, "GPU 83 HDMI/DP",  patch_nvhdmi),
 HDA_CODEC_ENTRY(0x10de8001, "MCP73 HDMI",      patch_nvhdmi_2ch),
 HDA_CODEC_ENTRY(0x11069f80, "VX900 HDMI/DP",   patch_via_hdmi),
 HDA_CODEC_ENTRY(0x11069f81, "VX900 HDMI/DP",   patch_via_hdmi),
index 8143c0e24a27ea99bd212bfd33f2bdf805424885..21992fb7035d456c6bac7124c7a4b350ae88fc64 100644 (file)
@@ -327,6 +327,7 @@ static void alc_fill_eapd_coef(struct hda_codec *codec)
        case 0x10ec0292:
                alc_update_coef_idx(codec, 0x4, 1<<15, 0);
                break;
+       case 0x10ec0225:
        case 0x10ec0233:
        case 0x10ec0255:
        case 0x10ec0256:
@@ -900,6 +901,7 @@ static struct alc_codec_rename_pci_table rename_pci_tbl[] = {
        { 0x10ec0899, 0x1028, 0, "ALC3861" },
        { 0x10ec0298, 0x1028, 0, "ALC3266" },
        { 0x10ec0256, 0x1028, 0, "ALC3246" },
+       { 0x10ec0225, 0x1028, 0, "ALC3253" },
        { 0x10ec0670, 0x1025, 0, "ALC669X" },
        { 0x10ec0676, 0x1025, 0, "ALC679X" },
        { 0x10ec0282, 0x1043, 0, "ALC3229" },
@@ -2651,6 +2653,7 @@ enum {
        ALC269_TYPE_ALC298,
        ALC269_TYPE_ALC255,
        ALC269_TYPE_ALC256,
+       ALC269_TYPE_ALC225,
 };
 
 /*
@@ -2680,6 +2683,7 @@ static int alc269_parse_auto_config(struct hda_codec *codec)
        case ALC269_TYPE_ALC298:
        case ALC269_TYPE_ALC255:
        case ALC269_TYPE_ALC256:
+       case ALC269_TYPE_ALC225:
                ssids = alc269_ssids;
                break;
        default:
@@ -3658,6 +3662,16 @@ static void alc_headset_mode_unplugged(struct hda_codec *codec)
                WRITE_COEF(0xb7, 0x802b),
                {}
        };
+       static struct coef_fw coef0225[] = {
+               UPDATE_COEF(0x4a, 1<<8, 0),
+               UPDATE_COEFEX(0x57, 0x05, 1<<14, 0),
+               UPDATE_COEF(0x63, 3<<14, 3<<14),
+               UPDATE_COEF(0x4a, 3<<4, 2<<4),
+               UPDATE_COEF(0x4a, 3<<10, 3<<10),
+               UPDATE_COEF(0x45, 0x3f<<10, 0x34<<10),
+               UPDATE_COEF(0x4a, 3<<10, 0),
+               {}
+       };
 
        switch (codec->core.vendor_id) {
        case 0x10ec0255:
@@ -3682,6 +3696,9 @@ static void alc_headset_mode_unplugged(struct hda_codec *codec)
        case 0x10ec0668:
                alc_process_coef_fw(codec, coef0668);
                break;
+       case 0x10ec0225:
+               alc_process_coef_fw(codec, coef0225);
+               break;
        }
        codec_dbg(codec, "Headset jack set to unplugged mode.\n");
 }
@@ -3727,6 +3744,13 @@ static void alc_headset_mode_mic_in(struct hda_codec *codec, hda_nid_t hp_pin,
                UPDATE_COEF(0xc3, 0, 1<<12),
                {}
        };
+       static struct coef_fw coef0225[] = {
+               UPDATE_COEFEX(0x57, 0x05, 1<<14, 1<<14),
+               UPDATE_COEF(0x4a, 3<<4, 2<<4),
+               UPDATE_COEF(0x63, 3<<14, 0),
+               {}
+       };
+
 
        switch (codec->core.vendor_id) {
        case 0x10ec0255:
@@ -3772,6 +3796,12 @@ static void alc_headset_mode_mic_in(struct hda_codec *codec, hda_nid_t hp_pin,
                alc_process_coef_fw(codec, coef0688);
                snd_hda_set_pin_ctl_cache(codec, mic_pin, PIN_VREF50);
                break;
+       case 0x10ec0225:
+               alc_update_coef_idx(codec, 0x45, 0x3f<<10, 0x31<<10);
+               snd_hda_set_pin_ctl_cache(codec, hp_pin, 0);
+               alc_process_coef_fw(codec, coef0225);
+               snd_hda_set_pin_ctl_cache(codec, mic_pin, PIN_VREF50);
+               break;
        }
        codec_dbg(codec, "Headset jack set to mic-in mode.\n");
 }
@@ -3884,6 +3914,13 @@ static void alc_headset_mode_ctia(struct hda_codec *codec)
                WRITE_COEF(0xc3, 0x0000),
                {}
        };
+       static struct coef_fw coef0225[] = {
+               UPDATE_COEF(0x45, 0x3f<<10, 0x35<<10),
+               UPDATE_COEF(0x49, 1<<8, 1<<8),
+               UPDATE_COEF(0x4a, 7<<6, 7<<6),
+               UPDATE_COEF(0x4a, 3<<4, 3<<4),
+               {}
+       };
 
        switch (codec->core.vendor_id) {
        case 0x10ec0255:
@@ -3912,6 +3949,9 @@ static void alc_headset_mode_ctia(struct hda_codec *codec)
        case 0x10ec0668:
                alc_process_coef_fw(codec, coef0688);
                break;
+       case 0x10ec0225:
+               alc_process_coef_fw(codec, coef0225);
+               break;
        }
        codec_dbg(codec, "Headset jack set to iPhone-style headset mode.\n");
 }
@@ -3955,6 +3995,13 @@ static void alc_headset_mode_omtp(struct hda_codec *codec)
                WRITE_COEF(0xc3, 0x0000),
                {}
        };
+       static struct coef_fw coef0225[] = {
+               UPDATE_COEF(0x45, 0x3f<<10, 0x39<<10),
+               UPDATE_COEF(0x49, 1<<8, 1<<8),
+               UPDATE_COEF(0x4a, 7<<6, 7<<6),
+               UPDATE_COEF(0x4a, 3<<4, 3<<4),
+               {}
+       };
 
        switch (codec->core.vendor_id) {
        case 0x10ec0255:
@@ -3983,6 +4030,9 @@ static void alc_headset_mode_omtp(struct hda_codec *codec)
        case 0x10ec0668:
                alc_process_coef_fw(codec, coef0688);
                break;
+       case 0x10ec0225:
+               alc_process_coef_fw(codec, coef0225);
+               break;
        }
        codec_dbg(codec, "Headset jack set to Nokia-style headset mode.\n");
 }
@@ -4014,6 +4064,11 @@ static void alc_determine_headset_type(struct hda_codec *codec)
                WRITE_COEF(0xc3, 0x0c00),
                {}
        };
+       static struct coef_fw coef0225[] = {
+               UPDATE_COEF(0x45, 0x3f<<10, 0x34<<10),
+               UPDATE_COEF(0x49, 1<<8, 1<<8),
+               {}
+       };
 
        switch (codec->core.vendor_id) {
        case 0x10ec0255:
@@ -4058,6 +4113,12 @@ static void alc_determine_headset_type(struct hda_codec *codec)
                val = alc_read_coef_idx(codec, 0xbe);
                is_ctia = (val & 0x1c02) == 0x1c02;
                break;
+       case 0x10ec0225:
+               alc_process_coef_fw(codec, coef0225);
+               msleep(800);
+               val = alc_read_coef_idx(codec, 0x46);
+               is_ctia = (val & 0x00f0) == 0x00f0;
+               break;
        }
 
        codec_dbg(codec, "Headset jack detected iPhone-style headset: %s\n",
@@ -5560,6 +5621,9 @@ static const struct hda_model_fixup alc269_fixup_models[] = {
        {.id = ALC292_FIXUP_TPT440, .name = "tpt440"},
        {}
 };
+#define ALC225_STANDARD_PINS \
+       {0x12, 0xb7a60130}, \
+       {0x21, 0x04211020}
 
 #define ALC256_STANDARD_PINS \
        {0x12, 0x90a60140}, \
@@ -5581,6 +5645,12 @@ static const struct hda_model_fixup alc269_fixup_models[] = {
        {0x21, 0x03211020}
 
 static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
+       SND_HDA_PIN_QUIRK(0x10ec0225, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE,
+               ALC225_STANDARD_PINS,
+               {0x14, 0x901701a0}),
+       SND_HDA_PIN_QUIRK(0x10ec0225, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE,
+               ALC225_STANDARD_PINS,
+               {0x14, 0x901701b0}),
        SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL2_MIC_NO_PRESENCE,
                {0x14, 0x90170110},
                {0x21, 0x02211020}),
@@ -5906,6 +5976,9 @@ static int patch_alc269(struct hda_codec *codec)
                spec->gen.mixer_nid = 0; /* ALC256 does not have any loopback mixer path */
                alc_update_coef_idx(codec, 0x36, 1 << 13, 1 << 5); /* Switch pcbeep path to Line in path*/
                break;
+       case 0x10ec0225:
+               spec->codec_variant = ALC269_TYPE_ALC225;
+               break;
        }
 
        if (snd_hda_codec_read(codec, 0x51, 0, AC_VERB_PARAMETERS, 0) == 0x10ec5505) {
@@ -6566,6 +6639,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1028, 0x069f, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x103c, 0x1632, "HP RP5800", ALC662_FIXUP_HP_RP5800),
        SND_PCI_QUIRK(0x1043, 0x11cd, "Asus N550", ALC662_FIXUP_BASS_1A),
+       SND_PCI_QUIRK(0x1043, 0x13df, "Asus N550JX", ALC662_FIXUP_BASS_1A),
        SND_PCI_QUIRK(0x1043, 0x1477, "ASUS N56VZ", ALC662_FIXUP_BASS_MODE4_CHMAP),
        SND_PCI_QUIRK(0x1043, 0x15a7, "ASUS UX51VZH", ALC662_FIXUP_BASS_16),
        SND_PCI_QUIRK(0x1043, 0x1b73, "ASUS N55SF", ALC662_FIXUP_BASS_16),
@@ -6795,6 +6869,7 @@ static int patch_alc680(struct hda_codec *codec)
  */
 static const struct hda_device_id snd_hda_id_realtek[] = {
        HDA_CODEC_ENTRY(0x10ec0221, "ALC221", patch_alc269),
+       HDA_CODEC_ENTRY(0x10ec0225, "ALC225", patch_alc269),
        HDA_CODEC_ENTRY(0x10ec0231, "ALC231", patch_alc269),
        HDA_CODEC_ENTRY(0x10ec0233, "ALC233", patch_alc269),
        HDA_CODEC_ENTRY(0x10ec0235, "ALC233", patch_alc269),
index d75deba5617db05dc02407b6c669d61ee1695a5e..dfcd38647606445d7dab130178c7ca4f765ff33c 100644 (file)
@@ -22,6 +22,7 @@ config SND_SUN_AMD7930
 config SND_SUN_CS4231
        tristate "Sun CS4231"
        select SND_PCM
+       select SND_TIMER
        help
          Say Y here to include support for CS4231 sound device on Sun.
 
index 39522367897caedb4be23f1f92579d31a4d1fe38..fac7e6eb9529c26d2bb3e30ceaba11b42f55d15b 100644 (file)
@@ -221,6 +221,8 @@ static int snd_at73c213_pcm_open(struct snd_pcm_substream *substream)
        runtime->hw = snd_at73c213_playback_hw;
        chip->substream = substream;
 
+       clk_enable(chip->ssc->clk);
+
        return 0;
 }
 
@@ -228,6 +230,7 @@ static int snd_at73c213_pcm_close(struct snd_pcm_substream *substream)
 {
        struct snd_at73c213 *chip = snd_pcm_substream_chip(substream);
        chip->substream = NULL;
+       clk_disable(chip->ssc->clk);
        return 0;
 }
 
@@ -897,6 +900,8 @@ static int snd_at73c213_dev_init(struct snd_card *card,
        chip->card = card;
        chip->irq = -1;
 
+       clk_enable(chip->ssc->clk);
+
        retval = request_irq(irq, snd_at73c213_interrupt, 0, "at73c213", chip);
        if (retval) {
                dev_dbg(&chip->spi->dev, "unable to request irq %d\n", irq);
@@ -935,6 +940,8 @@ out_irq:
        free_irq(chip->irq, chip);
        chip->irq = -1;
 out:
+       clk_disable(chip->ssc->clk);
+
        return retval;
 }
 
@@ -1012,7 +1019,9 @@ static int snd_at73c213_remove(struct spi_device *spi)
        int retval;
 
        /* Stop playback. */
+       clk_enable(chip->ssc->clk);
        ssc_writel(chip->ssc->regs, CR, SSC_BIT(CR_TXDIS));
+       clk_disable(chip->ssc->clk);
 
        /* Mute sound. */
        retval = snd_at73c213_write_reg(chip, DAC_LMPG, 0x3f);
@@ -1080,6 +1089,7 @@ static int snd_at73c213_suspend(struct device *dev)
        struct snd_at73c213 *chip = card->private_data;
 
        ssc_writel(chip->ssc->regs, CR, SSC_BIT(CR_TXDIS));
+       clk_disable(chip->ssc->clk);
        clk_disable(chip->board->dac_clk);
 
        return 0;
@@ -1091,6 +1101,7 @@ static int snd_at73c213_resume(struct device *dev)
        struct snd_at73c213 *chip = card->private_data;
 
        clk_enable(chip->board->dac_clk);
+       clk_enable(chip->ssc->clk);
        ssc_writel(chip->ssc->regs, CR, SSC_BIT(CR_TXEN));
 
        return 0;
index 23ea6d800c4c5283adc27d88fd26e74feaf066e0..4f6ce1cac8e20ef8b504073a6dfcc6236c0cf90c 100644 (file)
@@ -1121,6 +1121,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip)
        switch (chip->usb_id) {
        case USB_ID(0x045E, 0x075D): /* MS Lifecam Cinema  */
        case USB_ID(0x045E, 0x076D): /* MS Lifecam HD-5000 */
+       case USB_ID(0x045E, 0x076F): /* MS Lifecam HD-6000 */
        case USB_ID(0x045E, 0x0772): /* MS Lifecam Studio */
        case USB_ID(0x045E, 0x0779): /* MS Lifecam HD-3000 */
        case USB_ID(0x04D8, 0xFEEA): /* Benchmark DAC1 Pre */
@@ -1205,8 +1206,12 @@ void snd_usb_set_interface_quirk(struct usb_device *dev)
         * "Playback Design" products need a 50ms delay after setting the
         * USB interface.
         */
-       if (le16_to_cpu(dev->descriptor.idVendor) == 0x23ba)
+       switch (le16_to_cpu(dev->descriptor.idVendor)) {
+       case 0x23ba: /* Playback Design */
+       case 0x0644: /* TEAC Corp. */
                mdelay(50);
+               break;
+       }
 }
 
 void snd_usb_ctl_msg_quirk(struct usb_device *dev, unsigned int pipe,
@@ -1221,6 +1226,14 @@ void snd_usb_ctl_msg_quirk(struct usb_device *dev, unsigned int pipe,
            (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS)
                mdelay(20);
 
+       /*
+        * "TEAC Corp." products need a 20ms delay after each
+        * class compliant request
+        */
+       if ((le16_to_cpu(dev->descriptor.idVendor) == 0x0644) &&
+           (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS)
+               mdelay(20);
+
        /* Marantz/Denon devices with USB DAC functionality need a delay
         * after each class compliant request
         */
@@ -1269,7 +1282,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip,
        case USB_ID(0x20b1, 0x3008): /* iFi Audio micro/nano iDSD */
        case USB_ID(0x20b1, 0x2008): /* Matrix Audio X-Sabre */
        case USB_ID(0x20b1, 0x300a): /* Matrix Audio Mini-i Pro */
-       case USB_ID(0x22d8, 0x0416): /* OPPO HA-1*/
+       case USB_ID(0x22d9, 0x0416): /* OPPO HA-1 */
                if (fp->altsetting == 2)
                        return SNDRV_PCM_FMTBIT_DSD_U32_BE;
                break;
@@ -1278,6 +1291,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip,
        case USB_ID(0x20b1, 0x2009): /* DIYINHK DSD DXD 384kHz USB to I2S/DSD */
        case USB_ID(0x20b1, 0x2023): /* JLsounds I2SoverUSB */
        case USB_ID(0x20b1, 0x3023): /* Aune X1S 32BIT/384 DSD DAC */
+       case USB_ID(0x2616, 0x0106): /* PS Audio NuWave DAC */
                if (fp->altsetting == 3)
                        return SNDRV_PCM_FMTBIT_DSD_U32_BE;
                break;
index ea69ce35e902a828b40839cfd7beb14bb9ff29c8..c3bd294a63d1f7e4a75b2a950bbe398f33703cd6 100644 (file)
@@ -3746,7 +3746,7 @@ static const struct flag flags[] = {
        { "NET_TX_SOFTIRQ", 2 },
        { "NET_RX_SOFTIRQ", 3 },
        { "BLOCK_SOFTIRQ", 4 },
-       { "BLOCK_IOPOLL_SOFTIRQ", 5 },
+       { "IRQ_POLL_SOFTIRQ", 5 },
        { "TASKLET_SOFTIRQ", 6 },
        { "SCHED_SOFTIRQ", 7 },
        { "HRTIMER_SOFTIRQ", 8 },
index 0a22407e1d7d8abb092ff3d24b6829a27c234bbe..5d34815c7ccb8e02ac6d4db0d82d4712af73f343 100644 (file)
@@ -77,6 +77,9 @@ include config/utilities.mak
 # Define NO_AUXTRACE if you do not want AUX area tracing support
 #
 # Define NO_LIBBPF if you do not want BPF support
+#
+# Define FEATURES_DUMP to provide features detection dump file
+# and bypass the feature detection
 
 # As per kernel Makefile, avoid funny character set dependencies
 unexport LC_ALL
@@ -166,6 +169,15 @@ ifeq ($(config),1)
 include config/Makefile
 endif
 
+# The FEATURE_DUMP_EXPORT holds location of the actual
+# FEATURE_DUMP file to be used to bypass feature detection
+# (for bpf or any other subproject)
+ifeq ($(FEATURES_DUMP),)
+FEATURE_DUMP_EXPORT := $(realpath $(OUTPUT)FEATURE-DUMP)
+else
+FEATURE_DUMP_EXPORT := $(FEATURES_DUMP)
+endif
+
 export prefix bindir sharedir sysconfdir DESTDIR
 
 # sparse is architecture-neutral, which means that we need to tell it
@@ -436,7 +448,7 @@ $(LIBAPI)-clean:
        $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null
 
 $(LIBBPF): fixdep FORCE
-       $(Q)$(MAKE) -C $(BPF_DIR) O=$(OUTPUT) $(OUTPUT)libbpf.a FEATURES_DUMP=$(realpath $(OUTPUT)FEATURE-DUMP)
+       $(Q)$(MAKE) -C $(BPF_DIR) O=$(OUTPUT) $(OUTPUT)libbpf.a FEATURES_DUMP=$(FEATURE_DUMP_EXPORT)
 
 $(LIBBPF)-clean:
        $(call QUIET_CLEAN, libbpf)
@@ -610,6 +622,17 @@ clean: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean
        $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean
        $(python-clean)
 
+#
+# To provide FEATURE-DUMP into $(FEATURE_DUMP_COPY)
+# file if defined, with no further action.
+feature-dump:
+ifdef FEATURE_DUMP_COPY
+       @cp $(OUTPUT)FEATURE-DUMP $(FEATURE_DUMP_COPY)
+       @echo "FEATURE-DUMP file copied into $(FEATURE_DUMP_COPY)"
+else
+       @echo "FEATURE-DUMP file available in $(OUTPUT)FEATURE-DUMP"
+endif
+
 #
 # Trick: if ../../.git does not exist - we are building out of tree for example,
 # then force version regeneration:
index 3e89ba825f6bf3b3f8b15b9110564c63283998b4..7f064eb371589aa887e112cddb54145d6268141b 100644 (file)
@@ -17,7 +17,7 @@ static pid_t spawn(void)
        if (pid)
                return pid;
 
-       while(1);
+       while(1)
                sleep(5);
        return 0;
 }
index e5959c136a1907ee9905f044ae2e7786cb47d075..511141b102e8464fe3e102007f792220306ad3b9 100644 (file)
@@ -181,7 +181,11 @@ LDFLAGS += -Wl,-z,noexecstack
 
 EXTLIBS = -lpthread -lrt -lm -ldl
 
+ifeq ($(FEATURES_DUMP),)
 include $(srctree)/tools/build/Makefile.feature
+else
+include $(FEATURES_DUMP)
+endif
 
 ifeq ($(feature-stackprotector-all), 1)
   CFLAGS += -fstack-protector-all
index df38decc48c32df72e9dfa86ef3f7e7b035322e7..f918015512af96d1173891dac2e8f460ff65ca85 100644 (file)
@@ -5,7 +5,7 @@ ifeq ($(MAKECMDGOALS),)
 # no target specified, trigger the whole suite
 all:
        @echo "Testing Makefile";      $(MAKE) -sf tests/make MK=Makefile
-       @echo "Testing Makefile.perf"; $(MAKE) -sf tests/make MK=Makefile.perf
+       @echo "Testing Makefile.perf"; $(MAKE) -sf tests/make MK=Makefile.perf SET_PARALLEL=1 SET_O=1
 else
 # run only specific test over 'Makefile'
 %:
@@ -13,6 +13,26 @@ else
 endif
 else
 PERF := .
+PERF_O := $(PERF)
+O_OPT :=
+
+ifneq ($(O),)
+  FULL_O := $(shell readlink -f $(O) || echo $(O))
+  PERF_O := $(FULL_O)
+  ifeq ($(SET_O),1)
+    O_OPT := 'O=$(FULL_O)'
+  endif
+  K_O_OPT := 'O=$(FULL_O)'
+endif
+
+PARALLEL_OPT=
+ifeq ($(SET_PARALLEL),1)
+  cores := $(shell (getconf _NPROCESSORS_ONLN || egrep -c '^processor|^CPU[0-9]' /proc/cpuinfo) 2>/dev/null)
+  ifeq ($(cores),0)
+    cores := 1
+  endif
+  PARALLEL_OPT="-j$(cores)"
+endif
 
 # As per kernel Makefile, avoid funny character set dependencies
 unexport LC_ALL
@@ -156,11 +176,11 @@ test_make_doc    := $(test_ok)
 test_make_help_O := $(test_ok)
 test_make_doc_O  := $(test_ok)
 
-test_make_python_perf_so := test -f $(PERF)/python/perf.so
+test_make_python_perf_so := test -f $(PERF_O)/python/perf.so
 
-test_make_perf_o           := test -f $(PERF)/perf.o
-test_make_util_map_o       := test -f $(PERF)/util/map.o
-test_make_util_pmu_bison_o := test -f $(PERF)/util/pmu-bison.o
+test_make_perf_o           := test -f $(PERF_O)/perf.o
+test_make_util_map_o       := test -f $(PERF_O)/util/map.o
+test_make_util_pmu_bison_o := test -f $(PERF_O)/util/pmu-bison.o
 
 define test_dest_files
   for file in $(1); do                         \
@@ -227,7 +247,7 @@ test_make_perf_o_O            := test -f $$TMP_O/perf.o
 test_make_util_map_o_O        := test -f $$TMP_O/util/map.o
 test_make_util_pmu_bison_o_O := test -f $$TMP_O/util/pmu-bison.o
 
-test_default = test -x $(PERF)/perf
+test_default = test -x $(PERF_O)/perf
 test = $(if $(test_$1),$(test_$1),$(test_default))
 
 test_default_O = test -x $$TMP_O/perf
@@ -247,12 +267,12 @@ endif
 
 MAKEFLAGS := --no-print-directory
 
-clean := @(cd $(PERF); make -s -f $(MK) clean >/dev/null)
+clean := @(cd $(PERF); make -s -f $(MK) $(O_OPT) clean >/dev/null)
 
 $(run):
        $(call clean)
        @TMP_DEST=$$(mktemp -d); \
-       cmd="cd $(PERF) && make -f $(MK) DESTDIR=$$TMP_DEST $($@)"; \
+       cmd="cd $(PERF) && make -f $(MK) $(PARALLEL_OPT) $(O_OPT) DESTDIR=$$TMP_DEST $($@)"; \
        echo "- $@: $$cmd" && echo $$cmd > $@ && \
        ( eval $$cmd ) >> $@ 2>&1; \
        echo "  test: $(call test,$@)" >> $@ 2>&1; \
@@ -263,7 +283,7 @@ $(run_O):
        $(call clean)
        @TMP_O=$$(mktemp -d); \
        TMP_DEST=$$(mktemp -d); \
-       cmd="cd $(PERF) && make -f $(MK) O=$$TMP_O DESTDIR=$$TMP_DEST $($(patsubst %_O,%,$@))"; \
+       cmd="cd $(PERF) && make -f $(MK) $(PARALLEL_OPT) O=$$TMP_O DESTDIR=$$TMP_DEST $($(patsubst %_O,%,$@))"; \
        echo "- $@: $$cmd" && echo $$cmd > $@ && \
        ( eval $$cmd ) >> $@ 2>&1 && \
        echo "  test: $(call test_O,$@)" >> $@ 2>&1; \
@@ -276,17 +296,22 @@ tarpkg:
        ( eval $$cmd ) >> $@ 2>&1 && \
        rm -f $@
 
+KERNEL_O := ../..
+ifneq ($(O),)
+  KERNEL_O := $(O)
+endif
+
 make_kernelsrc:
-       @echo "- make -C <kernelsrc> tools/perf"
+       @echo "- make -C <kernelsrc> $(PARALLEL_OPT) $(K_O_OPT) tools/perf"
        $(call clean); \
-       (make -C ../.. tools/perf) > $@ 2>&1 && \
-       test -x perf && rm -f $@ || (cat $@ ; false)
+       (make -C ../.. $(PARALLEL_OPT) $(K_O_OPT) tools/perf) > $@ 2>&1 && \
+       test -x $(KERNEL_O)/tools/perf/perf && rm -f $@ || (cat $@ ; false)
 
 make_kernelsrc_tools:
-       @echo "- make -C <kernelsrc>/tools perf"
+       @echo "- make -C <kernelsrc>/tools $(PARALLEL_OPT) $(K_O_OPT) perf"
        $(call clean); \
-       (make -C ../../tools perf) > $@ 2>&1 && \
-       test -x perf && rm -f $@ || (cat $@ ; false)
+       (make -C ../../tools $(PARALLEL_OPT) $(K_O_OPT) perf) > $@ 2>&1 && \
+       test -x $(KERNEL_O)/tools/perf/perf && rm -f $@ || (cat $@ ; false)
 
 all: $(run) $(run_O) tarpkg make_kernelsrc make_kernelsrc_tools
        @echo OK
index d4d7cc27252f1184bf2d678b6460f138099efd28..718bd46d47fa7bc88674a192dd1a8e8ab1fb7ca9 100644 (file)
@@ -755,11 +755,11 @@ static int annotate_browser__run(struct annotate_browser *browser,
                                nd = browser->curr_hot;
                        break;
                case K_UNTAB:
-                       if (nd != NULL)
+                       if (nd != NULL) {
                                nd = rb_next(nd);
                                if (nd == NULL)
                                        nd = rb_first(&browser->entries);
-                       else
+                       else
                                nd = browser->curr_hot;
                        break;
                case K_F1:
index c226303e3da045743fe676c7c2ffc0ff13eac674..68a7612019dc3c3be315604693b68170183044d6 100644 (file)
@@ -131,6 +131,8 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
                        symlen = unresolved_col_width + 4 + 2;
                        hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL,
                                           symlen);
+                       hists__new_col_len(hists, HISTC_MEM_DCACHELINE,
+                                          symlen);
                }
 
                if (h->mem_info->iaddr.sym) {
index d5636ba94b20f722d3f521614e8fae08b620fad4..40b7a0d0905b8d7f01972c6e38e225f108b15133 100644 (file)
@@ -1149,7 +1149,7 @@ static struct machine *machines__find_for_cpumode(struct machines *machines,
 
                machine = machines__find(machines, pid);
                if (!machine)
-                       machine = machines__find(machines, DEFAULT_GUEST_KERNEL_ID);
+                       machine = machines__findnew(machines, DEFAULT_GUEST_KERNEL_ID);
                return machine;
        }
 
index 2f901d15e06370d26d579869d351811f4d31f940..2b58edccd56f8bf63c0364d17b5ada656f4f582c 100644 (file)
@@ -310,7 +310,6 @@ int perf_stat_process_counter(struct perf_stat_config *config,
        int i, ret;
 
        aggr->val = aggr->ena = aggr->run = 0;
-       init_stats(ps->res_stats);
 
        if (counter->per_pkg)
                zero_per_pkg(counter);
index 3b2de6eb33763b0ab1a23195529fda8d6367eb1d..ab02209a7cf3b162431bfc006287f5fbd8c68f43 100644 (file)
@@ -1466,7 +1466,7 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter)
         * Read the build id if possible. This is required for
         * DSO_BINARY_TYPE__BUILDID_DEBUGINFO to work
         */
-       if (filename__read_build_id(dso->name, build_id, BUILD_ID_SIZE) > 0)
+       if (filename__read_build_id(dso->long_name, build_id, BUILD_ID_SIZE) > 0)
                dso__set_build_id(dso, build_id);
 
        /*
index 8ff7d620d9427490f529cf7ec63471ae1bcb3b17..33b52eaa39db293b0de4d41c7312f88b39a2c4e2 100644 (file)
@@ -209,7 +209,7 @@ static const struct flag flags[] = {
        { "NET_TX_SOFTIRQ", 2 },
        { "NET_RX_SOFTIRQ", 3 },
        { "BLOCK_SOFTIRQ", 4 },
-       { "BLOCK_IOPOLL_SOFTIRQ", 5 },
+       { "IRQ_POLL_SOFTIRQ", 5 },
        { "TASKLET_SOFTIRQ", 6 },
        { "SCHED_SOFTIRQ", 7 },
        { "HRTIMER_SOFTIRQ", 8 },
index 7ec7df9e7fc731ab7e118425b5bd01acaab349d5..0c1a7e65bb815a692f9b20cd97b337c181699308 100644 (file)
@@ -113,7 +113,7 @@ void *__wrap_devm_memremap_pages(struct device *dev, struct resource *res,
 }
 EXPORT_SYMBOL(__wrap_devm_memremap_pages);
 
-pfn_t __wrap_phys_to_pfn_t(dma_addr_t addr, unsigned long flags)
+pfn_t __wrap_phys_to_pfn_t(phys_addr_t addr, unsigned long flags)
 {
        struct nfit_test_resource *nfit_res = get_nfit_res(addr);
 
index e86d937cc22c04cb36aecfe4fb4af0c5deed3825..60fe3c569bd90f9cbfdc9664720c598e9430e8a3 100644 (file)
@@ -45,7 +45,17 @@ static inline int ksft_exit_fail(void)
 }
 #endif
 
-#define NSEC_PER_SEC 1000000000L
+#define NSEC_PER_SEC 1000000000LL
+#define USEC_PER_SEC 1000000LL
+
+#define ADJ_SETOFFSET 0x0100
+
+#include <sys/syscall.h>
+static int clock_adjtime(clockid_t id, struct timex *tx)
+{
+       return syscall(__NR_clock_adjtime, id, tx);
+}
+
 
 /* clear NTP time_status & time_state */
 int clear_time_state(void)
@@ -193,10 +203,137 @@ out:
 }
 
 
+int set_offset(long long offset, int use_nano)
+{
+       struct timex tmx = {};
+       int ret;
+
+       tmx.modes = ADJ_SETOFFSET;
+       if (use_nano) {
+               tmx.modes |= ADJ_NANO;
+
+               tmx.time.tv_sec = offset / NSEC_PER_SEC;
+               tmx.time.tv_usec = offset % NSEC_PER_SEC;
+
+               if (offset < 0 && tmx.time.tv_usec) {
+                       tmx.time.tv_sec -= 1;
+                       tmx.time.tv_usec += NSEC_PER_SEC;
+               }
+       } else {
+               tmx.time.tv_sec = offset / USEC_PER_SEC;
+               tmx.time.tv_usec = offset % USEC_PER_SEC;
+
+               if (offset < 0 && tmx.time.tv_usec) {
+                       tmx.time.tv_sec -= 1;
+                       tmx.time.tv_usec += USEC_PER_SEC;
+               }
+       }
+
+       ret = clock_adjtime(CLOCK_REALTIME, &tmx);
+       if (ret < 0) {
+               printf("(sec: %ld  usec: %ld) ", tmx.time.tv_sec, tmx.time.tv_usec);
+               printf("[FAIL]\n");
+               return -1;
+       }
+       return 0;
+}
+
+int set_bad_offset(long sec, long usec, int use_nano)
+{
+       struct timex tmx = {};
+       int ret;
+
+       tmx.modes = ADJ_SETOFFSET;
+       if (use_nano)
+               tmx.modes |= ADJ_NANO;
+
+       tmx.time.tv_sec = sec;
+       tmx.time.tv_usec = usec;
+       ret = clock_adjtime(CLOCK_REALTIME, &tmx);
+       if (ret >= 0) {
+               printf("Invalid (sec: %ld  usec: %ld) did not fail! ", tmx.time.tv_sec, tmx.time.tv_usec);
+               printf("[FAIL]\n");
+               return -1;
+       }
+       return 0;
+}
+
+int validate_set_offset(void)
+{
+       printf("Testing ADJ_SETOFFSET... ");
+
+       /* Test valid values */
+       if (set_offset(NSEC_PER_SEC - 1, 1))
+               return -1;
+
+       if (set_offset(-NSEC_PER_SEC + 1, 1))
+               return -1;
+
+       if (set_offset(-NSEC_PER_SEC - 1, 1))
+               return -1;
+
+       if (set_offset(5 * NSEC_PER_SEC, 1))
+               return -1;
+
+       if (set_offset(-5 * NSEC_PER_SEC, 1))
+               return -1;
+
+       if (set_offset(5 * NSEC_PER_SEC + NSEC_PER_SEC / 2, 1))
+               return -1;
+
+       if (set_offset(-5 * NSEC_PER_SEC - NSEC_PER_SEC / 2, 1))
+               return -1;
+
+       if (set_offset(USEC_PER_SEC - 1, 0))
+               return -1;
+
+       if (set_offset(-USEC_PER_SEC + 1, 0))
+               return -1;
+
+       if (set_offset(-USEC_PER_SEC - 1, 0))
+               return -1;
+
+       if (set_offset(5 * USEC_PER_SEC, 0))
+               return -1;
+
+       if (set_offset(-5 * USEC_PER_SEC, 0))
+               return -1;
+
+       if (set_offset(5 * USEC_PER_SEC + USEC_PER_SEC / 2, 0))
+               return -1;
+
+       if (set_offset(-5 * USEC_PER_SEC - USEC_PER_SEC / 2, 0))
+               return -1;
+
+       /* Test invalid values */
+       if (set_bad_offset(0, -1, 1))
+               return -1;
+       if (set_bad_offset(0, -1, 0))
+               return -1;
+       if (set_bad_offset(0, 2 * NSEC_PER_SEC, 1))
+               return -1;
+       if (set_bad_offset(0, 2 * USEC_PER_SEC, 0))
+               return -1;
+       if (set_bad_offset(0, NSEC_PER_SEC, 1))
+               return -1;
+       if (set_bad_offset(0, USEC_PER_SEC, 0))
+               return -1;
+       if (set_bad_offset(0, -NSEC_PER_SEC, 1))
+               return -1;
+       if (set_bad_offset(0, -USEC_PER_SEC, 0))
+               return -1;
+
+       printf("[OK]\n");
+       return 0;
+}
+
 int main(int argc, char **argv)
 {
        if (validate_freq())
                return ksft_exit_fail();
 
+       if (validate_set_offset())
+               return ksft_exit_fail();
+
        return ksft_exit_pass();
 }
index 26b7926bda8849a8b931fff6750a3ed8217dae89..ba34f9e96efd6bd760870150dd9cb6ba0eacbd69 100644 (file)
@@ -1,15 +1,19 @@
 #if defined(__i386__) || defined(__x86_64__)
 #define barrier() asm volatile("" ::: "memory")
-#define mb() __sync_synchronize()
-
-#define smp_mb()       mb()
-# define dma_rmb()     barrier()
-# define dma_wmb()     barrier()
-# define smp_rmb()     barrier()
-# define smp_wmb()     barrier()
+#define virt_mb() __sync_synchronize()
+#define virt_rmb() barrier()
+#define virt_wmb() barrier()
+/* Atomic store should be enough, but gcc generates worse code in that case. */
+#define virt_store_mb(var, value)  do { \
+       typeof(var) virt_store_mb_value = (value); \
+       __atomic_exchange(&(var), &virt_store_mb_value, &virt_store_mb_value, \
+                         __ATOMIC_SEQ_CST); \
+       barrier(); \
+} while (0);
 /* Weak barriers should be used. If not - it's a bug */
-# define rmb() abort()
-# define wmb() abort()
+# define mb() abort()
+# define rmb() abort()
+# define wmb() abort()
 #else
 #error Please fill in barrier macros
 #endif
diff --git a/tools/virtio/linux/compiler.h b/tools/virtio/linux/compiler.h
new file mode 100644 (file)
index 0000000..845960e
--- /dev/null
@@ -0,0 +1,9 @@
+#ifndef LINUX_COMPILER_H
+#define LINUX_COMPILER_H
+
+#define WRITE_ONCE(var, val) \
+       (*((volatile typeof(val) *)(&(var))) = (val))
+
+#define READ_ONCE(var) (*((volatile typeof(val) *)(&(var))))
+
+#endif
index 4db7d5691ba71b33d2bda8c85d634fe3781ae6d6..0338499482159883bb3e30452d5afc0f77af577e 100644 (file)
@@ -8,6 +8,7 @@
 #include <assert.h>
 #include <stdarg.h>
 
+#include <linux/compiler.h>
 #include <linux/types.h>
 #include <linux/printk.h>
 #include <linux/bug.h>
diff --git a/tools/virtio/ringtest/Makefile b/tools/virtio/ringtest/Makefile
new file mode 100644 (file)
index 0000000..feaa64a
--- /dev/null
@@ -0,0 +1,22 @@
+all:
+
+all: ring virtio_ring_0_9 virtio_ring_poll
+
+CFLAGS += -Wall
+CFLAGS += -pthread -O2 -ggdb
+LDFLAGS += -pthread -O2 -ggdb
+
+main.o: main.c main.h
+ring.o: ring.c main.h
+virtio_ring_0_9.o: virtio_ring_0_9.c main.h
+virtio_ring_poll.o: virtio_ring_poll.c virtio_ring_0_9.c main.h
+ring: ring.o main.o
+virtio_ring_0_9: virtio_ring_0_9.o main.o
+virtio_ring_poll: virtio_ring_poll.o main.o
+clean:
+       -rm main.o
+       -rm ring.o ring
+       -rm virtio_ring_0_9.o virtio_ring_0_9
+       -rm virtio_ring_poll.o virtio_ring_poll
+
+.PHONY: all clean
diff --git a/tools/virtio/ringtest/README b/tools/virtio/ringtest/README
new file mode 100644 (file)
index 0000000..34e94c4
--- /dev/null
@@ -0,0 +1,2 @@
+Partial implementation of various ring layouts, useful to tune virtio design.
+Uses shared memory heavily.
diff --git a/tools/virtio/ringtest/main.c b/tools/virtio/ringtest/main.c
new file mode 100644 (file)
index 0000000..3a5ff43
--- /dev/null
@@ -0,0 +1,366 @@
+/*
+ * Copyright (C) 2016 Red Hat, Inc.
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Command line processing and common functions for ring benchmarking.
+ */
+#define _GNU_SOURCE
+#include <getopt.h>
+#include <pthread.h>
+#include <assert.h>
+#include <sched.h>
+#include "main.h"
+#include <sys/eventfd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <limits.h>
+
+int runcycles = 10000000;
+int max_outstanding = INT_MAX;
+int batch = 1;
+
+bool do_sleep = false;
+bool do_relax = false;
+bool do_exit = true;
+
+unsigned ring_size = 256;
+
+static int kickfd = -1;
+static int callfd = -1;
+
+void notify(int fd)
+{
+       unsigned long long v = 1;
+       int r;
+
+       vmexit();
+       r = write(fd, &v, sizeof v);
+       assert(r == sizeof v);
+       vmentry();
+}
+
+void wait_for_notify(int fd)
+{
+       unsigned long long v = 1;
+       int r;
+
+       vmexit();
+       r = read(fd, &v, sizeof v);
+       assert(r == sizeof v);
+       vmentry();
+}
+
+void kick(void)
+{
+       notify(kickfd);
+}
+
+void wait_for_kick(void)
+{
+       wait_for_notify(kickfd);
+}
+
+void call(void)
+{
+       notify(callfd);
+}
+
+void wait_for_call(void)
+{
+       wait_for_notify(callfd);
+}
+
+void set_affinity(const char *arg)
+{
+       cpu_set_t cpuset;
+       int ret;
+       pthread_t self;
+       long int cpu;
+       char *endptr;
+
+       if (!arg)
+               return;
+
+       cpu = strtol(arg, &endptr, 0);
+       assert(!*endptr);
+
+       assert(cpu >= 0 || cpu < CPU_SETSIZE);
+
+       self = pthread_self();
+       CPU_ZERO(&cpuset);
+       CPU_SET(cpu, &cpuset);
+
+       ret = pthread_setaffinity_np(self, sizeof(cpu_set_t), &cpuset);
+       assert(!ret);
+}
+
+static void run_guest(void)
+{
+       int completed_before;
+       int completed = 0;
+       int started = 0;
+       int bufs = runcycles;
+       int spurious = 0;
+       int r;
+       unsigned len;
+       void *buf;
+       int tokick = batch;
+
+       for (;;) {
+               if (do_sleep)
+                       disable_call();
+               completed_before = completed;
+               do {
+                       if (started < bufs &&
+                           started - completed < max_outstanding) {
+                               r = add_inbuf(0, NULL, "Hello, world!");
+                               if (__builtin_expect(r == 0, true)) {
+                                       ++started;
+                                       if (!--tokick) {
+                                               tokick = batch;
+                                               if (do_sleep)
+                                                       kick_available();
+                                       }
+
+                               }
+                       } else
+                               r = -1;
+
+                       /* Flush out completed bufs if any */
+                       if (get_buf(&len, &buf)) {
+                               ++completed;
+                               if (__builtin_expect(completed == bufs, false))
+                                       return;
+                               r = 0;
+                       }
+               } while (r == 0);
+               if (completed == completed_before)
+                       ++spurious;
+               assert(completed <= bufs);
+               assert(started <= bufs);
+               if (do_sleep) {
+                       if (enable_call())
+                               wait_for_call();
+               } else {
+                       poll_used();
+               }
+       }
+}
+
+static void run_host(void)
+{
+       int completed_before;
+       int completed = 0;
+       int spurious = 0;
+       int bufs = runcycles;
+       unsigned len;
+       void *buf;
+
+       for (;;) {
+               if (do_sleep) {
+                       if (enable_kick())
+                               wait_for_kick();
+               } else {
+                       poll_avail();
+               }
+               if (do_sleep)
+                       disable_kick();
+               completed_before = completed;
+               while (__builtin_expect(use_buf(&len, &buf), true)) {
+                       if (do_sleep)
+                               call_used();
+                       ++completed;
+                       if (__builtin_expect(completed == bufs, false))
+                               return;
+               }
+               if (completed == completed_before)
+                       ++spurious;
+               assert(completed <= bufs);
+               if (completed == bufs)
+                       break;
+       }
+}
+
+void *start_guest(void *arg)
+{
+       set_affinity(arg);
+       run_guest();
+       pthread_exit(NULL);
+}
+
+void *start_host(void *arg)
+{
+       set_affinity(arg);
+       run_host();
+       pthread_exit(NULL);
+}
+
+static const char optstring[] = "";
+static const struct option longopts[] = {
+       {
+               .name = "help",
+               .has_arg = no_argument,
+               .val = 'h',
+       },
+       {
+               .name = "host-affinity",
+               .has_arg = required_argument,
+               .val = 'H',
+       },
+       {
+               .name = "guest-affinity",
+               .has_arg = required_argument,
+               .val = 'G',
+       },
+       {
+               .name = "ring-size",
+               .has_arg = required_argument,
+               .val = 'R',
+       },
+       {
+               .name = "run-cycles",
+               .has_arg = required_argument,
+               .val = 'C',
+       },
+       {
+               .name = "outstanding",
+               .has_arg = required_argument,
+               .val = 'o',
+       },
+       {
+               .name = "batch",
+               .has_arg = required_argument,
+               .val = 'b',
+       },
+       {
+               .name = "sleep",
+               .has_arg = no_argument,
+               .val = 's',
+       },
+       {
+               .name = "relax",
+               .has_arg = no_argument,
+               .val = 'x',
+       },
+       {
+               .name = "exit",
+               .has_arg = no_argument,
+               .val = 'e',
+       },
+       {
+       }
+};
+
+static void help(void)
+{
+       fprintf(stderr, "Usage: <test> [--help]"
+               " [--host-affinity H]"
+               " [--guest-affinity G]"
+               " [--ring-size R (default: %d)]"
+               " [--run-cycles C (default: %d)]"
+               " [--batch b]"
+               " [--outstanding o]"
+               " [--sleep]"
+               " [--relax]"
+               " [--exit]"
+               "\n",
+               ring_size,
+               runcycles);
+}
+
+int main(int argc, char **argv)
+{
+       int ret;
+       pthread_t host, guest;
+       void *tret;
+       char *host_arg = NULL;
+       char *guest_arg = NULL;
+       char *endptr;
+       long int c;
+
+       kickfd = eventfd(0, 0);
+       assert(kickfd >= 0);
+       callfd = eventfd(0, 0);
+       assert(callfd >= 0);
+
+       for (;;) {
+               int o = getopt_long(argc, argv, optstring, longopts, NULL);
+               switch (o) {
+               case -1:
+                       goto done;
+               case '?':
+                       help();
+                       exit(2);
+               case 'H':
+                       host_arg = optarg;
+                       break;
+               case 'G':
+                       guest_arg = optarg;
+                       break;
+               case 'R':
+                       ring_size = strtol(optarg, &endptr, 0);
+                       assert(ring_size && !(ring_size & (ring_size - 1)));
+                       assert(!*endptr);
+                       break;
+               case 'C':
+                       c = strtol(optarg, &endptr, 0);
+                       assert(!*endptr);
+                       assert(c > 0 && c < INT_MAX);
+                       runcycles = c;
+                       break;
+               case 'o':
+                       c = strtol(optarg, &endptr, 0);
+                       assert(!*endptr);
+                       assert(c > 0 && c < INT_MAX);
+                       max_outstanding = c;
+                       break;
+               case 'b':
+                       c = strtol(optarg, &endptr, 0);
+                       assert(!*endptr);
+                       assert(c > 0 && c < INT_MAX);
+                       batch = c;
+                       break;
+               case 's':
+                       do_sleep = true;
+                       break;
+               case 'x':
+                       do_relax = true;
+                       break;
+               case 'e':
+                       do_exit = true;
+                       break;
+               default:
+                       help();
+                       exit(4);
+                       break;
+               }
+       }
+
+       /* does nothing here, used to make sure all smp APIs compile */
+       smp_acquire();
+       smp_release();
+       smp_mb();
+done:
+
+       if (batch > max_outstanding)
+               batch = max_outstanding;
+
+       if (optind < argc) {
+               help();
+               exit(4);
+       }
+       alloc_ring();
+
+       ret = pthread_create(&host, NULL, start_host, host_arg);
+       assert(!ret);
+       ret = pthread_create(&guest, NULL, start_guest, guest_arg);
+       assert(!ret);
+
+       ret = pthread_join(guest, &tret);
+       assert(!ret);
+       ret = pthread_join(host, &tret);
+       assert(!ret);
+       return 0;
+}
diff --git a/tools/virtio/ringtest/main.h b/tools/virtio/ringtest/main.h
new file mode 100644 (file)
index 0000000..16917ac
--- /dev/null
@@ -0,0 +1,119 @@
+/*
+ * Copyright (C) 2016 Red Hat, Inc.
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Common macros and functions for ring benchmarking.
+ */
+#ifndef MAIN_H
+#define MAIN_H
+
+#include <stdbool.h>
+
+extern bool do_exit;
+
+#if defined(__x86_64__) || defined(__i386__)
+#include "x86intrin.h"
+
+static inline void wait_cycles(unsigned long long cycles)
+{
+       unsigned long long t;
+
+       t = __rdtsc();
+       while (__rdtsc() - t < cycles) {}
+}
+
+#define VMEXIT_CYCLES 500
+#define VMENTRY_CYCLES 500
+
+#else
+static inline void wait_cycles(unsigned long long cycles)
+{
+       _Exit(5);
+}
+#define VMEXIT_CYCLES 0
+#define VMENTRY_CYCLES 0
+#endif
+
+static inline void vmexit(void)
+{
+       if (!do_exit)
+               return;
+       
+       wait_cycles(VMEXIT_CYCLES);
+}
+static inline void vmentry(void)
+{
+       if (!do_exit)
+               return;
+       
+       wait_cycles(VMENTRY_CYCLES);
+}
+
+/* implemented by ring */
+void alloc_ring(void);
+/* guest side */
+int add_inbuf(unsigned, void *, void *);
+void *get_buf(unsigned *, void **);
+void disable_call();
+bool enable_call();
+void kick_available();
+void poll_used();
+/* host side */
+void disable_kick();
+bool enable_kick();
+bool use_buf(unsigned *, void **);
+void call_used();
+void poll_avail();
+
+/* implemented by main */
+extern bool do_sleep;
+void kick(void);
+void wait_for_kick(void);
+void call(void);
+void wait_for_call(void);
+
+extern unsigned ring_size;
+
+/* Compiler barrier - similar to what Linux uses */
+#define barrier() asm volatile("" ::: "memory")
+
+/* Is there a portable way to do this? */
+#if defined(__x86_64__) || defined(__i386__)
+#define cpu_relax() asm ("rep; nop" ::: "memory")
+#else
+#define cpu_relax() assert(0)
+#endif
+
+extern bool do_relax;
+
+static inline void busy_wait(void)
+{
+       if (do_relax)
+               cpu_relax();
+       else
+               /* prevent compiler from removing busy loops */
+               barrier();
+} 
+
+/*
+ * Not using __ATOMIC_SEQ_CST since gcc docs say they are only synchronized
+ * with other __ATOMIC_SEQ_CST calls.
+ */
+#define smp_mb() __sync_synchronize()
+
+/*
+ * This abuses the atomic builtins for thread fences, and
+ * adds a compiler barrier.
+ */
+#define smp_release() do { \
+    barrier(); \
+    __atomic_thread_fence(__ATOMIC_RELEASE); \
+} while (0)
+
+#define smp_acquire() do { \
+    __atomic_thread_fence(__ATOMIC_ACQUIRE); \
+    barrier(); \
+} while (0)
+
+#endif
diff --git a/tools/virtio/ringtest/ring.c b/tools/virtio/ringtest/ring.c
new file mode 100644 (file)
index 0000000..c25c8d2
--- /dev/null
@@ -0,0 +1,272 @@
+/*
+ * Copyright (C) 2016 Red Hat, Inc.
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Simple descriptor-based ring. virtio 0.9 compatible event index is used for
+ * signalling, unconditionally.
+ */
+#define _GNU_SOURCE
+#include "main.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+/* Next - Where next entry will be written.
+ * Prev - "Next" value when event triggered previously.
+ * Event - Peer requested event after writing this entry.
+ */
+static inline bool need_event(unsigned short event,
+                             unsigned short next,
+                             unsigned short prev)
+{
+       return (unsigned short)(next - event - 1) < (unsigned short)(next - prev);
+}
+
+/* Design:
+ * Guest adds descriptors with unique index values and DESC_HW in flags.
+ * Host overwrites used descriptors with correct len, index, and DESC_HW clear.
+ * Flags are always set last.
+ */
+#define DESC_HW 0x1
+
+struct desc {
+       unsigned short flags;
+       unsigned short index;
+       unsigned len;
+       unsigned long long addr;
+};
+
+/* how much padding is needed to avoid false cache sharing */
+#define HOST_GUEST_PADDING 0x80
+
+/* Mostly read */
+struct event {
+       unsigned short kick_index;
+       unsigned char reserved0[HOST_GUEST_PADDING - 2];
+       unsigned short call_index;
+       unsigned char reserved1[HOST_GUEST_PADDING - 2];
+};
+
+struct data {
+       void *buf; /* descriptor is writeable, we can't get buf from there */
+       void *data;
+} *data;
+
+struct desc *ring;
+struct event *event;
+
+struct guest {
+       unsigned avail_idx;
+       unsigned last_used_idx;
+       unsigned num_free;
+       unsigned kicked_avail_idx;
+       unsigned char reserved[HOST_GUEST_PADDING - 12];
+} guest;
+
+struct host {
+       /* we do not need to track last avail index
+        * unless we have more than one in flight.
+        */
+       unsigned used_idx;
+       unsigned called_used_idx;
+       unsigned char reserved[HOST_GUEST_PADDING - 4];
+} host;
+
+/* implemented by ring */
+void alloc_ring(void)
+{
+       int ret;
+       int i;
+
+       ret = posix_memalign((void **)&ring, 0x1000, ring_size * sizeof *ring);
+       if (ret) {
+               perror("Unable to allocate ring buffer.\n");
+               exit(3);
+       }
+       event = malloc(sizeof *event);
+       if (!event) {
+               perror("Unable to allocate event buffer.\n");
+               exit(3);
+       }
+       memset(event, 0, sizeof *event);
+       guest.avail_idx = 0;
+       guest.kicked_avail_idx = -1;
+       guest.last_used_idx = 0;
+       host.used_idx = 0;
+       host.called_used_idx = -1;
+       for (i = 0; i < ring_size; ++i) {
+               struct desc desc = {
+                       .index = i,
+               };
+               ring[i] = desc;
+       }
+       guest.num_free = ring_size;
+       data = malloc(ring_size * sizeof *data);
+       if (!data) {
+               perror("Unable to allocate data buffer.\n");
+               exit(3);
+       }
+       memset(data, 0, ring_size * sizeof *data);
+}
+
+/* guest side */
+int add_inbuf(unsigned len, void *buf, void *datap)
+{
+       unsigned head, index;
+
+       if (!guest.num_free)
+               return -1;
+
+       guest.num_free--;
+       head = (ring_size - 1) & (guest.avail_idx++);
+
+       /* Start with a write. On MESI architectures this helps
+        * avoid a shared state with consumer that is polling this descriptor.
+        */
+       ring[head].addr = (unsigned long)(void*)buf;
+       ring[head].len = len;
+       /* read below might bypass write above. That is OK because it's just an
+        * optimization. If this happens, we will get the cache line in a
+        * shared state which is unfortunate, but probably not worth it to
+        * add an explicit full barrier to avoid this.
+        */
+       barrier();
+       index = ring[head].index;
+       data[index].buf = buf;
+       data[index].data = datap;
+       /* Barrier A (for pairing) */
+       smp_release();
+       ring[head].flags = DESC_HW;
+
+       return 0;
+}
+
+void *get_buf(unsigned *lenp, void **bufp)
+{
+       unsigned head = (ring_size - 1) & guest.last_used_idx;
+       unsigned index;
+       void *datap;
+
+       if (ring[head].flags & DESC_HW)
+               return NULL;
+       /* Barrier B (for pairing) */
+       smp_acquire();
+       *lenp = ring[head].len;
+       index = ring[head].index & (ring_size - 1);
+       datap = data[index].data;
+       *bufp = data[index].buf;
+       data[index].buf = NULL;
+       data[index].data = NULL;
+       guest.num_free++;
+       guest.last_used_idx++;
+       return datap;
+}
+
+void poll_used(void)
+{
+       unsigned head = (ring_size - 1) & guest.last_used_idx;
+
+       while (ring[head].flags & DESC_HW)
+               busy_wait();
+}
+
+void disable_call()
+{
+       /* Doing nothing to disable calls might cause
+        * extra interrupts, but reduces the number of cache misses.
+        */
+}
+
+bool enable_call()
+{
+       unsigned head = (ring_size - 1) & guest.last_used_idx;
+
+       event->call_index = guest.last_used_idx;
+       /* Flush call index write */
+       /* Barrier D (for pairing) */
+       smp_mb();
+       return ring[head].flags & DESC_HW;
+}
+
+void kick_available(void)
+{
+       /* Flush in previous flags write */
+       /* Barrier C (for pairing) */
+       smp_mb();
+       if (!need_event(event->kick_index,
+                       guest.avail_idx,
+                       guest.kicked_avail_idx))
+               return;
+
+       guest.kicked_avail_idx = guest.avail_idx;
+       kick();
+}
+
+/* host side */
+void disable_kick()
+{
+       /* Doing nothing to disable kicks might cause
+        * extra interrupts, but reduces the number of cache misses.
+        */
+}
+
+bool enable_kick()
+{
+       unsigned head = (ring_size - 1) & host.used_idx;
+
+       event->kick_index = host.used_idx;
+       /* Barrier C (for pairing) */
+       smp_mb();
+       return !(ring[head].flags & DESC_HW);
+}
+
+void poll_avail(void)
+{
+       unsigned head = (ring_size - 1) & host.used_idx;
+
+       while (!(ring[head].flags & DESC_HW))
+               busy_wait();
+}
+
+bool use_buf(unsigned *lenp, void **bufp)
+{
+       unsigned head = (ring_size - 1) & host.used_idx;
+
+       if (!(ring[head].flags & DESC_HW))
+               return false;
+
+       /* make sure length read below is not speculated */
+       /* Barrier A (for pairing) */
+       smp_acquire();
+
+       /* simple in-order completion: we don't need
+        * to touch index at all. This also means we
+        * can just modify the descriptor in-place.
+        */
+       ring[head].len--;
+       /* Make sure len is valid before flags.
+        * Note: alternative is to write len and flags in one access -
+        * possible on 64 bit architectures but wmb is free on Intel anyway
+        * so I have no way to test whether it's a gain.
+        */
+       /* Barrier B (for pairing) */
+       smp_release();
+       ring[head].flags = 0;
+       host.used_idx++;
+       return true;
+}
+
+void call_used(void)
+{
+       /* Flush in previous flags write */
+       /* Barrier D (for pairing) */
+       smp_mb();
+       if (!need_event(event->call_index,
+                       host.used_idx,
+                       host.called_used_idx))
+               return;
+
+       host.called_used_idx = host.used_idx;
+       call();
+}
diff --git a/tools/virtio/ringtest/run-on-all.sh b/tools/virtio/ringtest/run-on-all.sh
new file mode 100755 (executable)
index 0000000..52b0f71
--- /dev/null
@@ -0,0 +1,24 @@
+#!/bin/sh
+
+#use last CPU for host. Why not the first?
+#many devices tend to use cpu0 by default so
+#it tends to be busier
+HOST_AFFINITY=$(cd /dev/cpu; ls|grep -v '[a-z]'|sort -n|tail -1)
+
+#run command on all cpus
+for cpu in $(cd /dev/cpu; ls|grep -v '[a-z]'|sort -n);
+do
+       #Don't run guest and host on same CPU
+       #It actually works ok if using signalling
+       if
+               (echo "$@" | grep -e "--sleep" > /dev/null) || \
+                       test $HOST_AFFINITY '!=' $cpu
+       then
+               echo "GUEST AFFINITY $cpu"
+               "$@" --host-affinity $HOST_AFFINITY --guest-affinity $cpu
+       fi
+done
+echo "NO GUEST AFFINITY"
+"$@" --host-affinity $HOST_AFFINITY
+echo "NO AFFINITY"
+"$@"
diff --git a/tools/virtio/ringtest/virtio_ring_0_9.c b/tools/virtio/ringtest/virtio_ring_0_9.c
new file mode 100644 (file)
index 0000000..47c9a1a
--- /dev/null
@@ -0,0 +1,316 @@
+/*
+ * Copyright (C) 2016 Red Hat, Inc.
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Partial implementation of virtio 0.9. event index is used for signalling,
+ * unconditionally. Design roughly follows linux kernel implementation in order
+ * to be able to judge its performance.
+ */
+#define _GNU_SOURCE
+#include "main.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <string.h>
+#include <linux/virtio_ring.h>
+
+struct data {
+       void *data;
+} *data;
+
+struct vring ring;
+
+/* enabling the below activates experimental ring polling code
+ * (which skips index reads on consumer in favor of looking at
+ * high bits of ring id ^ 0x8000).
+ */
+/* #ifdef RING_POLL */
+
+/* how much padding is needed to avoid false cache sharing */
+#define HOST_GUEST_PADDING 0x80
+
+struct guest {
+       unsigned short avail_idx;
+       unsigned short last_used_idx;
+       unsigned short num_free;
+       unsigned short kicked_avail_idx;
+       unsigned short free_head;
+       unsigned char reserved[HOST_GUEST_PADDING - 10];
+} guest;
+
+struct host {
+       /* we do not need to track last avail index
+        * unless we have more than one in flight.
+        */
+       unsigned short used_idx;
+       unsigned short called_used_idx;
+       unsigned char reserved[HOST_GUEST_PADDING - 4];
+} host;
+
+/* implemented by ring */
+void alloc_ring(void)
+{
+       int ret;
+       int i;
+       void *p;
+
+       ret = posix_memalign(&p, 0x1000, vring_size(ring_size, 0x1000));
+       if (ret) {
+               perror("Unable to allocate ring buffer.\n");
+               exit(3);
+       }
+       memset(p, 0, vring_size(ring_size, 0x1000));
+       vring_init(&ring, ring_size, p, 0x1000);
+
+       guest.avail_idx = 0;
+       guest.kicked_avail_idx = -1;
+       guest.last_used_idx = 0;
+       /* Put everything in free lists. */
+       guest.free_head = 0;
+       for (i = 0; i < ring_size - 1; i++)
+               ring.desc[i].next = i + 1;
+       host.used_idx = 0;
+       host.called_used_idx = -1;
+       guest.num_free = ring_size;
+       data = malloc(ring_size * sizeof *data);
+       if (!data) {
+               perror("Unable to allocate data buffer.\n");
+               exit(3);
+       }
+       memset(data, 0, ring_size * sizeof *data);
+}
+
+/* guest side */
+int add_inbuf(unsigned len, void *buf, void *datap)
+{
+       unsigned head, avail;
+       struct vring_desc *desc;
+
+       if (!guest.num_free)
+               return -1;
+
+       head = guest.free_head;
+       guest.num_free--;
+
+       desc = ring.desc;
+       desc[head].flags = VRING_DESC_F_NEXT;
+       desc[head].addr = (unsigned long)(void *)buf;
+       desc[head].len = len;
+       /* We do it like this to simulate the way
+        * we'd have to flip it if we had multiple
+        * descriptors.
+        */
+       desc[head].flags &= ~VRING_DESC_F_NEXT;
+       guest.free_head = desc[head].next;
+
+       data[head].data = datap;
+
+#ifdef RING_POLL
+       /* Barrier A (for pairing) */
+       smp_release();
+       avail = guest.avail_idx++;
+       ring.avail->ring[avail & (ring_size - 1)] =
+               (head | (avail & ~(ring_size - 1))) ^ 0x8000;
+#else
+       avail = (ring_size - 1) & (guest.avail_idx++);
+       ring.avail->ring[avail] = head;
+       /* Barrier A (for pairing) */
+       smp_release();
+#endif
+       ring.avail->idx = guest.avail_idx;
+       return 0;
+}
+
+void *get_buf(unsigned *lenp, void **bufp)
+{
+       unsigned head;
+       unsigned index;
+       void *datap;
+
+#ifdef RING_POLL
+       head = (ring_size - 1) & guest.last_used_idx;
+       index = ring.used->ring[head].id;
+       if ((index ^ guest.last_used_idx ^ 0x8000) & ~(ring_size - 1))
+               return NULL;
+       /* Barrier B (for pairing) */
+       smp_acquire();
+       index &= ring_size - 1;
+#else
+       if (ring.used->idx == guest.last_used_idx)
+               return NULL;
+       /* Barrier B (for pairing) */
+       smp_acquire();
+       head = (ring_size - 1) & guest.last_used_idx;
+       index = ring.used->ring[head].id;
+#endif
+       *lenp = ring.used->ring[head].len;
+       datap = data[index].data;
+       *bufp = (void*)(unsigned long)ring.desc[index].addr;
+       data[index].data = NULL;
+       ring.desc[index].next = guest.free_head;
+       guest.free_head = index;
+       guest.num_free++;
+       guest.last_used_idx++;
+       return datap;
+}
+
+void poll_used(void)
+{
+#ifdef RING_POLL
+       unsigned head = (ring_size - 1) & guest.last_used_idx;
+
+       for (;;) {
+               unsigned index = ring.used->ring[head].id;
+
+               if ((index ^ guest.last_used_idx ^ 0x8000) & ~(ring_size - 1))
+                       busy_wait();
+               else
+                       break;
+       }
+#else
+       unsigned head = guest.last_used_idx;
+
+       while (ring.used->idx == head)
+               busy_wait();
+#endif
+}
+
+void disable_call()
+{
+       /* Doing nothing to disable calls might cause
+        * extra interrupts, but reduces the number of cache misses.
+        */
+}
+
+bool enable_call()
+{
+       unsigned short last_used_idx;
+
+       vring_used_event(&ring) = (last_used_idx = guest.last_used_idx);
+       /* Flush call index write */
+       /* Barrier D (for pairing) */
+       smp_mb();
+#ifdef RING_POLL
+       {
+               unsigned short head = last_used_idx & (ring_size - 1);
+               unsigned index = ring.used->ring[head].id;
+
+               return (index ^ last_used_idx ^ 0x8000) & ~(ring_size - 1);
+       }
+#else
+       return ring.used->idx == last_used_idx;
+#endif
+}
+
+void kick_available(void)
+{
+       /* Flush in previous flags write */
+       /* Barrier C (for pairing) */
+       smp_mb();
+       if (!vring_need_event(vring_avail_event(&ring),
+                             guest.avail_idx,
+                             guest.kicked_avail_idx))
+               return;
+
+       guest.kicked_avail_idx = guest.avail_idx;
+       kick();
+}
+
+/* host side */
+void disable_kick()
+{
+       /* Doing nothing to disable kicks might cause
+        * extra interrupts, but reduces the number of cache misses.
+        */
+}
+
+bool enable_kick()
+{
+       unsigned head = host.used_idx;
+
+       vring_avail_event(&ring) = head;
+       /* Barrier C (for pairing) */
+       smp_mb();
+#ifdef RING_POLL
+       {
+               unsigned index = ring.avail->ring[head & (ring_size - 1)];
+
+               return (index ^ head ^ 0x8000) & ~(ring_size - 1);
+       }
+#else
+       return head == ring.avail->idx;
+#endif
+}
+
+void poll_avail(void)
+{
+       unsigned head = host.used_idx;
+#ifdef RING_POLL
+       for (;;) {
+               unsigned index = ring.avail->ring[head & (ring_size - 1)];
+               if ((index ^ head ^ 0x8000) & ~(ring_size - 1))
+                       busy_wait();
+               else
+                       break;
+       }
+#else
+       while (ring.avail->idx == head)
+               busy_wait();
+#endif
+}
+
+bool use_buf(unsigned *lenp, void **bufp)
+{
+       unsigned used_idx = host.used_idx;
+       struct vring_desc *desc;
+       unsigned head;
+
+#ifdef RING_POLL
+       head = ring.avail->ring[used_idx & (ring_size - 1)];
+       if ((used_idx ^ head ^ 0x8000) & ~(ring_size - 1))
+               return false;
+       /* Barrier A (for pairing) */
+       smp_acquire();
+
+       used_idx &= ring_size - 1;
+       desc = &ring.desc[head & (ring_size - 1)];
+#else
+       if (used_idx == ring.avail->idx)
+               return false;
+
+       /* Barrier A (for pairing) */
+       smp_acquire();
+
+       used_idx &= ring_size - 1;
+       head = ring.avail->ring[used_idx];
+       desc = &ring.desc[head];
+#endif
+
+       *lenp = desc->len;
+       *bufp = (void *)(unsigned long)desc->addr;
+
+       /* now update used ring */
+       ring.used->ring[used_idx].id = head;
+       ring.used->ring[used_idx].len = desc->len - 1;
+       /* Barrier B (for pairing) */
+       smp_release();
+       host.used_idx++;
+       ring.used->idx = host.used_idx;
+       
+       return true;
+}
+
+void call_used(void)
+{
+       /* Flush in previous flags write */
+       /* Barrier D (for pairing) */
+       smp_mb();
+       if (!vring_need_event(vring_used_event(&ring),
+                             host.used_idx,
+                             host.called_used_idx))
+               return;
+
+       host.called_used_idx = host.used_idx;
+       call();
+}
diff --git a/tools/virtio/ringtest/virtio_ring_poll.c b/tools/virtio/ringtest/virtio_ring_poll.c
new file mode 100644 (file)
index 0000000..84fc2c5
--- /dev/null
@@ -0,0 +1,2 @@
+#define RING_POLL 1
+#include "virtio_ring_0_9.c"